Source code for couchbase.search

from datetime import timedelta
from enum import Enum
from typing import *

import attr
import couchbase_core

from couchbase.exceptions import CouchbaseException
from couchbase_core.views.iterator import AlreadyQueriedException
from couchbase_core import abstractmethod, JSON, _to_json, iterable_wrapper
from couchbase_core._pyport import unicode
from couchbase_core.supportability import internal
from .options import OptionBlockTimeOut, UnsignedInt32, UnsignedInt64, forward_args
import abc
import couchbase.mutation_state as MutationState


SearchQueryRow = JSON


def _genprop(converter, *apipaths, **kwargs):
    """
    Build a property backed by a nested path inside the owner's
    ``_json_`` dictionary.

    Unlike a plain ``@property``, the generated property runs every
    assigned value through `converter` and stores the result at the
    location named by `apipaths`. Reading a path which has not been set
    yields ``None``; deleting a path which has not been set is a no-op.

    Any object using this property *must* expose a dictionary attribute
    called ``_json_``.

    :param converter: Callable applied to each assigned value, e.g. `int`
        or `lambda x: x` for passthrough
    :param apipaths:
        Components of the storage path, e.g. `foo, bar, baz` for
            `_json_['foo']['bar']['baz']`
    :param doc: (keyword) Docstring for the property. Defaults to a
        sentence naming the dotted path
    :return: A property object
    :raise TypeError: if no path components are given
    """
    if len(apipaths) == 0:
        raise TypeError('Must have at least one API path')

    # Every component but the last names an intermediate dictionary
    parents, leaf = apipaths[:-1], apipaths[-1]

    def fget(self):
        node = self._json_
        for key in apipaths:
            try:
                node = node[key]
            except KeyError:
                # Any missing component means "not set"
                return None
        return node

    def fset(self, value):
        converted = converter(value)
        node = self._json_
        for key in parents:
            # Create intermediate dictionaries on demand
            node = node.setdefault(key, {})
        node[leaf] = converted

    def fdel(self):
        node = self._json_
        try:
            for key in parents:
                node = node[key]
            del node[leaf]
        except KeyError:
            # Nothing stored at this path; deletion is a no-op
            pass

    default_doc = 'Corresponds to the ``{0}`` field'.format('.'.join(apipaths))
    doc = kwargs.pop('doc', default_doc)
    return property(fget, fset, fdel, doc)


def _genprop_str(*apipaths, **kwargs):
    """
    Shortcut for :func:`_genprop` using ``unicode`` as the converter, so
    that every assigned value is stored as a string.

    :param apipaths: Path components, forwarded unchanged
    :param kwargs: Keyword options (e.g. ``doc``), forwarded unchanged
    :return: A property object
    """
    string_property = _genprop(unicode, *apipaths, **kwargs)
    return string_property


def _highlight(value):
    """
    Type validator for the highlight style.

    :param value: The requested style
    :return: `value` unchanged when it is ``'html'`` or ``'ansi'``
    :raise ValueError: for any other value
    """
    allowed_styles = ('html', 'ansi')
    if value in allowed_styles:
        return value
    raise ValueError(
        'Highlight must be "html" or "ansi", got {0}'.format(value))


def _consistency(value):
    """
    Validator for the 'consistency' parameter.

    Falsy values (``None``, ``''``) are passed through; otherwise only
    ``not_bounded`` (compared case-insensitively) is accepted.

    :param value: The consistency string, or a falsy value to clear it
    :return: `value` unchanged
    :raise ValueError: if `value` is any other non-empty string
    """
    if not value:
        return value
    if value.lower() in ('', 'not_bounded'):
        return value
    raise ValueError('Invalid value!')

def _assign_kwargs(self, kwargs):
    """
    Set each keyword argument as an attribute of the given instance.

    Only names the instance already exposes are accepted; attributes
    preceding an unknown name in `kwargs` will already have been set
    when the exception is raised.

    :param self: The instance receiving the attributes
    :param kwargs: Dictionary of attribute names to values
    :raise AttributeError: if a name is not an existing attribute
    """
    for attr_name, attr_value in kwargs.items():
        if not hasattr(self, attr_name):
            raise AttributeError(
                attr_name, 'Not valid for', self.__class__.__name__)
        setattr(self, attr_name, attr_value)


class Facet(object):
    """
    Base class for all facets. Every facet aggregates over exactly one
    field, which is stored in the facet's ``_json_`` dictionary.
    """

    def __init__(self, field):
        """
        :param field: Name of the field upon which the facet aggregates
        """
        self._json_ = dict(field=field)

    # Field upon which the facet will aggregate
    field = _genprop_str('field')

    @property
    def encodable(self):
        """
        Returns a representation of the object suitable for serialization
        (the underlying ``_json_`` dictionary itself, not a copy)
        """
        return self._json_

    def __repr__(self):
        return '{0}<{1!r}>'.format(self.__class__.__name__, self._json_)


class TermFacet(Facet):
    """
    Facet which aggregates the most frequently used terms.
    """

    limit = _genprop(int, 'size',
                     doc="Maximum number of terms/count pairs to return")

    def __init__(self, field, limit=0):
        """
        :param field: Field upon which to aggregate
        :param int limit: Maximum number of pairs to return. A falsy
            value (the default) leaves the limit unset
        """
        super(TermFacet, self).__init__(field)
        if limit:
            self.limit = limit
def _mk_range_bucket(name, n1, n2, r1, r2):
    """
    Create a named range specification for encoding.

    Bounds whose user value is ``None`` are left out of the result
    entirely rather than being encoded as null.

    :param name: The name of the range as it should appear in the result
    :param n1: The key under which the lower bound is stored
    :param n2: The key under which the upper bound is stored
    :param r1: The value of the lower bound (user value)
    :param r2: The value of the upper bound (user value)
    :return: A dictionary holding the supplied bounds (keyed under ``n1``
        and ``n2``) plus the range name under ``name``
    :raise TypeError: if both bounds are ``None``
    """
    bucket = {
        key: bound
        for key, bound in ((n1, r1), (n2, r2))
        if bound is not None
    }
    if not bucket:
        raise TypeError('Must specify at least one range boundary!')
    bucket['name'] = name
    return bucket
class DateFacet(Facet):
    """
    Facet which aggregates results by date range.

    At least one range must be added with :meth:`add_range` before the
    facet is added to :attr:`~Params.facets`.
    """

    # List of range buckets, stored under the `date_ranges` JSON field
    _ranges = _genprop(list, 'date_ranges')

    def __init__(self, field):
        super(DateFacet, self).__init__(field)
        self._ranges = []

    def add_range(self, name, start=None, end=None):
        """
        Adds a date range to the given facet.

        :param str name:
            The name by which the results within the range can be accessed
        :param str start: Lower date range. Should be in RFC 3339 format
        :param str end: Upper date range.
        :return: The `DateFacet` object, so calls to this method may be
            chained
        :raise TypeError: if neither `start` nor `end` is given
        """
        ranges = self._ranges
        ranges.append(_mk_range_bucket(name, 'start', 'end', start, end))
        return self
class NumericFacet(Facet):
    """
    Facet which aggregates results by numeric range.

    At least one range must be added with :meth:`add_range` before the
    facet is added to :attr:`Params.facets`.
    """

    # List of range buckets, stored under the `numeric_ranges` JSON field
    _ranges = _genprop(list, 'numeric_ranges')

    def __init__(self, field):
        super(NumericFacet, self).__init__(field)
        self._ranges = []

    def add_range(self, name, min=None, max=None):
        """
        Add a numeric range.

        :param str name:
            the name by which the range is accessed in the results
        :param int | float min: Lower range bound
        :param int | float max: Upper range bound
        :return: This object; suitable for method chaining
        :raise TypeError: if neither `min` nor `max` is given
        """
        ranges = self._ranges
        ranges.append(_mk_range_bucket(name, 'min', 'max', min, max))
        return self
class _FacetDict(dict):
    """
    Internal dict subclass which only accepts :class:`Facet` values and
    rejects facets whose ``_ranges`` attribute is empty.
    """

    # noinspection PyMissingConstructor
    def __init__(self, *args, **kwargs):
        # Route construction through update() so every entry is validated
        self.update(*args, **kwargs)

    def __setitem__(self, key, value):
        if not isinstance(value, Facet):
            raise ValueError('Can only add facet')
        # Facets without a `_ranges` attribute are exempt from this check
        if not getattr(value, '_ranges', True):
            raise ValueError('{} object must have at least one range. Use '
                             'add_range'.format(value.__class__.__name__))
        super(_FacetDict, self).__setitem__(key, value)

    def update(self, *args, **kwargs):
        if len(args) > 1:
            raise TypeError('only one merge at a time!')
        for source in args:
            for key, facet in dict(source).items():
                self[key] = facet
        for key, facet in kwargs.items():
            self[key] = facet

    def setdefault(self, key, value=None):
        if key in self:
            return self[key]
        self[key] = value
        return self[key]


class Sort(object):
    """
    Base sort specification, encoded as a dictionary with a ``by`` key.
    """

    def __init__(self, by, **kwargs):
        self._json_ = {'by': by.replace('descending', 'desc')}
        # `descending` is accepted as an alias for `desc`
        if 'descending' in kwargs:
            kwargs['desc'] = kwargs.pop('descending')
        _assign_kwargs(self, kwargs)

    desc = _genprop(bool, 'desc', doc='Sort using descending order')

    @property
    def descending(self):
        return self.desc

    @descending.setter
    def descending(self, value):
        self.desc = value

    def as_encodable(self):
        return self._json_


class SortString(Sort):
    """
    Sorts by a list of fields. This is similar to specifying a list of
    fields in :attr:`Params.sort`
    """

    def __init__(self, *fields):
        # Encoded as a plain list rather than a dictionary
        self._json_ = list(fields)


class SortScore(Sort):
    """
    Sorts by the score of each match.
    """

    def __init__(self, **kwargs):
        super(SortScore, self).__init__('score', **kwargs)


class SortID(Sort):
    """
    Sorts lexically by the document ID of each match
    """

    def __init__(self, **kwargs):
        super(SortID, self).__init__('id', **kwargs)


class SortField(Sort):
    """
    Sorts according to the properties of a given field
    """

    def __init__(self, field, **kwargs):
        kwargs.update(field=field)
        super(SortField, self).__init__('field', **kwargs)

    field = _genprop_str('field', doc='Field to sort by')
    type = _genprop_str('type', doc='Coerce field to this type')
    mode = _genprop_str('mode')
    missing = _genprop_str('missing')


def _location_conv(l):
    """
    Converter for a two-element location.

    :param l: Sequence of exactly two numbers
    :return: A list containing both elements converted to ``float``
    :raise ValueError: if `l` does not have exactly two elements
    """
    if len(l) != 2:
        raise ValueError('Require list of two numbers')
    first, second = l[0], l[1]
    return [float(first), float(second)]


class SortGeoDistance(Sort):
    """
    Sorts matches based on their distance from a specific location
    """

    def __init__(self, location, field, **kwargs):
        kwargs.update(location=location, field=field)
        super(SortGeoDistance, self).__init__('geo_distance', **kwargs)

    location = _genprop(_location_conv, 'location',
                        doc='`(lon, lat)` of point of origin')
    field = _genprop_str('field', doc='Field that contains the distance')
    unit = _genprop_str('unit', doc='Distance unit used for measuring')


class SortRaw(Sort):
    """
    Sort specification wrapping an already-built payload, stored as-is.
    """

    def __init__(self, raw):
        self._json_ = raw


def _convert_sort(s):
    """
    Converter for the `sort` parameter: :class:`Sort` objects are passed
    through, anything else is converted to a list.
    """
    return s if isinstance(s, Sort) else list(s)


# This is the Params class from SDK2, but is now only for internal use.
# All required search APIs should now be provided by the official
# SDK3 classes, such as SearchOptions.  However, we still use it
# internally, largely for SearchOptions functionality and
# generating query JSON.
# TODO: tidy this up more - perhaps move all functionality into
#  SearchOptions or similar.
class _Params(object):
    """
    Generic parameters and query modifiers. Keyword arguments may be used
    to initialize instance attributes. See individual attributes for more
    information.

    .. attribute:: facets

        A dictionary of :class:`Facet` objects. The dictionary uses the
        facet name for keys. You can retrieve the facet results by their
        name as well.

        You can add facets like so::

            params.facets['term_analysis'] = TermFacet('author', limit=10)
            params.facets['view_count'] = NumericFacet('views').add_range('low', max=50)
    """

    def __init__(self, **kwargs):
        self._json_ = {}
        self._ms = None
        self.facets = _FacetDict(**kwargs.pop('facets', {}))
        _assign_kwargs(self, kwargs)

    def as_encodable(self, index_name):
        """
        :param index_name: The name of the index for the query
        :return: A dict suitable for passing to `json.dumps()`. This is
            the instance's own ``_json_`` dictionary, updated in place
        """
        payload = self._json_

        if self.facets:
            payload['facets'] = {
                name: facet.encodable for name, facet in self.facets.items()
            }

        if self._ms:
            # Encode according to scan vectors..
            vectors = {index_name: self._ms._to_fts_encodable()}
            payload.setdefault('ctl', {})['consistency'] = {
                'level': 'at_plus',
                'vectors': vectors,
            }

        sort = self.sort
        if sort:
            if isinstance(sort, Sort):
                sort = sort.as_encodable()
            payload['sort'] = sort

        return payload

    limit = _genprop(int, 'size', doc='Maximum number of results to return')

    skip = _genprop(int, 'from', doc='Seek by this number of results')

    explain = _genprop(
        bool, 'explain', doc='Whether to return the explanation of the search')

    fields = _genprop(
        list, 'fields', doc='Return these fields for each document')

    # Stored as an integer number of milliseconds
    timeout = _genprop(lambda x: int(x * 1000), 'ctl', 'timeout',
                       doc='Timeout for the query, in seconds')

    highlight_style = _genprop(_highlight, 'highlight', 'style', doc="""
        Highlight the results using a given style.
        Can be either `ansi` or `html`
        """)

    highlight_fields = _genprop(
        list, 'highlight', 'fields', doc="""
        Highlight the results from these fields (list)
        """)

    sort = _genprop(
        _convert_sort, 'sort', doc="""
        Specify a list of fields by which to sort the results. Can also be
        a :class:`Sort` class
        """
    )

    consistency = _genprop(
        _consistency, 'ctl', 'consistency', doc="""
        Consistency for the query. Use this for 'fixed' consistency, or to
        clear consistency.

        You might want to use :meth:`consistent_with` for consistency that is
        bounded to specific mutations
        """
    )

    def consistent_with(self, ms):
        """
        Ensure that this query is consistent with the given mutations. When
        set, this ensures that only document versions as or more recent than
        the provided mutations are used for the search. This is often helpful
        when attempting searches on newly inserted documents.

        :param ms: Mutation State
        :type ms: :class:`couchbase.mutation_state.MutationState`
        :raise ValueError: if :attr:`consistency` is currently set
        """
        if self.consistency:
            raise ValueError(
                'Clear "consistency" before specifying "consistent_with"')
        self._ms = ms
class SearchQuery(object):
    """
    Base class for all queries; each query keeps its payload in a
    ``_json_`` dictionary. You probably want to use one of the subclasses.

    .. seealso:: :class:`MatchQuery`, :class:`BooleanQuery`,
        :class:`RegexQuery`, :class:`PrefixQuery`,
        :class:`NumericRangeQuery`, :class:`DateRangeQuery`,
        :class:`ConjunctionQuery`, :class:`DisjunctionQuery`, and others
        in this module.
    """

    boost = _genprop(
        float, 'boost', doc="""
        When specifying multiple queries, you can give this query a
        higher or lower weight in order to prioritize it among sibling
        queries. See :class:`ConjunctionQuery`
        """)

    def __init__(self):
        self._json_ = {}

    def validate(self):
        """
        Optional validation hook, invoked before encoding. The base
        implementation accepts everything.
        """
        return None

    @property
    def encodable(self):
        """
        Returns an object suitable for serializing to JSON, after running
        :meth:`validate`

        .. code-block:: python

            json.dumps(query.encodable)
        """
        self.validate()
        return self._json_
class RawQuery(SearchQuery):
    """
    This class is used to wrap a raw query payload. It should be used
    for custom query parameters, or in cases where any of the other
    query classes are insufficient.
    """

    def __init__(self, obj):
        """
        :param obj: The payload, stored as-is (not copied)
        """
        super(RawQuery, self).__init__()
        self._json_ = obj


class _SingleQuery(SearchQuery):
    """
    Base for queries built around a single mandatory term. Subclasses
    set ``_TERMPROP`` to the name of the attribute receiving the term.
    """
    __metaclass__ = abc.ABCMeta

    @property
    @abstractmethod
    def _TERMPROP(self):
        """Name of the JSON property that contains the mandatory match spec"""

    def __init__(self, term, **kwargs):
        """
        :param term: The mandatory term. Ignored when `kwargs` already
            contains an entry named after ``_TERMPROP``
        :param kwargs: Additional attributes to assign
        :raise AttributeError: if a keyword is not an attribute of the
            query
        """
        super(_SingleQuery, self).__init__()
        if self._TERMPROP not in kwargs:
            kwargs[self._TERMPROP] = term
        _assign_kwargs(self, kwargs)


# Properties shared by several query classes; attached to a class with the
# `_with_fields` decorator.
_COMMON_FIELDS = {
    'prefix_length': _genprop(
        int, 'prefix_length', doc="""
        When using :attr:`fuzziness`, this controls how much of the term
        or phrase is *excluded* from fuzziness. This may help improve
        performance at the expense of omitting potential fuzzy matches
        at the beginning of the string.
        """),
    'fuzziness': _genprop(
        int, 'fuzziness', doc="""
        Allow a given degree of fuzz in the match. Matches which are closer to
        the original term will be scored higher.

        You can apply the fuzziness to only a portion of the string by
        specifying :attr:`prefix_length` - indicating that only the part
        of the field after the prefix length should be checked with fuzziness.

        This value is specified as an integer
        """),
    'field': _genprop(
        str, 'field', doc="""
        Restrict searching to a given document field
        """),
    'analyzer': _genprop(
        str, 'analyzer', doc="""
        Use a defined server-side analyzer to process the input term prior to
        executing the search
        """
    )
}


def _with_fields(*fields):
    """
    Class decorator to include common query fields

    The decorator does not modify the decorated class; it returns a
    *new* class with the same name and bases. Attributes defined on the
    decorated class take precedence over the common fields.

    :param fields: List of fields to include. These should be keys of the
        `_COMMON_FIELDS` dict
    :return: The class decorator
    :raise KeyError: if a name is not in `_COMMON_FIELDS`
    """
    common = {name: _COMMON_FIELDS[name] for name in fields}

    def wrap(cls):
        # Work on a fresh copy for every class, so that re-using one
        # decorator object cannot leak attributes of a previously
        # decorated class into the next one.
        namespace = dict(common)
        namespace.update(cls.__dict__)
        # NOTE(review): because a new class object is created, methods
        # of decorated classes presumably need the explicit
        # `super(Name, self)` form rather than zero-argument `super()`.
        return type(cls.__name__, cls.__bases__, namespace)
    return wrap
class QueryStringQuery(_SingleQuery):
    """
    Query described by the user in a query language. The server then
    executes the appropriate query based on the contents of the query
    string:

    .. seealso::

        `Query Language <http://www.blevesearch.com/docs/Query-String-Query/>`_

    Example::

        QueryStringQuery('description:water and stuff')
    """

    _TERMPROP = 'query'

    # Actual query string
    query = _genprop_str('query')
@_with_fields('field')
class WildcardQuery(_SingleQuery):
    """
    Query in which the characters `*` and `?` have special meaning, where
    `?` matches 1 occurrence and `*` will match 0 or more occurrences of the
    previous character
    """

    _TERMPROP = 'wildcard'
    wildcard = _genprop_str('wildcard', doc='Wildcard pattern to use')
def _list_nonempty_conv(l):
    """
    Converter which accepts only non-empty collections.

    :param l: The collection to validate
    :return: The items of `l` as a new list
    :raise ValueError: if `l` is empty (or otherwise falsy)
    """
    if not l:
        raise ValueError('Must have at least one value')
    # A converter must hand back the value to store; without this return
    # the property would always end up holding None.
    return list(l)
class DocIdQuery(_SingleQuery):
    """
    Matches document IDs. This is most useful in a compound query
    (for example, :class:`BooleanQuery`). When used as a criteria, only
    documents with the specified IDs will be searched.
    """

    _TERMPROP = 'ids'

    ids = _genprop(list, 'ids', doc="""
        List of document IDs to use
        """)

    def validate(self):
        """
        :raise NoChildrenException: if :attr:`ids` is empty or unset
        """
        super(DocIdQuery, self).validate()
        if self.ids:
            return
        raise NoChildrenException('`ids` must contain at least one ID')
@_with_fields('prefix_length', 'fuzziness', 'field', 'analyzer')
class MatchQuery(_SingleQuery):
    """
    Query which checks one or more fields for a match
    """

    _TERMPROP = 'match'

    match = _genprop_str(
        'match', doc="""
        String to search for
        """)
@_with_fields('fuzziness', 'prefix_length', 'field')
class TermQuery(_SingleQuery):
    """
    Searches for a given term in documents. Unlike :class:`MatchQuery`,
    the term is not analyzed.

    Example::

        TermQuery('lcb_cntl_string')
    """

    _TERMPROP = 'term'
    term = _genprop_str('term', doc='Exact term to search for')
@_with_fields('field', 'analyzer')
class MatchPhraseQuery(_SingleQuery):
    """
    Search documents which match a given phrase. The phrase is composed
    of one or more terms.

    Example::

        MatchPhraseQuery("Hello world!")
    """

    _TERMPROP = 'match_phrase'
    match_phrase = _genprop_str('match_phrase', doc="Phrase to search for")
@_with_fields('field')
class PhraseQuery(_SingleQuery):
    """
    Query built from a list of terms, given as positional arguments.
    """

    _TERMPROP = 'terms'
    terms = _genprop(list, 'terms', doc='List of terms to search for')

    def __init__(self, *phrases, **kwargs):
        """
        :param phrases: The terms to search for
        :param kwargs: Additional attributes to assign
        """
        # The tuple of positional arguments is passed on as the single term
        super(PhraseQuery, self).__init__(phrases, **kwargs)

    def validate(self):
        """
        :raise NoChildrenException: if :attr:`terms` is empty or unset
        """
        super(PhraseQuery, self).validate()
        if self.terms:
            return
        raise NoChildrenException('Missing terms')
@_with_fields('field')
class PrefixQuery(_SingleQuery):
    """
    Search documents for fields beginning with a certain prefix. This is
    most useful for type-ahead or lookup queries.
    """

    _TERMPROP = 'prefix'
    prefix = _genprop_str('prefix', doc='The prefix to match')
@_with_fields('field')
class RegexQuery(_SingleQuery):
    """
    Search documents for fields matching a given regular expression
    """

    # The attribute is called `regex`, but it is stored under the
    # `regexp` JSON field.
    _TERMPROP = 'regex'
    regex = _genprop_str('regexp', doc="Regular expression to use")
# Alternative name for the same class
RegexpQuery = RegexQuery


@_with_fields('field')
class GeoDistanceQuery(SearchQuery):
    """
    Search for items within a given radius of a point of origin
    """

    def __init__(self, distance, location, **kwargs):
        """
        Search for items within a given radius

        :param distance: The distance string specifying the radius
        :param location: A tuple of `(lon, lat)` indicating point of origin
        :param kwargs: Additional attributes to assign
        """
        super(GeoDistanceQuery, self).__init__()
        kwargs['distance'] = distance
        kwargs['location'] = location
        _assign_kwargs(self, kwargs)

    location = _genprop(_location_conv, 'location', doc='Location')
    distance = _genprop_str('distance')


@_with_fields('field')
class GeoBoundingBoxQuery(SearchQuery):
    """
    Search for items inside the box described by two corners
    """

    def __init__(self, top_left, bottom_right, **kwargs):
        """
        :param top_left: See :attr:`top_left`
        :param bottom_right: See :attr:`bottom_right`
        :param kwargs: Additional attributes to assign
        """
        super(GeoBoundingBoxQuery, self).__init__()
        kwargs['top_left'] = top_left
        kwargs['bottom_right'] = bottom_right
        _assign_kwargs(self, kwargs)

    top_left = _genprop(
        _location_conv, 'top_left',
        doc='Tuple of `(lon, lat)` for the top left corner of bounding box')
    bottom_right = _genprop(
        _location_conv, 'bottom_right',
        doc='Tuple of `(lon, lat`) for the bottom right corner of bounding box')


class _RangeQuery(SearchQuery):
    """
    Base for queries bounded by a lower and/or upper value. Subclasses
    set ``_MINMAX`` to the names of the two attributes holding the bounds.
    """
    __metaclass__ = abc.ABCMeta

    @property
    @abstractmethod
    def _MINMAX(self):
        return 'min_name', 'max_name'

    def __init__(self, r1, r2, **kwargs):
        """
        :param r1: Lower bound, or ``None`` to leave it unset
        :param r2: Upper bound, or ``None`` to leave it unset
        :param kwargs: Additional attributes to assign
        :raise TypeError: if both `r1` and `r2` are ``None``
        """
        super(_RangeQuery, self).__init__()
        _assign_kwargs(self, kwargs)
        if r1 is None and r2 is None:
            # Format the message; previously the attribute names were
            # passed as extra exception arguments and the placeholders
            # were left unfilled.
            raise TypeError(
                'At least one of {0} or {1} should be specified'.format(
                    *self._MINMAX))
        lower_name, upper_name = self._MINMAX[0], self._MINMAX[1]
        if r1 is not None:
            setattr(self, lower_name, r1)
        if r2 is not None:
            setattr(self, upper_name, r2)
@_with_fields('field')
class NumericRangeQuery(_RangeQuery):
    """
    Search documents for fields containing a value within a given
    numerical range.

    At least one of `min` or `max` must be specified.
    """

    _MINMAX = 'min', 'max'

    def __init__(self, min=None, max=None, **kwargs):
        """
        :param float min: See :attr:`min`
        :param float max: See :attr:`max`
        :param kwargs: Additional attributes to assign
        """
        super(NumericRangeQuery, self).__init__(min, max, **kwargs)

    min = _genprop(
        float, 'min', doc='Lower bound of range. See :attr:`min_inclusive`')

    max = _genprop(
        float, 'max', doc='Upper bound of range. See :attr:`max_inclusive`')

    min_inclusive = _genprop(
        bool, 'inclusive_min',
        doc='Whether matches are inclusive of lower bound')

    max_inclusive = _genprop(
        bool, 'inclusive_max',
        doc='Whether matches are inclusive of upper bound')
@_with_fields('field')
class DateRangeQuery(_RangeQuery):
    """
    Search documents for fields containing a value within a given date
    range.

    The date ranges are parsed according to a given :attr:`datetime_parser`.
    If no parser is specified, the RFC 3339 parser is used. See
    `Generating an RFC 3339 Timestamp <http://goo.gl/LIkV7G>`_.

    The :attr:`start` and :attr:`end` parameters should be specified in the
    constructor. Note that either `start` or `end` (but not both!) may be
    omitted.

    .. code-block:: python

        DateRangeQuery(start='2014-12-25', end='2016-01-01')
    """

    _MINMAX = 'start', 'end'

    def __init__(self, start=None, end=None, **kwargs):
        """
        :param str start: Start of date range
        :param str end: End of date range
        :param kwargs: Additional options: :attr:`field`, :attr:`boost`
        """
        super(DateRangeQuery, self).__init__(start, end, **kwargs)

    start = _genprop_str('start', doc='Lower bound datetime')
    end = _genprop_str('end', doc='Upper bound datetime')

    start_inclusive = _genprop(
        bool, 'inclusive_start', doc='If :attr:`start` is inclusive')

    end_inclusive = _genprop(
        bool, 'inclusive_end', doc='If :attr:`end` is inclusive')

    datetime_parser = _genprop_str(
        'datetime_parser',
        doc="""
        Parser to use when analyzing the :attr:`start` and :attr:`end` fields
        on the server.

        If not specified, the RFC 3339 parser is used.
        Ensure to specify :attr:`start` and :attr:`end` in a format suitable
        for the given parser.
        """)
@_with_fields('field')
class TermRangeQuery(_RangeQuery):
    """
    Search documents for fields containing a value within a given
    lexical range.
    """

    def __init__(self, start=None, end=None, **kwargs):
        """
        :param str start: Lower range of term
        :param str end: Upper range of term
        :param kwargs: Additional attributes to assign
        :raise TypeError: if neither `start` nor `end` is given
        """
        # The bounds must be passed positionally: the base constructor
        # names these parameters `r1`/`r2`, so passing `start=`/`end=`
        # keywords left both required arguments missing.
        super(TermRangeQuery, self).__init__(start, end, **kwargs)

    start = _genprop_str('start', doc='Lower range of term')
    end = _genprop_str('end', doc='Upper range of term')

    start_inclusive = _genprop(
        bool, 'inclusive_start', doc='If :attr:`start` is inclusive')

    end_inclusive = _genprop(
        bool, 'inclusive_end', doc='If :attr:`end` is inclusive')

    _MINMAX = 'start', 'end'


class _CompoundQuery(SearchQuery):
    """
    Base for queries which contain lists of other queries.
    """
    __metaclass__ = abc.ABCMeta

    @property
    @abstractmethod
    def _COMPOUND_FIELDS(self):
        """
        Field to contain the compound queries. should return an iterable of
        `(srcname, tgtname)` of the attribute name and the target JSON field
        that contains the actual list of queries
        """
        return ('Dummy', 'Dummy'),

    def __init__(self, **kwargs):
        """
        :param kwargs: Attributes to assign
        """
        super(_CompoundQuery, self).__init__()
        _assign_kwargs(self, kwargs)

    @property
    def encodable(self):
        """
        Returns a copy of the payload in which every list of sub-queries
        has been replaced by the list of their encoded forms. Empty
        lists are omitted.
        """
        self.validate()

        # Requires special handling since the compound queries in themselves
        # cannot be JSON unless they are properly encoded.
        # Note that the 'encodable' property also triggers validation
        js = self._json_.copy()
        for src, tgt in self._COMPOUND_FIELDS:
            objs = getattr(self, src)
            if not objs:
                continue
            js[tgt] = [q.encodable for q in objs]
        return js
class ConjunctionQuery(_CompoundQuery):
    """
    Compound query in which all sub-queries passed must be satisfied
    """

    _COMPOUND_FIELDS = (('conjuncts', 'conjuncts'),)

    def __init__(self, *queries):
        """
        :param queries: The sub-queries
        """
        super(ConjunctionQuery, self).__init__()
        self.conjuncts = list(queries)

    def validate(self):
        """
        :raise NoChildrenException: if there are no sub-queries
        """
        super(ConjunctionQuery, self).validate()
        if self.conjuncts:
            return
        raise NoChildrenException('No sub-queries')
def _convert_gt0(value):
    """
    Converter for strictly positive integers.

    :param value: Anything accepted by ``int()``
    :return: `value` converted to ``int``
    :raise ValueError: if the converted value is zero or negative
    """
    value = int(value)
    # Reject negative numbers as well as zero; a plain truthiness test
    # would let negative values through.
    if value <= 0:
        raise ValueError('Must be > 0')
    return value
class DisjunctionQuery(_CompoundQuery):
    """
    Compound query in which at least :attr:`min` or more queries must be
    satisfied
    """

    _COMPOUND_FIELDS = (('disjuncts', 'disjuncts'),)

    min = _genprop(
        _convert_gt0, 'min', doc='Number of queries which must be satisfied')

    def __init__(self, *queries, **kwargs):
        """
        :param queries: The sub-queries
        :param kwargs: Additional attributes to assign. :attr:`min`
            defaults to 1 when not given
        """
        super(DisjunctionQuery, self).__init__()
        _assign_kwargs(self, kwargs)
        self.disjuncts = list(queries)
        if 'min' not in self._json_:
            self.min = 1

    def validate(self):
        """
        :raise NoChildrenException: if there are no sub-queries, or fewer
            sub-queries than :attr:`min`
        """
        super(DisjunctionQuery, self).validate()
        children = self.disjuncts
        if not children or len(children) < self.min:
            raise NoChildrenException('No children specified, or min is too big')
def _bprop_wrap(name, reqtype, doc):
    """
    Helper function to generate properties

    The generated property keeps its value in the owner's ``_subqueries``
    dictionary. On assignment, ``None`` removes the entry, an instance of
    `reqtype` is stored as-is, a single query is wrapped as
    ``reqtype(query)`` and an iterable of queries is wrapped as
    ``reqtype(*queries)``.

    :param name: The name of the subfield in the JSON dictionary
    :param reqtype: The compound query type the query list should be
        coerced into
    :param doc: Documentation for the field
    :return: the property.
    """
    def fget(self):
        return self._subqueries.get(name)

    def fset(self, value):
        if value is None:
            # Assigning None clears the clause
            self._subqueries.pop(name, None)
        elif isinstance(value, reqtype):
            self._subqueries[name] = value
        elif isinstance(value, SearchQuery):
            self._subqueries[name] = reqtype(value)
        else:
            try:
                candidates = iter(value)
            except TypeError:
                # iter() signals a non-iterable with TypeError (not
                # ValueError), so this is the case that needs handling
                raise TypeError('Value must be instance of Query')

            children = []
            for query in candidates:
                if not isinstance(query, SearchQuery):
                    raise TypeError('Item is not a query!', query)
                children.append(query)
            self._subqueries[name] = reqtype(*children)

    def fdel(self):
        # Deleting is the same as assigning None
        setattr(self, name, None)

    return property(fget, fset, fdel, doc)
class BooleanQuery(SearchQuery):
    """
    Query combining up to three clauses: :attr:`must`, :attr:`should`
    and :attr:`must_not`. At least one of them must be given.
    """

    def __init__(self, must=None, should=None, must_not=None):
        """
        :param must: See :attr:`must`
        :param should: See :attr:`should`
        :param must_not: See :attr:`must_not`
        """
        super(BooleanQuery, self).__init__()
        self._subqueries = {}
        self.must = must
        self.should = should
        self.must_not = must_not

    must = _bprop_wrap(
        'must', ConjunctionQuery,
        """
        Queries which must be satisfied. When setting this attribute, the
        SDK will convert value to a :class:`ConjunctionQuery` if the value
        is a list of queries.
        """)

    must_not = _bprop_wrap(
        'must_not', DisjunctionQuery,
        """
        Queries which must not be satisfied. Documents found which satisfy
        the queries in this clause are not returned in the match.

        When setting this attribute in the SDK, it will be converted to a
        :class:`DisjunctionQuery` if the value is a list of queries.
        """)

    should = _bprop_wrap(
        'should', DisjunctionQuery,
        """
        Specify additional queries which should be satisfied. As opposed to
        :attr:`must`, you can specify the number of queries in this field
        which must be satisfied.

        The type of this attribute is :class:`DisjunctionQuery`, and you
        can set the minimum number of queries to satisfy using::

            boolquery.should.min = 1
        """)

    @property
    def encodable(self):
        # Overrides the default `encodable` implementation in order to
        # serialize any sub-queries
        for clause in ('must', 'must_not', 'should'):
            subquery = getattr(self, clause)
            if subquery:
                self._json_[clause] = subquery.encodable
        return super(BooleanQuery, self).encodable

    def validate(self):
        """
        :raise ValueError: if none of the three clauses is set
        """
        super(BooleanQuery, self).validate()
        if self.must or self.must_not or self.should:
            return
        raise ValueError('No sub-queries specified', self)
class MatchAllQuery(SearchQuery):
    """
    Special query which matches all documents
    """

    def __init__(self, **kwargs):
        """
        :param kwargs: Additional attributes to assign
        """
        super(MatchAllQuery, self).__init__()
        # The key alone identifies the query; its value is null
        self._json_.update(match_all=None)
        _assign_kwargs(self, kwargs)
class MatchNoneQuery(SearchQuery):
    """
    Special query which matches no documents
    """

    def __init__(self, **kwargs):
        """
        :param kwargs: Additional attributes to assign
        """
        super(MatchNoneQuery, self).__init__()
        # The key alone identifies the query; its value is null
        self._json_.update(match_none=None)
        _assign_kwargs(self, kwargs)
@_with_fields('field')
class BooleanFieldQuery(_SingleQuery):
    """
    Search documents for a given boolean value
    """

    _TERMPROP = 'bool'

    # On the right-hand side `bool` still refers to the builtin, which is
    # used as the converter.
    bool = _genprop(bool, 'bool', doc='Boolean value to search for')
class SearchException(CouchbaseException):
    """
    Error during server execution
    """


class NoChildrenException(CouchbaseException):
    """
    Compound query is missing children
    """

    def __init__(self, msg='No child queries'):
        super(NoChildrenException, self).__init__({'message': msg})


def _make_search_body(index, query, params=None):
    """
    Generates a dictionary suitable for encoding as the search body

    :param index: The index name to query
    :param query: The query itself. Anything that is not a
        :class:`SearchQuery` is wrapped in a :class:`QueryStringQuery`
    :param params: Modifiers for the query
    :type params: :class:`couchbase.search._Params`
    :return: A dictionary suitable for serialization
    """
    if not isinstance(query, SearchQuery):
        query = QueryStringQuery(query)

    dd = {'query': query.encodable}
    if params:
        dd.update(params.as_encodable(index))
    # Assigned last so that the explicit index name always wins.
    dd['indexName'] = index
    return dd


class SearchRequest(object):
    """
    Object representing an active query on the cluster.

    .. warning::

        You should call :cb_bmeth:`search` which will return an instance
        of this class. Do *not* invoke the constructor directly.

    You can iterate over this object (i.e. ``__iter__``) to receive the
    actual search results.
    """

    def __init__(self, body, parent, row_factory=lambda x: x):
        """
        :param body: request body; it is passed through ``_to_json`` before
            being stored
        :param Client parent: object providing ``_fts_query``
        :param row_factory: callable applied to every received row
        """
        self._body = _to_json(body)
        self._parent = parent
        self.row_factory = row_factory
        self.errors = []
        self._mres = None
        self._do_iter = True
        self.__raw = False
        self.__meta_received = False

    @classmethod
    def mk_kwargs(cls, kwargs):
        """
        Pop recognized arguments from a keyword list.

        :param dict kwargs: mapping to extract from (modified in place)
        :return: dictionary holding the recognized arguments
        """
        recognized = ('row_factory', 'body', 'parent')
        return {k: kwargs.pop(k) for k in recognized if k in kwargs}

    def _start(self):
        """Issue the query through the parent, unless already started."""
        if self._mres:
            return
        self._mres = self._parent._fts_query(self._body)
        self.__raw = self._mres[None]

    @property
    def raw(self):
        """Raw response object (``False`` until the query is started)."""
        return self.__raw

    def execute(self):
        """
        Use this convenience method if you are not actually reading the
        search hits, for example if you are only using :attr:`facets`.

        Equivalent to::

            def execute(self):
                [x for x in self]
                return self

        :return: :class:`SearchRequest` (self)
        """
        for _ in self:
            pass
        return self

    @property
    def meta(self):
        """
        Get metadata from the query itself. This is guaranteed to only
        return a Python dictionary.

        Note that if the query failed, the metadata might not be in JSON
        format, in which case there may be additional, non-JSON data
        which can be retrieved using the following

        ::

            raw_meta = req.raw.value

        :return: A dictionary containing the query metadata
        :raise RuntimeError: if the rows have not all been received yet
        """
        if not self.__meta_received:
            raise RuntimeError(
                'This property only valid once all rows are received!')

        if isinstance(self.raw.value, dict):
            return self.raw.value
        return {}

    @property
    def total_hits(self):
        """
        The total number of hits that match the query. This may be greater
        than the number of hits actually returned, as it is subject to the
        :attr:`Params.limit` parameter
        """
        return self.meta['total_hits']

    @property
    def took(self):
        """
        The length of time the query took to execute
        """
        return self.meta['took']

    @property
    def max_score(self):
        """The ``max_score`` entry of the metadata."""
        return self.meta['max_score']

    @property
    def facets(self):
        """The ``facets`` entry of the metadata."""
        return self.meta['facets']

    def _clear(self):
        del self._parent
        del self._mres

    def _handle_meta(self, value):
        """
        Record that the metadata arrived and raise on reported errors.

        :param value: metadata payload; non-dict payloads are ignored
        :raise: the exception built by ``SearchException.pyexc`` for the
            first entry of ``value['errors']``, if any
        """
        self.__meta_received = True
        if not isinstance(value, dict):
            return
        if 'errors' in value:
            for err in value['errors']:
                # NOTE(review): the message mentions N1QL although this is
                # the search path; kept as-is because callers may match on it.
                raise SearchException.pyexc('N1QL Execution failed', err)

    def _process_payload(self, rows):
        """
        Convert a batch of raw rows using :attr:`row_factory`.

        When the batch is empty and the response is done, the metadata is
        handled and iteration is switched off.

        :param rows: list of raw rows
        :return: list of converted rows (possibly empty)
        """
        if rows:
            return [self.row_factory(row) for row in rows]

        if self.raw.done:
            self._handle_meta(self.raw.value)
            self._do_iter = False
        # Otherwise: we can only get here if another concurrent query broke
        # out the event loop before we did.
        return []

    def __iter__(self):
        if not self._do_iter:
            raise AlreadyQueriedException()

        self._start()
        while self._do_iter:
            raw_rows = self.raw.fetch(self._mres)
            # Materialized so that the emptiness check in _process_payload
            # works whatever iterable fetch() returns.
            actual_rows = list(raw_rows)
            for row in self._process_payload(actual_rows):
                yield row

    def __repr__(self):
        return (
            '<{0.__class__.__name__} body={0._body!r} response={1}>'.format(
                self, self.raw.value if self.raw else '<PENDING>'))


@attr.s
class SearchRowLocation(object):
    """Location of one term occurrence inside a field of a hit."""
    field = attr.ib(type=str)
    term = attr.ib(type=str)
    position = attr.ib(type=UnsignedInt32)
    start = attr.ib(type=UnsignedInt32)
    end = attr.ib(type=UnsignedInt32)
    array_positions = attr.ib(factory=list, type=List[UnsignedInt32])


class SearchRowFields(Dict[str, Any]):
    """Dictionary of the fields returned with a hit."""

    def __init__(self, *args, **kwargs):
        super(SearchRowFields, self).__init__(*args, **kwargs)


class SearchRowLocations(object):
    """
    Accessor over the ``field -> term -> [location entries]`` mapping
    passed to the constructor as keyword arguments.
    """

    def __init__(self, **orig_data):
        self._real_data = orig_data

    def get_all(self):
        # type: (...) -> List[SearchRowLocation]
        """list all locations (any field, any term)"""
        results = []
        for field, terms in self._real_data.items():
            for term in terms:
                results.extend(self.get(field, term))
        return results

    def get(self,
            field,  # type: str
            term  # type: str
            ):
        # type: (...) -> List[SearchRowLocation]
        """
        List all locations for a given field and term

        :param field: field name
        :param term: term within that field
        :return: the matching locations; empty if the field or the term is
            not present
        """
        entries_for_field = self._real_data.get(field, {})
        return [
            SearchRowLocation(field, term, v['pos'], v['start'], v['end'],
                              v.get('array_positions', []))
            for v in entries_for_field.get(term, [])
        ]

    def fields(self):
        # type: (...) -> List[str]
        """
        :return: the fields in this location
        """
        return list(self._real_data.keys())

    def terms(self):
        # type: (...) -> Set[str]
        """
        List all terms in this locations,
        considering all fields (so a set):
        """
        result = set()
        for field in self._real_data.values():
            result.update(field.keys())
        return result

    def terms_for(self,
                  field  # type: str
                  ):
        # type: (...) -> List[str]
        """
        list the terms for a given field

        :raise KeyError: if the field is not present
        """
        return list(self._real_data[field].keys())


@attr.s
class SearchRow(object):
    """A single entry of search results. The server calls them "hits", and represents as a JSON object. The following interface describes the contents of the result row."""

    index = attr.ib(type=str)
    id = attr.ib(type=str)
    score = attr.ib(type=float)
    explanation = attr.ib(factory=dict, type=JSON)
    locations = attr.ib(factory=SearchRowLocations, type=SearchRowLocations)  # type: SearchRowLocations
    fragments = attr.ib(factory=dict, type=Optional[Mapping[str, str]])
    fields = attr.ib(default=attr.Factory(SearchRowFields),
                     type=SearchRowFields)


@attr.s
class SearchFacetResult(object):
    """ An individual facet result has both metadata and details,
    as each facet can define ranges into which results are categorized."""
    name = attr.ib(type=str)
    field = attr.ib(type=str)
    total = attr.ib(type=UnsignedInt64)
    missing = attr.ib(type=UnsignedInt64)
    other = attr.ib(type=UnsignedInt64)


# If top-level "error" property exists, then SDK should build and throw
# CouchbaseException with its content.


class SearchMetrics(object):
    """Read-only view over the metrics part of the raw search metadata."""

    def __init__(self,
                 raw_data  # type: JSON
                 ):
        self._raw_data = raw_data

    @property
    def _status(self):
        # type: (...) -> Dict[str,int]
        # Default to an empty mapping so the partition counters yield None
        # rather than failing when no status was reported.
        return self._raw_data.get('status', {})

    @property
    def success_partition_count(self):
        # type: (...) -> int
        return self._status.get('successful')

    @property
    def error_partition_count(self):
        # type: (...) -> int
        return self._status.get('failed')

    @property
    def took(self):
        # type: (...) -> timedelta
        # NOTE(review): the raw value is interpreted as microseconds here;
        # confirm the unit the server actually reports.
        return timedelta(microseconds=self._raw_data.get('took'))

    @property
    def total_partition_count(self):
        # type: (...) -> int
        return self._status.get('total')

    @property
    def max_score(self):
        # type: (...) -> float
        return self._raw_data.get('max_score')

    @property
    def total_rows(self):
        return self._raw_data.get('total_hits')


class HighlightStyle(Enum):
    """
    Style used to render highlights.

    Can be either:

    Ansi
        serialized as ``'ansi'``

    Html
        serialized as ``'html'``
    """
    Ansi = 'ansi'
    Html = 'html'


class SearchMetaData(object):
    """Represents the meta-data returned along with a search query result."""

    def __init__(self, **raw_json):
        self.metrics = SearchMetrics(raw_json)
        # NOTE(review): this stores the whole raw metadata, not only an
        # "errors" entry - confirm that this is intended.
        self.errors = raw_json


class SearchResultBase(object):
    @internal
    def __init__(self,
                 *args,
                 row_factory=None,
                 **kwargs  # type: SearchRequest
                 ):
        """
        The SearchResult interface provides a means of mapping the results of a Search query into an object.
        The description and details on the fields can be found in the Couchbase Full Text Search Index Query (FTS) RFC.
        """
        super(SearchResultBase, self).__init__(
            *args, row_factory=(row_factory or self._row_factory), **kwargs)

    @staticmethod
    def _row_factory(orig_value  # type: Dict[str, Any]
                     ):
        # type: (...) -> SearchRow
        """
        Build a :class:`SearchRow` from a raw hit.

        ``index``, ``id``, ``score`` and ``locations`` are popped from
        ``orig_value``; any remaining key that names a :class:`SearchRow`
        attribute is forwarded as a keyword argument.

        :param orig_value: raw hit (modified in place)
        :return: the populated row
        """
        index = orig_value.pop('index')
        doc_id = orig_value.pop('id')
        score = orig_value.pop('score')
        locations = SearchRowLocations(**(orig_value.pop('locations', None) or {}))

        # attr.fields() yields Attribute objects, so compare by attribute
        # *name*; intersecting the objects themselves with the string keys
        # never matches anything.
        known = {attribute.name for attribute in attr.fields(SearchRow)}
        extras = {k: orig_value[k] for k in known & orig_value.keys()}
        if isinstance(extras.get('fields'), dict):
            extras['fields'] = SearchRowFields(extras['fields'])
        return SearchRow(index, doc_id, score, locations=locations, **extras)

    def facets(self):
        # type: (...) -> Dict[str, SearchFacetResult]
        """
        :return: the facet results keyed by facet name
        """
        # Index instead of pop: the metadata is shared with later calls and
        # must not be consumed by the first one.
        return {
            k: SearchFacetResult(k, v['field'], v['total'], v['missing'],
                                 v['other'])
            for k, v in super(SearchResultBase, self).facets.items()
        }

    def metadata(self):
        # type: (...) -> SearchMetaData
        """
        :return: the metadata wrapped in a :class:`SearchMetaData`
        """
        return SearchMetaData(**super(SearchResultBase, self).meta)

    @classmethod
    def mk_kwargs(cls, kwargs):
        """Delegate to :meth:`SearchRequest.mk_kwargs`."""
        return SearchRequest.mk_kwargs(kwargs)


class SearchResult(SearchResultBase, iterable_wrapper(SearchRequest)):
    pass


SearchParams = NamedTuple('SearchParams', [('body', JSON),
                                           ('iterargs', Dict[str, Any]),
                                           ('itercls', Type[SearchResult])])
class SearchOptions(OptionBlockTimeOut):
    @overload
    def __init__(self,
                 timeout=None,  # type: timedelta
                 limit=None,  # type: int
                 skip=None,  # type: int
                 explain=None,  # type: bool
                 fields=None,  # type: List[str]
                 highlight_style=None,  # type: HighlightStyle
                 highlight_fields=None,  # type: List[str]
                 scan_consistency=None,  # type: cluster.QueryScanConsistency
                 consistent_with=None,  # type: MutationState
                 facets=None,  # type: Dict[str, Facet]
                 raw=None,  # type: JSON
                 sort=None  # type: List[str]
                 ):
        pass

    def __init__(self,
                 **kwargs  # type: Any
                 ):
        """
        Search Options

        These options apply to a Full Text Search query.

        :param timedelta timeout: Timeout to use for this query.
            If not set, the default cluster-wide timeout is used
            :meth:`~.Cluster.search_timeout`
        :param int limit: Limit the results returned.
        :param int skip: Skip the first N results.
        :param bool explain: Include an explanation of the search result
            scores.
        :param Iterable[str] fields: List of fields to return, if they
            exist on the document.
        :param HighlightStyle highlight_style: Style to render the
            highlights. See :class:`~.HighlightStyle` for details. A value
            without a ``value`` attribute (e.g. a plain string) is passed
            through unchanged.
        :param Iterable[str] highlight_fields: Fields to highlight. If this
            is not specified, all fields returned are highlighted.
        :param QueryScanConsistency scan_consistency: Scan Consistency to
            use for this query. See :class:`~.QueryScanConsistency` for
            details.
        :param MutationState consistent_with: Specify a consistency using
            :class:`~.MutationState`.
        :param Iterable[str,Facet] facets: Specify a set of
            :class:`~.Facet` objects that aggregate the result data.
        :param dict[str,JSON] raw: A way to support unknown commands, and
            be future-compatible.
        :param Iterable[Sort] sort: List of various :class:`~.Sort` objects
            to sort the results.
        """
        # convert highlight_style to str if it is present...
        style = kwargs.get('highlight_style', None)
        if style:
            # Enum members are unwrapped; anything else is left untouched
            # instead of failing on a missing ``value`` attribute.
            kwargs['highlight_style'] = getattr(style, 'value', style)

        sort = kwargs.get('sort', None)
        if sort:
            kwargs['sort'] = SortString(*sort)

        super(SearchOptions, self).__init__(**kwargs)

    @classmethod
    def gen_search_params_cls(cls, index, query, *options, **kwargs):
        # type: (...) -> SearchParams
        """
        Build the request body and the iterator construction data.

        :param index: index name to query
        :param query: the query, as accepted by ``_make_search_body``
        :param options: option blocks to merge
        :param kwargs: override keyword arguments
        :return: a :class:`SearchParams` tuple
        """
        iterargs, itercls, params = cls._gen_params_kwargs_options(
            *options, **kwargs)
        body = _make_search_body(index, query, params)
        return SearchParams(body, iterargs, itercls)

    SearchParamsInternal = NamedTuple('SearchParamsInternal',
                                      [('iterargs', Dict[str, Any]),
                                       ('itercls', Type[SearchResult]),
                                       ('params', _Params)])

    @classmethod
    def _gen_params_kwargs_options(cls, *options, **kwargs):
        """
        Split merged options into iterator arguments and query parameters.

        :return: a ``SearchParamsInternal`` tuple
        """
        itercls = kwargs.pop('itercls', SearchResult)
        final_args = forward_args(kwargs, *options)
        # mk_kwargs pops what the iterator class consumes; the remainder
        # becomes the query parameters.
        iterargs = itercls.mk_kwargs(final_args)
        params = cls._gen_params_from_final_args(final_args)
        return SearchOptions.SearchParamsInternal(iterargs, itercls, params)

    @classmethod
    def _gen_params_from_final_args(cls, final_args):
        """
        Produce the ``_Params`` object for the merged arguments.

        An explicit ``params`` entry is used as-is; otherwise a new
        ``_Params`` is created from the remaining arguments.

        :param dict final_args: merged arguments (modified in place)
        :return: the parameters object
        """
        consistent_with = final_args.pop('consistent_with', None)
        # Construct the default lazily: building it as the pop() default
        # would run even when ``params`` was supplied, and would then be
        # handed ``params`` itself as a keyword argument.
        params = final_args.pop('params', None)  # type: _Params
        if params is None:
            params = _Params(**final_args)
        if consistent_with:
            params.consistent_with(consistent_with)
        return params

    def _gen_params(self):
        # for testing purposes
        return self._gen_params_kwargs_options(self).params

    def _gen_search_params(self, index, query):
        # for testing purposes
        return self.gen_search_params_cls(index, query, self)

    def as_encodable(self, index):
        """
        Encode these options for the given index.

        :param index: index name handed to the parameters' ``as_encodable``
        :return: whatever the parameters object's ``as_encodable`` returns
        """
        final_args = forward_args(None, self)
        return self._gen_params_from_final_args(final_args).as_encodable(index)