Added async support for helpers that are merged from opensearch-dsl-py (#329)

Signed-off-by: saimedhi <saimedhi@amazon.com>
This commit is contained in:
Sai Medhini Reddy Maryada
2023-03-17 12:03:46 -07:00
committed by GitHub
parent 409c883013
commit 2b9aee73a5
40 changed files with 7226 additions and 63 deletions
+1
View File
@@ -3,6 +3,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
## [Unreleased]
### Added
- Added async support for helpers that are merged from opensearch-dsl-py ([#329](https://github.com/opensearch-project/opensearch-py/pull/329))
### Changed
### Deprecated
### Removed
+8
View File
@@ -0,0 +1,8 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
@@ -32,15 +32,15 @@
import asyncio
import logging
from ..compat import map
from ..exceptions import TransportError
from ..helpers.actions import (
from ...compat import map
from ...exceptions import TransportError
from ...helpers.actions import (
_ActionChunker,
_process_bulk_chunk_error,
_process_bulk_chunk_success,
expand_action,
)
from ..helpers.errors import ScanError
from ...helpers.errors import ScanError
logger = logging.getLogger("opensearchpy.helpers")
@@ -41,8 +41,8 @@ from typing import (
Union,
)
from ..serializer import Serializer
from .client import AsyncOpenSearch
from ...serializer import Serializer
from ..client import AsyncOpenSearch
logger: logging.Logger
+437
View File
@@ -0,0 +1,437 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
try:
import collections.abc as collections_abc # only works on python 3.3+
except ImportError:
import collections as collections_abc
from fnmatch import fnmatch
from six import add_metaclass
from opensearchpy._async.helpers.index import AsyncIndex
from opensearchpy._async.helpers.search import AsyncSearch
from opensearchpy.connection.async_connections import get_connection
from opensearchpy.exceptions import (
IllegalOperation,
NotFoundError,
RequestError,
ValidationException,
)
from opensearchpy.helpers.document import DocumentMeta
from opensearchpy.helpers.utils import DOC_META_FIELDS, META_FIELDS, ObjectBase, merge
class AsyncIndexMeta(DocumentMeta):
    """
    Metaclass that binds an ``AsyncIndex`` to every document class created
    after the very first one built with this metaclass.
    """

    # global flag to guard us from associating an Index with the base Document
    # class, only user defined subclasses should have an _index attr
    _document_initialized = False

    def __new__(cls, name, bases, attrs):
        """
        Build the class and, unless it is the first class created through
        this metaclass, attach the index described by its inner ``Index``.
        """
        created = super(AsyncIndexMeta, cls).__new__(cls, name, bases, attrs)
        if cls._document_initialized:
            bound_index = cls.construct_index(attrs.pop("Index", None), bases)
            created._index = bound_index
            # register the new class with its index
            bound_index.document(created)
        cls._document_initialized = True
        return created

    @classmethod
    def construct_index(cls, opts, bases):
        """
        Return the index for a new document class.

        :arg opts: the inner ``Index`` options object, or ``None``
        :arg bases: base classes of the class being created; when ``opts``
            is ``None`` the first base carrying ``_index`` provides the index
        """
        if opts is not None:
            configured = AsyncIndex(
                getattr(opts, "name", "*"), using=getattr(opts, "using", "default")
            )
            configured.settings(**getattr(opts, "settings", {}))
            configured.aliases(**getattr(opts, "aliases", {}))
            for custom_analyzer in getattr(opts, "analyzers", ()):
                configured.analyzer(custom_analyzer)
            return configured

        for base in bases:
            if hasattr(base, "_index"):
                return base._index

        # Set None as Index name so it will set _all while making the query
        return AsyncIndex(name=None)
@add_metaclass(AsyncIndexMeta)
class AsyncDocument(ObjectBase):
    """
    Model-like class for persisting documents in opensearch using an
    asynchronous connection.
    """

    @classmethod
    def _matches(cls, hit):
        """Tell whether ``hit`` belongs to an index matching this document's index name."""
        pattern = cls._index._name
        if pattern is None:
            # no index name configured, accept every hit
            return True
        return fnmatch(hit.get("_index", ""), pattern)

    @classmethod
    def _get_using(cls, using=None):
        """Return ``using`` when truthy, otherwise the alias stored on the index."""
        if using:
            return using
        return cls._index._using

    @classmethod
    async def _get_connection(cls, using=None):
        """Resolve the connection registered for the effective alias."""
        alias = cls._get_using(using)
        return await get_connection(alias)

    @classmethod
    def _default_index(cls, index=None):
        """Return ``index`` when truthy, otherwise the name of the bound index."""
        if index:
            return index
        return cls._index._name

    @classmethod
    async def init(cls, index=None, using=None):
        """
        Create the index and populate the mappings in opensearch.

        :arg index: optional index name; when given, a clone of the bound
            index carrying that name is saved instead
        :arg using: connection alias forwarded to the index ``save``
        """
        target = cls._index.clone(name=index) if index else cls._index
        await target.save(using=using)

    def _get_index(self, index=None, required=True):
        """
        Work out which index this instance should be written to.

        The explicit argument wins, then ``meta.index``, then the name of
        the bound index.

        :raises ValidationException: when no index is found and ``required``
            is true, or when the resolved name contains a wildcard
        """
        resolved = index
        if resolved is None:
            resolved = getattr(self.meta, "index", None)
            if resolved is None:
                resolved = getattr(self._index, "_name", None)
        if resolved is None:
            if required:
                raise ValidationException("No index")
            return resolved
        if resolved and "*" in resolved:
            raise ValidationException("You cannot write to a wildcard index.")
        return resolved

    def __repr__(self):
        """Show the class name with whichever of ``index`` / ``id`` are set in meta."""
        shown = [
            "{}={!r}".format(key, getattr(self.meta, key))
            for key in ("index", "id")
            if key in self.meta
        ]
        return "{}({})".format(self.__class__.__name__, ", ".join(shown))

    @classmethod
    def search(cls, using=None, index=None):
        """
        Create an :class:`~opensearchpy.AsyncSearch` instance that will search
        over this ``Document``.
        """
        alias = cls._get_using(using)
        target = cls._default_index(index)
        return AsyncSearch(using=alias, index=target, doc_type=[cls])

    @classmethod
    async def get(cls, id, using=None, index=None, **kwargs):
        """
        Retrieve a single document from opensearch using its ``id``.

        :arg id: ``id`` of the document to be retrieved
        :arg index: opensearch index to use, if the ``Document`` is
            associated with an index this can be omitted.
        :arg using: connection alias to use, defaults to ``'default'``

        Any additional keyword arguments will be passed to
        ``AsyncOpenSearch.get`` unchanged.  Returns ``None`` when the
        response is not marked as found.
        """
        client = await cls._get_connection(using)
        raw = await client.get(index=cls._default_index(index), id=id, **kwargs)
        if raw.get("found", False):
            return cls.from_opensearch(raw)
        return None

    @classmethod
    async def exists(cls, id, using=None, index=None, **kwargs):
        """
        Check whether a document with the given ``id`` exists in opensearch.

        :arg id: ``id`` of the document to check
        :arg index: opensearch index to use, if the ``Document`` is
            associated with an index this can be omitted.
        :arg using: connection alias to use, defaults to ``'default'``

        Any additional keyword arguments will be passed to
        ``AsyncOpenSearch.exists`` unchanged.
        """
        client = await cls._get_connection(using)
        return await client.exists(index=cls._default_index(index), id=id, **kwargs)

    @classmethod
    async def mget(
        cls, docs, using=None, index=None, raise_on_error=True, missing="none", **kwargs
    ):
        r"""
        Retrieve multiple document by their ``id``\s. Returns a list of instances
        in the same order as requested.

        :arg docs: list of ``id``\s of the documents to be retrieved or a list
            of document specifications as per
            https://opensearch.org/docs/latest/opensearch/rest-api/document-apis/multi-get/
        :arg index: opensearch index to use, if the ``Document`` is
            associated with an index this can be omitted.
        :arg using: connection alias to use, defaults to ``'default'``
        :arg raise_on_error: when true, entries reporting an error make the
            call raise ``RequestError`` after all entries were inspected
        :arg missing: what to do when one of the documents requested is not
            found. Valid options are ``'none'`` (use ``None``), ``'raise'`` (raise
            ``NotFoundError``) or ``'skip'`` (ignore the missing document).

        Any additional keyword arguments will be passed to
        ``AsyncOpenSearch.mget`` unchanged.
        """
        if missing not in ("raise", "skip", "none"):
            raise ValueError("'missing' must be 'raise', 'skip', or 'none'.")
        client = await cls._get_connection(using)

        # bare ids are wrapped, mappings are sent as given
        specs = []
        for spec in docs:
            if isinstance(spec, collections_abc.Mapping):
                specs.append(spec)
            else:
                specs.append({"_id": spec})
        response = await client.mget(
            {"docs": specs}, index=cls._default_index(index), **kwargs
        )

        hydrated, failed, absent = [], [], []
        for entry in response["docs"]:
            if entry.get("found"):
                # Once an exception is certain there is no point paying for
                # cls.from_opensearch().
                if not (failed or absent):
                    hydrated.append(cls.from_opensearch(entry))
                continue
            if entry.get("error"):
                if raise_on_error:
                    failed.append(entry)
                if missing == "none":
                    hydrated.append(None)
                continue
            # The doc didn't cause an error, but the doc also wasn't found.
            if missing == "raise":
                absent.append(entry)
            elif missing == "none":
                hydrated.append(None)

        if failed:
            failed_ids = [entry["_id"] for entry in failed]
            message = "Required routing not provided for documents %s." % ", ".join(
                failed_ids
            )
            raise RequestError(400, message, failed)
        if absent:
            absent_ids = [entry["_id"] for entry in absent]
            message = "Documents %s not found." % ", ".join(absent_ids)
            raise NotFoundError(404, message, {"docs": absent})
        return hydrated

    async def delete(self, using=None, index=None, **kwargs):
        """
        Delete the instance in opensearch.

        :arg index: opensearch index to use, if the ``Document`` is
            associated with an index this can be omitted.
        :arg using: connection alias to use, defaults to ``'default'``

        Any additional keyword arguments will be passed to
        ``AsyncOpenSearch.delete`` unchanged.
        """
        client = await self._get_connection(using)

        # extract routing etc from meta
        params = {}
        for field in DOC_META_FIELDS:
            if field in self.meta:
                params[field] = self.meta[field]

        # Optimistic concurrency control
        if "seq_no" in self.meta and "primary_term" in self.meta:
            params["if_seq_no"] = self.meta["seq_no"]
            params["if_primary_term"] = self.meta["primary_term"]

        params.update(kwargs)
        await client.delete(index=self._get_index(index), **params)

    def to_dict(self, include_meta=False, skip_empty=True):
        """
        Serialize the instance into a dictionary so that it can be saved in opensearch.

        :arg include_meta: if set to ``True`` will include all the metadata
            (``_index``, ``_id`` etc). Otherwise just the document's
            data is serialized.
        :arg skip_empty: if set to ``False`` will cause empty values (``None``,
            ``[]``, ``{}``) to be left on the document. Those values will be
            stripped out otherwise as they make no difference in opensearch.
        """
        source = super(AsyncDocument, self).to_dict(skip_empty=skip_empty)
        if not include_meta:
            return source

        envelope = {}
        for field in DOC_META_FIELDS:
            if field in self.meta:
                envelope["_" + field] = self.meta[field]

        # in case of to_dict include the index unlike save/update/delete
        target = self._get_index(required=False)
        if target is not None:
            envelope["_index"] = target

        envelope["_source"] = source
        return envelope

    async def update(
        self,
        using=None,
        index=None,
        detect_noop=True,
        doc_as_upsert=False,
        refresh=False,
        retry_on_conflict=None,
        script=None,
        script_id=None,
        scripted_upsert=False,
        upsert=None,
        return_doc_meta=False,
        **fields
    ):
        """
        Partial update of the document, specify fields you wish to update and
        both the instance and the document in opensearch will be updated::

            doc = MyDocument(title='Document Title!')
            await doc.save()
            await doc.update(title='New Document Title!')

        :arg index: opensearch index to use, if the ``Document`` is
            associated with an index this can be omitted.
        :arg using: connection alias to use, defaults to ``'default'``
        :arg detect_noop: Set to ``False`` to disable noop detection.
        :arg refresh: Control when the changes made by this request are visible
            to search. Set to ``True`` for immediate effect.
        :arg retry_on_conflict: In between the get and indexing phases of the
            update, it is possible that another process might have already
            updated the same document. By default, the update will fail with a
            version conflict exception. The retry_on_conflict parameter
            controls how many times to retry the update before finally throwing
            an exception.
        :arg doc_as_upsert: Instead of sending a partial doc plus an upsert
            doc, setting doc_as_upsert to true will use the contents of doc as
            the upsert value
        :arg script: inline script source; ``fields`` become its params
        :arg script_id: id of a stored script, used when ``script`` is empty
        :arg scripted_upsert: sent as ``scripted_upsert`` for scripted updates
        :arg upsert: sent as ``upsert`` for scripted updates when not ``None``
        :arg return_doc_meta: set to ``True`` to return all metadata from the
            update API call instead of only the operation result

        :return operation result noop/updated
        """
        body = {
            "doc_as_upsert": doc_as_upsert,
            "detect_noop": detect_noop,
        }

        if script or script_id:
            # scripted update
            if upsert is not None:
                body["upsert"] = upsert

            script = {"source": script} if script else {"id": script_id}
            script["params"] = fields

            body["script"] = script
            body["scripted_upsert"] = scripted_upsert
        else:
            # partial document update
            if not fields:
                raise IllegalOperation(
                    "You cannot call update() without updating individual fields or a script. "
                    "If you wish to update the entire object use save()."
                )

            # update given fields locally
            merge(self, fields)

            # prepare data for OpenSearch
            serialized = self.to_dict()

            # if fields were given: partial update
            body["doc"] = {name: serialized.get(name) for name in fields}

        # extract routing etc from meta
        params = {}
        for field in DOC_META_FIELDS:
            if field in self.meta:
                params[field] = self.meta[field]

        if retry_on_conflict is not None:
            params["retry_on_conflict"] = retry_on_conflict

        # Optimistic concurrency control, only when no retries were requested
        has_version = "seq_no" in self.meta and "primary_term" in self.meta
        if retry_on_conflict in (None, 0) and has_version:
            params["if_seq_no"] = self.meta["seq_no"]
            params["if_primary_term"] = self.meta["primary_term"]

        client = await self._get_connection(using)
        meta = await client.update(
            index=self._get_index(index), body=body, refresh=refresh, **params
        )

        # update meta information from OpenSearch
        for field in META_FIELDS:
            key = "_" + field
            if key in meta:
                setattr(self.meta, field, meta[key])

        if return_doc_meta:
            return meta
        return meta["result"]

    async def save(
        self,
        using=None,
        index=None,
        validate=True,
        skip_empty=True,
        return_doc_meta=False,
        **kwargs
    ):
        """
        Save the document into opensearch. If the document doesn't exist it
        is created, it is overwritten otherwise.

        :arg index: opensearch index to use, if the ``Document`` is
            associated with an index this can be omitted.
        :arg using: connection alias to use, defaults to ``'default'``
        :arg validate: set to ``False`` to skip validating the document
        :arg skip_empty: if set to ``False`` will cause empty values (``None``,
            ``[]``, ``{}``) to be left on the document. Those values will be
            stripped out otherwise as they make no difference in opensearch.
        :arg return_doc_meta: set to ``True`` to return all metadata from the
            index API call instead of only the operation result

        Any additional keyword arguments will be passed to
        ``AsyncOpenSearch.index`` unchanged.

        :return operation result created/updated
        """
        if validate:
            self.full_clean()

        client = await self._get_connection(using)

        # extract routing etc from meta
        params = {}
        for field in DOC_META_FIELDS:
            if field in self.meta:
                params[field] = self.meta[field]

        # Optimistic concurrency control
        if "seq_no" in self.meta and "primary_term" in self.meta:
            params["if_seq_no"] = self.meta["seq_no"]
            params["if_primary_term"] = self.meta["primary_term"]

        params.update(kwargs)
        meta = await client.index(
            index=self._get_index(index),
            body=self.to_dict(skip_empty=skip_empty),
            **params
        )

        # update meta information from OpenSearch
        for field in META_FIELDS:
            key = "_" + field
            if key in meta:
                setattr(self.meta, field, meta[key])

        if return_doc_meta:
            return meta
        return meta["result"]
+14
View File
@@ -0,0 +1,14 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
from opensearchpy.helpers.document import DocumentMeta
from opensearchpy.helpers.utils import ObjectBase
# Stub declaration only: the class body is elided and no members are typed here.
class AsyncIndexMeta(DocumentMeta): ...
# Stub declaration only: the class body is elided and no members are typed here.
class AsyncDocument(ObjectBase): ...
@@ -0,0 +1,200 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
from six import iteritems, itervalues
from opensearchpy._async.helpers.search import AsyncSearch
from opensearchpy.helpers.faceted_search import FacetedResponse
from opensearchpy.helpers.query import MatchAll
class AsyncFacetedSearch(object):
    """
    Abstraction for creating faceted navigation searches that takes care of
    composing the queries, aggregations and filters as needed as well as
    presenting the results in an easy-to-consume fashion::

        class BlogSearch(AsyncFacetedSearch):
            index = 'blogs'
            doc_types = [Blog, Post]
            fields = ['title^5', 'category', 'description', 'body']

            facets = {
                'type': TermsFacet(field='_type'),
                'category': TermsFacet(field='category'),
                'weekly_posts': DateHistogramFacet(field='published_from', interval='week')
            }

            def search(self):
                ' Override search to add your own filters '
                s = super(BlogSearch, self).search()
                return s.filter('term', published=True)

        # when using:
        blog_search = BlogSearch("web framework", filters={"category": "python"})

        # supports pagination
        blog_search[10:20]

        response = await blog_search.execute()

        # easy access to aggregation results:
        for category, hit_count, is_selected in response.facets.category:
            print(
                "Category %s has %d hits%s." % (
                    category,
                    hit_count,
                    ' and is chosen' if is_selected else ''
                )
            )

    """

    # index name(s) handed to the underlying AsyncSearch
    index = None
    # document classes handed to the underlying AsyncSearch
    doc_types = None
    # fields used for the multi_match query and for highlighting
    fields = None
    # facet name -> facet object; looked up by add_filter() and aggregate()
    facets = {}
    # connection alias handed to the underlying AsyncSearch
    using = "default"

    def __init__(self, query=None, filters=None, sort=()):
        """
        :arg query: the text to search for
        :arg filters: mapping of facet name to the value(s) to filter on;
            ``None`` (the default) means no filters
        :arg sort: sort information to be passed to :class:`~opensearchpy.AsyncSearch`
        """
        # ``None`` replaces the former ``{}`` default so that no dict object
        # is shared between calls.
        if filters is None:
            filters = {}
        self._query = query
        self._filters = {}
        self._sort = sort
        self.filter_values = {}
        for name, value in iteritems(filters):
            self.add_filter(name, value)

        self._s = self.build_search()

    async def count(self):
        """Return the result of ``count()`` on the underlying search."""
        return await self._s.count()

    def __getitem__(self, k):
        """Apply ``k`` to the underlying search (pagination) and return ``self``."""
        self._s = self._s[k]
        return self

    def __iter__(self):
        """Iterate over the underlying search object."""
        return iter(self._s)

    def add_filter(self, name, filter_values):
        """
        Add a filter for a facet.

        :arg name: key of the facet in ``facets``
        :arg filter_values: a single value or a list/tuple of values;
            ``None`` is ignored
        """
        # normalize the value into a list
        if not isinstance(filter_values, (tuple, list)):
            if filter_values is None:
                return
            filter_values = [
                filter_values,
            ]

        # remember the filter values for use in FacetedResponse
        self.filter_values[name] = filter_values

        # get the filter from the facet
        f = self.facets[name].add_filter(filter_values)
        if f is None:
            return

        self._filters[name] = f

    def search(self):
        """
        Returns the base Search object to which the facets are added.

        You can customize the query by overriding this method and returning a
        modified search object.
        """
        s = AsyncSearch(doc_type=self.doc_types, index=self.index, using=self.using)
        return s.response_class(FacetedResponse)

    def query(self, search, query):
        """
        Add query part to ``search``.

        Override this if you wish to customize the query used.  A falsy
        ``query`` leaves ``search`` untouched.
        """
        if query:
            if self.fields:
                return search.query("multi_match", fields=self.fields, query=query)
            else:
                return search.query("multi_match", query=query)
        return search

    def aggregate(self, search):
        """
        Add aggregations representing the facets selected, including potential
        filters.
        """
        for f, facet in iteritems(self.facets):
            agg = facet.get_aggregation()
            agg_filter = MatchAll()
            # every facet is filtered by all *other* active facet filters
            for field, facet_filter in iteritems(self._filters):
                if f == field:
                    continue
                agg_filter &= facet_filter
            search.aggs.bucket("_filter_" + f, "filter", filter=agg_filter).bucket(
                f, agg
            )

    def filter(self, search):
        """
        Add a ``post_filter`` to the search request narrowing the results based
        on the facet filters.
        """
        if not self._filters:
            return search

        post_filter = MatchAll()
        for f in itervalues(self._filters):
            post_filter &= f
        return search.post_filter(post_filter)

    def highlight(self, search):
        """
        Add highlighting for all the fields
        """
        # strip a trailing "^boost" from each field name
        return search.highlight(*(f.split("^", 1)[0] for f in self.fields))

    def sort(self, search):
        """
        Add sorting information to the request.
        """
        if self._sort:
            search = search.sort(*self._sort)
        return search

    def build_search(self):
        """
        Construct the ``AsyncSearch`` object.
        """
        s = self.search()
        s = self.query(s, self._query)
        s = self.filter(s)
        if self.fields:
            s = self.highlight(s)
        s = self.sort(s)
        self.aggregate(s)
        return s

    async def execute(self):
        """
        Execute the search and return the response.
        """
        r = await self._s.execute()
        # give the response a handle back to this faceted search
        r._faceted_search = self
        return r
@@ -0,0 +1,10 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
# Stub declaration only: the class body is elided and no members are typed here.
class AsyncFacetedSearch(object): ...
+652
View File
@@ -0,0 +1,652 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
from opensearchpy._async.helpers.mapping import AsyncMapping
from opensearchpy._async.helpers.search import AsyncSearch
from opensearchpy._async.helpers.update_by_query import AsyncUpdateByQuery
from opensearchpy.connection.async_connections import get_connection
from opensearchpy.exceptions import IllegalOperation
from opensearchpy.helpers import analysis
from opensearchpy.helpers.utils import merge
class AsyncIndexTemplate(object):
    """
    Index template built on top of an ``AsyncIndex`` definition.

    Attributes that are not found on the template itself are looked up on
    the wrapped index.
    """

    def __init__(self, name, template, index=None, order=None, **kwargs):
        """
        :arg name: name under which the template is stored
        :arg template: index pattern; becomes the name of the wrapped index
        :arg index: existing index to clone; when omitted a new
            ``AsyncIndex`` is created from ``template`` and ``kwargs``
        :arg order: optional template order, emitted by ``to_dict`` when set
        :raises ValueError: if both ``index`` and extra ``kwargs`` are given
        """
        if index is None:
            self._index = AsyncIndex(template, **kwargs)
        else:
            if kwargs:
                raise ValueError(
                    "You cannot specify options for Index when"
                    " passing an Index instance."
                )
            # work on a clone so the caller's index keeps its own name
            self._index = index.clone()
            self._index._name = template
        self._template_name = name
        self.order = order

    def __getattr__(self, attr_name):
        """Delegate unknown attributes to the wrapped index."""
        # __getattr__ only runs when normal lookup fails.  If ``_index`` is
        # itself missing (bare instance from __new__, copy, unpickling),
        # reading self._index below would re-enter this method forever.
        if attr_name == "_index":
            raise AttributeError(attr_name)
        return getattr(self._index, attr_name)

    def to_dict(self):
        """Return the index definition plus ``index_patterns`` and, if set, ``order``."""
        d = self._index.to_dict()
        d["index_patterns"] = [self._index._name]
        if self.order is not None:
            d["order"] = self.order
        return d

    async def save(self, using=None):
        """
        Store the template through ``indices.put_template``.

        :arg using: connection alias; defaults to the wrapped index's alias
        """
        opensearch = await get_connection(using or self._index._using)
        return await opensearch.indices.put_template(
            name=self._template_name, body=self.to_dict()
        )
class AsyncIndex(object):
def __init__(self, name, using="default"):
"""
:arg name: name of the index
:arg using: connection alias to use, defaults to ``'default'``
"""
self._name = name
self._doc_types = []
self._using = using
self._settings = {}
self._aliases = {}
self._analysis = {}
self._mapping = None
def get_or_create_mapping(self):
    """Return the index-level mapping, creating an empty ``AsyncMapping`` on first use."""
    existing = self._mapping
    if existing is not None:
        return existing
    self._mapping = AsyncMapping()
    return self._mapping
def as_template(self, template_name, pattern=None, order=None):
    """
    Wrap this index in an ``AsyncIndexTemplate``.

    :arg template_name: name of the template
    :arg pattern: index pattern; falls back to this index's name when falsy
    :arg order: optional template order
    """
    # TODO: should we allow pattern to be a top-level arg?
    # or maybe have an IndexPattern that allows for it and have
    # AsyncDocument._index be that?
    index_pattern = pattern if pattern else self._name
    return AsyncIndexTemplate(template_name, index_pattern, index=self, order=order)
def resolve_nested(self, field_path):
    """
    Resolve ``field_path`` against the registered document mappings first
    and the index-level mapping second; ``((), None)`` when nothing matches.
    """
    for doc_cls in self._doc_types:
        nested_path, found = doc_cls._doc_type.mapping.resolve_nested(field_path)
        if found is not None:
            return nested_path, found
    if not self._mapping:
        return (), None
    return self._mapping.resolve_nested(field_path)
def resolve_field(self, field_path):
    """
    Look ``field_path`` up in the registered document mappings first and the
    index-level mapping second; ``None`` when nothing matches.
    """
    for doc_cls in self._doc_types:
        found = doc_cls._doc_type.mapping.resolve_field(field_path)
        if found is not None:
            return found
    if not self._mapping:
        return None
    return self._mapping.resolve_field(field_path)
async def load_mappings(self, using=None):
    """
    Update the index-level mapping through its ``update_from_opensearch``
    method, using this index's name.

    :arg using: connection alias; defaults to the alias stored on the index
    """
    index_mapping = self.get_or_create_mapping()
    alias = using or self._using
    await index_mapping.update_from_opensearch(self._name, using=alias)
def clone(self, name=None, using=None):
"""
Create a copy of the instance with another name or connection alias.
Useful for creating multiple indices with shared configuration::
i = AsyncIndex('base-index')
i.settings(number_of_shards=1)
await i.create()
i2 = i.clone('other-index')
await i2.create()
:arg name: name of the index
:arg using: connection alias to use, defaults to ``'default'``
"""
i = AsyncIndex(name or self._name, using=using or self._using)
i._settings = self._settings.copy()
i._aliases = self._aliases.copy()
i._analysis = self._analysis.copy()
i._doc_types = self._doc_types[:]
if self._mapping is not None:
i._mapping = self._mapping._clone()
return i
async def _get_connection(self, using=None):
if self._name is None:
raise ValueError("You cannot perform API calls on the default index.")
return await get_connection(using or self._using)
connection = property(_get_connection)
def mapping(self, mapping):
    """
    Associate a mapping (an instance of
    :class:`~opensearchpy.AsyncMapping`) with this index.
    This means that, when this index is created, it will contain the
    mappings for the document type defined by those mappings.
    """
    index_mapping = self.get_or_create_mapping()
    index_mapping.update(mapping)
def document(self, document):
    """
    Associate a :class:`~opensearchpy.AsyncDocument` subclass with an index.
    This means that, when this index is created, it will contain the
    mappings for the ``AsyncDocument``. If the ``AsyncDocument`` class doesn't
    have a named default index yet, this instance will be used. Can be used
    as a decorator::

        i = AsyncIndex('blog')

        @i.document
        class Post(AsyncDocument):
            title = Text()

        # create the index, including Post mappings
        await i.create()

        # .search() will now return a AsyncSearch object that will return
        # properly deserialized Post instances
        s = i.search()
    """
    self._doc_types.append(document)

    # An index without a name on the document means the user never bound
    # one explicitly, so this index takes over as the document's index.
    has_no_index = document._index._name is None
    if has_no_index:
        document._index = self

    return document
def settings(self, **kwargs):
    """
    Add settings to the index::

        i = AsyncIndex('i')
        i.settings(number_of_shards=1, number_of_replicas=0)

    Multiple calls to ``settings`` will merge the keys, later overriding
    the earlier.  Returns the index itself so calls can be chained.
    """
    for key, value in kwargs.items():
        self._settings[key] = value
    return self
def aliases(self, **kwargs):
    """
    Add aliases to the index definition::

        i = AsyncIndex('blog-v2')
        i.aliases(blog={}, published={'filter': Q('term', published=True)})

    Returns the index itself so calls can be chained.
    """
    for alias_name, alias_body in kwargs.items():
        self._aliases[alias_name] = alias_body
    return self
def analyzer(self, *args, **kwargs):
    """
    Explicitly add an analyzer to an index. Note that all custom analyzers
    defined in mappings will also be created. This is useful for search analyzers.

    Example::

        from opensearchpy import analyzer, tokenizer
        my_analyzer = analyzer('my_analyzer',
            tokenizer=tokenizer('trigram', 'nGram', min_gram=3, max_gram=3),
            filter=['lowercase']
        )

        i = AsyncIndex('blog')
        i.analyzer(my_analyzer)

    """
    definition = analysis.analyzer(*args, **kwargs).get_analysis_definition()
    # an empty definition means a custom analyzer that is probably already
    # defined out of our control; only non-empty ones are merged in
    if definition:
        merge(self._analysis, definition, True)
def to_dict(self):
    """
    Serialize the index definition.

    The result may contain ``settings`` (including ``analysis``),
    ``aliases`` and ``mappings``; keys with nothing to report are omitted.
    Mappings and analysis of the registered document classes are merged
    into those of the index-level mapping.
    """
    out = {}
    if self._settings:
        # Copy the top level: "analysis" is written into this dict below
        # and must not leak back into the instance's own settings.
        out["settings"] = self._settings.copy()
    if self._aliases:
        out["aliases"] = self._aliases
    mappings = self._mapping.to_dict() if self._mapping else {}
    collected = self._mapping._collect_analysis() if self._mapping else {}
    for d in self._doc_types:
        mapping = d._doc_type.mapping
        merge(mappings, mapping.to_dict(), True)
        merge(collected, mapping._collect_analysis(), True)
    if mappings:
        out["mappings"] = mappings
    if collected or self._analysis:
        # analyzers added explicitly through analyzer() are merged in last
        merge(collected, self._analysis)
        out.setdefault("settings", {})["analysis"] = collected
    return out
def search(self, using=None):
    """
    Return a :class:`~opensearchpy.AsyncSearch` object searching over the
    index (or all the indices belonging to this template) and its
    registered document classes.

    :arg using: connection alias; defaults to the alias stored on the index
    """
    alias = using or self._using
    return AsyncSearch(using=alias, index=self._name, doc_type=self._doc_types)
def updateByQuery(self, using=None):
    """
    Return a :class:`~opensearchpy.AsyncUpdateByQuery` object searching over the index
    (or all the indices belonging to this template) and updating Documents that match
    the search criteria.

    For more information, see here:
    https://opensearch.org/docs/latest/opensearch/rest-api/document-apis/update-by-query/

    :arg using: connection alias; defaults to the alias stored on the index
    """
    alias = using or self._using
    return AsyncUpdateByQuery(using=alias, index=self._name)
async def create(self, using=None, **kwargs):
    """
    Creates the index in opensearch, sending ``to_dict()`` as the body.

    Any additional keyword arguments will be passed to
    ``AsyncOpenSearch.indices.create`` unchanged.
    """
    client = await self._get_connection(using)
    return await client.indices.create(
        index=self._name, body=self.to_dict(), **kwargs
    )
async def is_closed(self, using=None):
    """
    Return ``True`` when the cluster state metadata reports this index's
    state as ``"close"``.
    """
    client = await self._get_connection(using)
    cluster_state = await client.cluster.state(index=self._name, metric="metadata")
    index_state = cluster_state["metadata"]["indices"][self._name]["state"]
    return index_state == "close"
async def save(self, using=None):
    """
    Sync the index definition with opensearch, creating the index if it
    doesn't exist and updating its settings and mappings if it does.

    Note some settings and mapping changes cannot be done on an open
    index (or at all on an existing index) and for those this method will
    fail with the underlying exception.

    :raises IllegalOperation: when the analysis definition differs from the
        one on the server and the index is not closed
    """
    if not await self.exists(using=using):
        return await self.create(using=using)

    definition = self.to_dict()
    wanted_settings = definition.pop("settings", {})
    wanted_analysis = wanted_settings.pop("analysis", None)
    remote = await self.get_settings(using=using)
    current_settings = remote[self._name]["settings"]["index"]

    if wanted_analysis:
        if await self.is_closed(using=using):
            # closed index, update away
            wanted_settings["analysis"] = wanted_analysis
        else:
            # compare analysis definition, if all analysis objects are
            # already defined as requested, skip analysis update and
            # proceed, otherwise raise IllegalOperation
            existing_analysis = current_settings.get("analysis", {})
            for section in wanted_analysis:
                for key in wanted_analysis[section]:
                    on_server = existing_analysis.get(section, {}).get(key, None)
                    if on_server != wanted_analysis[section][key]:
                        raise IllegalOperation(
                            "You cannot update analysis configuration on an open index, "
                            "you need to close index %s first." % self._name
                        )

    # try and update the settings
    if wanted_settings:
        # drop entries whose stringified value already matches the server
        wanted_settings = {
            key: value
            for key, value in wanted_settings.items()
            if not (key in current_settings and current_settings[key] == str(value))
        }
        if wanted_settings:
            await self.put_settings(using=using, body=wanted_settings)

    # update the mappings, any conflict in the mappings will result in an
    # exception
    wanted_mappings = definition.pop("mappings", {})
    if wanted_mappings:
        await self.put_mapping(using=using, body=wanted_mappings)
async def analyze(self, using=None, **kwargs):
    """
    Perform the analysis process on a text and return the tokens breakdown
    of the text.

    Any additional keyword arguments will be passed to
    ``AsyncOpenSearch.indices.analyze`` unchanged.
    """
    client = await self._get_connection(using)
    return await client.indices.analyze(index=self._name, **kwargs)
async def refresh(self, using=None, **kwargs):
    """
    Performs a refresh operation on the index.

    Any additional keyword arguments will be passed to
    ``AsyncOpenSearch.indices.refresh`` unchanged.
    """
    client = await self._get_connection(using)
    return await client.indices.refresh(index=self._name, **kwargs)
async def flush(self, using=None, **kwargs):
    """
    Performs a flush operation on the index.

    Any additional keyword arguments will be passed to
    ``AsyncOpenSearch.indices.flush`` unchanged.
    """
    client = await self._get_connection(using)
    return await client.indices.flush(index=self._name, **kwargs)
async def get(self, using=None, **kwargs):
    """
    The get index API allows to retrieve information about the index.

    Any additional keyword arguments will be passed to
    ``AsyncOpenSearch.indices.get`` unchanged.
    """
    client = await self._get_connection(using)
    return await client.indices.get(index=self._name, **kwargs)
async def open(self, using=None, **kwargs):
    """
    Opens the index in opensearch.

    Any additional keyword arguments will be passed to
    ``AsyncOpenSearch.indices.open`` unchanged.
    """
    client = await self._get_connection(using)
    return await client.indices.open(index=self._name, **kwargs)
async def close(self, using=None, **kwargs):
    """
    Closes the index in opensearch.

    Any additional keyword arguments will be passed to
    ``AsyncOpenSearch.indices.close`` unchanged.
    """
    client = await self._get_connection(using)
    return await client.indices.close(index=self._name, **kwargs)
async def delete(self, using=None, **kwargs):
    """
    Deletes the index in opensearch.

    Any additional keyword arguments will be passed to
    ``AsyncOpenSearch.indices.delete`` unchanged.
    """
    client = await self._get_connection(using)
    return await client.indices.delete(index=self._name, **kwargs)
async def exists(self, using=None, **kwargs):
    """
    Returns ``True`` if the index already exists in opensearch.

    Any additional keyword arguments will be passed to
    ``AsyncOpenSearch.indices.exists`` unchanged.
    """
    client = await self._get_connection(using)
    return await client.indices.exists(index=self._name, **kwargs)
async def put_mapping(self, using=None, **kwargs):
    """
    Register a mapping definition on this index.

    Delegates to ``AsyncOpenSearch.indices.put_mapping`` with
    ``index=self._name``; additional keyword arguments are passed through
    unchanged.
    """
    client = await self._get_connection(using)
    return await client.indices.put_mapping(index=self._name, **kwargs)
async def get_mapping(self, using=None, **kwargs):
    """
    Retrieve the mapping definition of this index.

    Delegates to ``AsyncOpenSearch.indices.get_mapping`` with
    ``index=self._name``; additional keyword arguments are passed through
    unchanged.
    """
    client = await self._get_connection(using)
    return await client.indices.get_mapping(index=self._name, **kwargs)
async def get_field_mapping(self, using=None, **kwargs):
    """
    Retrieve the mapping definition of a specific field of this index.

    Delegates to ``AsyncOpenSearch.indices.get_field_mapping`` with
    ``index=self._name``; additional keyword arguments are passed through
    unchanged.
    """
    client = await self._get_connection(using)
    return await client.indices.get_field_mapping(index=self._name, **kwargs)
async def put_alias(self, using=None, **kwargs):
    """
    Create an alias for this index.

    Delegates to ``AsyncOpenSearch.indices.put_alias`` with
    ``index=self._name``; additional keyword arguments are passed through
    unchanged.
    """
    client = await self._get_connection(using)
    return await client.indices.put_alias(index=self._name, **kwargs)
async def exists_alias(self, using=None, **kwargs):
    """
    Check whether a given alias exists for this index.

    Returns the result of ``AsyncOpenSearch.indices.exists_alias`` called
    with ``index=self._name``; additional keyword arguments are passed
    through unchanged.
    """
    client = await self._get_connection(using)
    return await client.indices.exists_alias(index=self._name, **kwargs)
async def get_alias(self, using=None, **kwargs):
    """
    Retrieve a specified alias of this index.

    Delegates to ``AsyncOpenSearch.indices.get_alias`` with
    ``index=self._name``; additional keyword arguments are passed through
    unchanged.
    """
    client = await self._get_connection(using)
    return await client.indices.get_alias(index=self._name, **kwargs)
async def delete_alias(self, using=None, **kwargs):
    """
    Delete a specific alias of this index.

    Delegates to ``AsyncOpenSearch.indices.delete_alias`` with
    ``index=self._name``; additional keyword arguments are passed through
    unchanged.
    """
    client = await self._get_connection(using)
    return await client.indices.delete_alias(index=self._name, **kwargs)
async def get_settings(self, using=None, **kwargs):
    """
    Retrieve the settings of this index.

    Delegates to ``AsyncOpenSearch.indices.get_settings`` with
    ``index=self._name``; additional keyword arguments are passed through
    unchanged.
    """
    client = await self._get_connection(using)
    return await client.indices.get_settings(index=self._name, **kwargs)
async def put_settings(self, using=None, **kwargs):
    """
    Change index-level settings of this index in real time.

    Delegates to ``AsyncOpenSearch.indices.put_settings`` with
    ``index=self._name``; additional keyword arguments are passed through
    unchanged.
    """
    client = await self._get_connection(using)
    return await client.indices.put_settings(index=self._name, **kwargs)
async def stats(self, using=None, **kwargs):
    """
    Retrieve statistics on the operations happening on this index.

    Delegates to ``AsyncOpenSearch.indices.stats`` with
    ``index=self._name``; additional keyword arguments are passed through
    unchanged.
    """
    client = await self._get_connection(using)
    return await client.indices.stats(index=self._name, **kwargs)
async def segments(self, using=None, **kwargs):
    """
    Return low-level information about the segments that the Lucene
    index (shard level) is built with.

    Delegates to ``AsyncOpenSearch.indices.segments`` with
    ``index=self._name``; additional keyword arguments are passed through
    unchanged.
    """
    client = await self._get_connection(using)
    return await client.indices.segments(index=self._name, **kwargs)
async def validate_query(self, using=None, **kwargs):
    """
    Validate a potentially expensive query without executing it.

    Delegates to ``AsyncOpenSearch.indices.validate_query`` with
    ``index=self._name``; additional keyword arguments are passed through
    unchanged.
    """
    client = await self._get_connection(using)
    return await client.indices.validate_query(index=self._name, **kwargs)
async def clear_cache(self, using=None, **kwargs):
    """
    Clear all caches, or specific caches, associated with this index.

    Delegates to ``AsyncOpenSearch.indices.clear_cache`` with
    ``index=self._name``; additional keyword arguments are passed through
    unchanged.
    """
    client = await self._get_connection(using)
    return await client.indices.clear_cache(index=self._name, **kwargs)
async def recovery(self, using=None, **kwargs):
    """
    Give insight into on-going shard recoveries for this index.

    Delegates to ``AsyncOpenSearch.indices.recovery`` with
    ``index=self._name``; additional keyword arguments are passed through
    unchanged.
    """
    client = await self._get_connection(using)
    return await client.indices.recovery(index=self._name, **kwargs)
async def upgrade(self, using=None, **kwargs):
    """
    Upgrade this index to the latest format.

    Delegates to ``AsyncOpenSearch.indices.upgrade`` with
    ``index=self._name``; additional keyword arguments are passed through
    unchanged.
    """
    client = await self._get_connection(using)
    return await client.indices.upgrade(index=self._name, **kwargs)
async def get_upgrade(self, using=None, **kwargs):
    """
    Monitor how much of this index has been upgraded.

    Delegates to ``AsyncOpenSearch.indices.get_upgrade`` with
    ``index=self._name``; additional keyword arguments are passed through
    unchanged.
    """
    client = await self._get_connection(using)
    return await client.indices.get_upgrade(index=self._name, **kwargs)
async def shard_stores(self, using=None, **kwargs):
    """
    Provide store information for shard copies of this index.

    Store information reports on which nodes shard copies exist, the
    shard copy version (indicating how recent they are), and any
    exceptions encountered while opening the shard index or from an
    earlier engine failure.

    Delegates to ``AsyncOpenSearch.indices.shard_stores`` with
    ``index=self._name``; additional keyword arguments are passed through
    unchanged.
    """
    client = await self._get_connection(using)
    return await client.indices.shard_stores(index=self._name, **kwargs)
async def forcemerge(self, using=None, **kwargs):
    """
    Force a merge of this index through the API.

    The merge relates to the number of segments a Lucene index holds
    within each shard; force merging reduces that number by merging
    segments together.

    This call will block until the merge is complete. If the http
    connection is lost, the request will continue in the background, and
    any new requests will block until the previous force merge is
    complete.

    Delegates to ``AsyncOpenSearch.indices.forcemerge`` with
    ``index=self._name``; additional keyword arguments are passed through
    unchanged.
    """
    client = await self._get_connection(using)
    return await client.indices.forcemerge(index=self._name, **kwargs)
async def shrink(self, using=None, **kwargs):
    """
    Shrink this index into a new index with fewer primary shards.

    The number of primary shards in the target index must be a factor of
    the shards in the source index. For example an index with 8 primary
    shards can be shrunk into 4, 2 or 1 primary shards, and an index with
    15 primary shards can be shrunk into 5, 3 or 1. If the number of
    shards in the index is a prime number it can only be shrunk into a
    single primary shard. Before shrinking, a (primary or replica) copy
    of every shard in the index must be present on the same node.

    Delegates to ``AsyncOpenSearch.indices.shrink`` with
    ``index=self._name``; additional keyword arguments are passed through
    unchanged.
    """
    client = await self._get_connection(using)
    return await client.indices.shrink(index=self._name, **kwargs)
+11
View File
@@ -0,0 +1,11 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
# Stub declaration only: the body is ``...``, so no members of
# AsyncIndexTemplate are declared here.
class AsyncIndexTemplate(object): ...
# Stub declaration only: the body is ``...``, so no members of AsyncIndex
# are declared here.
class AsyncIndex(object): ...
+169
View File
@@ -0,0 +1,169 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
try:
import collections.abc as collections_abc # only works on python 3.3+
except ImportError:
import collections as collections_abc
from itertools import chain
from six import iteritems
from opensearchpy.connection.async_connections import get_connection
from opensearchpy.helpers.field import Nested, Text
from opensearchpy.helpers.mapping import META_FIELDS, Properties
class AsyncMapping(object):
    """
    Mapping definition whose cluster-facing operations are coroutines.

    Field definitions are kept in ``self.properties`` (a ``Properties``
    instance) and mapping-level metadata (for example ``_all``) in the
    ``self._meta`` dictionary. ``from_opensearch``, ``save`` and
    ``update_from_opensearch`` must be awaited; everything else is
    synchronous.
    """

    def __init__(self):
        self.properties = Properties()
        self._meta = {}

    def __repr__(self):
        return "Mapping()"

    def _clone(self):
        """Return a new mapping with a shallow copy of the properties params."""
        m = AsyncMapping()
        m.properties._params = self.properties._params.copy()
        return m

    @classmethod
    async def from_opensearch(cls, index, using="default"):
        """
        Build a mapping populated from the mapping stored for ``index``.

        :arg index: name of the index to read the mapping from
        :arg using: connection alias (or client) handed to ``get_connection``
        """
        m = cls()
        await m.update_from_opensearch(index, using)
        return m

    def resolve_nested(self, field_path):
        """
        Walk the dotted ``field_path`` and collect the ``Nested`` fields on it.

        Returns ``(nested, field)`` where ``nested`` is the list of dotted
        paths of every ``Nested`` field encountered and ``field`` is the
        field found at the end of the path. Returns ``((), None)`` when any
        step of the path is missing.
        """
        field = self
        nested = []
        parts = field_path.split(".")
        for i, step in enumerate(parts):
            try:
                field = field[step]
            except KeyError:
                return (), None
            if isinstance(field, Nested):
                nested.append(".".join(parts[: i + 1]))
        return nested, field

    def resolve_field(self, field_path):
        """
        Return the field at the dotted ``field_path`` or ``None`` if any step
        of the path is missing.
        """
        field = self
        for step in field_path.split("."):
            try:
                field = field[step]
            except KeyError:
                return
        return field

    def _collect_analysis(self):
        """
        Merge the analysis definitions of every analyzer-like attribute
        (``analyzer``, ``normalizer``, ``search_analyzer``,
        ``search_quote_analyzer``) found on the fields of this mapping,
        including a ``Text`` field built from the ``_all`` meta entry.
        """
        analysis = {}
        fields = []
        if "_all" in self._meta:
            fields.append(Text(**self._meta["_all"]))

        for f in chain(fields, self.properties._collect_fields()):
            for analyzer_name in (
                "analyzer",
                "normalizer",
                "search_analyzer",
                "search_quote_analyzer",
            ):
                if not hasattr(f, analyzer_name):
                    continue
                analyzer = getattr(f, analyzer_name)
                d = analyzer.get_analysis_definition()
                # empty custom analyzer, probably already defined out of our control
                if not d:
                    continue

                # merge the definition
                # TODO: conflict detection/resolution
                for key in d:
                    analysis.setdefault(key, {}).update(d[key])

        return analysis

    async def save(self, index, using="default"):
        """
        Attach this mapping to an ``AsyncIndex`` named ``index`` and save it.

        Returns whatever ``AsyncIndex.save`` returns.
        """
        # imported here rather than at module level -- presumably to avoid a
        # circular import between the index and mapping modules
        from opensearchpy._async.helpers.index import AsyncIndex

        index = AsyncIndex(index, using=using)
        index.mapping(self)
        return await index.save()

    async def update_from_opensearch(self, index, using="default"):
        """
        Fetch the mapping of ``index`` from the cluster and merge it into
        this instance.
        """
        opensearch = await get_connection(using)
        raw = await opensearch.indices.get_mapping(index=index)
        # the response is keyed by index name; take a single entry from it
        _, raw = raw.popitem()
        self._update_from_dict(raw["mappings"])

    def _update_from_dict(self, raw):
        """Load fields and meta entries from a raw mapping dictionary."""
        for name, definition in iteritems(raw.get("properties", {})):
            self.field(name, definition)

        # metadata like _all etc
        for name, value in iteritems(raw):
            if name != "properties":
                if isinstance(value, collections_abc.Mapping):
                    self.meta(name, **value)
                else:
                    self.meta(name, value)

    def update(self, mapping, update_only=False):
        """
        Merge the fields and meta entries of ``mapping`` into this mapping.

        With ``update_only`` set, fields that already exist here are kept
        (merged recursively when they expose ``update``) and only missing
        meta entries are added; otherwise incoming definitions overwrite
        existing ones.
        """
        for name in mapping:
            if update_only and name in self:
                # nested and inner objects, merge recursively
                if hasattr(self[name], "update"):
                    # FIXME only merge subfields, not the settings
                    self[name].update(mapping[name], update_only)
                continue
            self.field(name, mapping[name])

        if update_only:
            for name in mapping._meta:
                if name not in self._meta:
                    self._meta[name] = mapping._meta[name]
        else:
            self._meta.update(mapping._meta)

    def __contains__(self, name):
        return name in self.properties.properties

    def __getitem__(self, name):
        return self.properties.properties[name]

    def __iter__(self):
        return iter(self.properties.properties)

    def field(self, *args, **kwargs):
        """Add a field via ``self.properties.field`` and return ``self``."""
        self.properties.field(*args, **kwargs)
        return self

    def meta(self, name, params=None, **kwargs):
        """
        Set a meta entry and return ``self``.

        ``name`` is prefixed with ``_`` unless it already starts with one or
        is listed in ``META_FIELDS``. The value is ``params`` when given,
        otherwise the keyword arguments.

        :raises ValueError: if both ``params`` and keyword arguments are given
        """
        if not name.startswith("_") and name not in META_FIELDS:
            name = "_" + name

        if params and kwargs:
            raise ValueError("Meta configs cannot have both value and a dictionary.")

        self._meta[name] = kwargs if params is None else params
        return self

    def to_dict(self):
        """
        Serialize the mapping: the meta entries plus the output of
        ``self.properties.to_dict()``.

        The instance's own ``_meta`` is never modified.
        """
        # Always serialize into a copy; updating ``self._meta`` directly would
        # leak the serialized properties into the mapping's meta state.
        meta = self._meta.copy()

        # hard coded serialization of analyzers in _all
        if "_all" in meta:
            _all = meta["_all"] = meta["_all"].copy()
            for f in ("analyzer", "search_analyzer", "search_quote_analyzer"):
                if hasattr(_all.get(f, None), "to_dict"):
                    _all[f] = _all[f].to_dict()
        meta.update(self.properties.to_dict())
        return meta
+10
View File
@@ -0,0 +1,10 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
# Stub declaration only: the body is ``...``, so no members of AsyncMapping
# are declared here.
class AsyncMapping(object): ...
+534
View File
@@ -0,0 +1,534 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
import copy
from six import iteritems, string_types
from opensearchpy._async.helpers.actions import aiter, async_scan
from opensearchpy.connection.async_connections import get_connection
from opensearchpy.exceptions import IllegalOperation, TransportError
from opensearchpy.helpers.aggs import A
from opensearchpy.helpers.query import Bool, Q
from opensearchpy.helpers.response import Response
from opensearchpy.helpers.search import AggsProxy, ProxyDescriptor, QueryProxy, Request
from opensearchpy.helpers.utils import AttrDict, recursive_to_dict
class AsyncSearch(Request):
    """
    Asynchronous search request builder.

    Most builder methods defined here (``source``, ``sort``, ``highlight``,
    ``suggest``, ...) work on a clone obtained from ``_clone`` and return it,
    leaving the instance they were called on untouched; ``update_from_dict``
    is the in-place exception. ``count``, ``execute`` and ``delete`` are
    coroutines and ``scan`` is an async generator; all of them obtain their
    client from ``get_connection(self._using)``.
    """

    query = ProxyDescriptor("query")
    post_filter = ProxyDescriptor("post_filter")

    def __init__(self, **kwargs):
        """
        Search request to opensearch.

        :arg using: `AsyncOpenSearch` instance to use
        :arg index: limit the search to index
        :arg doc_type: only query this type.

        All the parameters supplied (or omitted) at creation type can be later
        overridden by methods (`using`, `index` and `doc_type` respectively).
        """
        super(AsyncSearch, self).__init__(**kwargs)

        self.aggs = AggsProxy(self)
        self._sort = []
        self._source = None
        self._highlight = {}
        self._highlight_opts = {}
        self._suggest = {}
        self._script_fields = {}
        self._response_class = Response

        self._query_proxy = QueryProxy(self, "query")
        self._post_filter_proxy = QueryProxy(self, "post_filter")

    def filter(self, *args, **kwargs):
        """Add ``Q(*args, **kwargs)`` to the query as a ``Bool`` filter clause."""
        return self.query(Bool(filter=[Q(*args, **kwargs)]))

    def exclude(self, *args, **kwargs):
        """Add the negation of ``Q(*args, **kwargs)`` as a ``Bool`` filter clause."""
        return self.query(Bool(filter=[~Q(*args, **kwargs)]))

    def __getitem__(self, n):
        """
        Support slicing the `AsyncSearch` instance for pagination.

        Slicing equates to the from/size parameters. E.g.::

            s = AsyncSearch().query(...)[0:25]

        is equivalent to::

            s = AsyncSearch().query(...).extra(from_=0, size=25)

        :raises ValueError: on a negative index or negative slice bound
        """
        s = self._clone()

        if isinstance(n, slice):
            # If negative slicing, abort.
            if n.start and n.start < 0 or n.stop and n.stop < 0:
                raise ValueError("AsyncSearch does not support negative slicing.")
            # OpenSearch won't get all results so we default to size: 10 if
            # stop not given.
            s._extra["from"] = n.start or 0
            s._extra["size"] = max(
                0, n.stop - (n.start or 0) if n.stop is not None else 10
            )
            return s
        else:  # This is an index lookup, equivalent to slicing by [n:n+1].
            # If negative index, abort.
            if n < 0:
                raise ValueError("AsyncSearch does not support negative indexing.")
            s._extra["from"] = n
            s._extra["size"] = 1
            return s

    @classmethod
    def from_dict(cls, d):
        """
        Construct a new `AsyncSearch` instance from a raw dict containing the search
        body. Useful when migrating from raw dictionaries.

        Example::

            s = AsyncSearch.from_dict({
                "query": {
                    "bool": {
                        "must": [...]
                    }
                },
                "aggs": {...}
            })
            s = s.filter('term', published=True)
        """
        s = cls()
        s.update_from_dict(d)
        return s

    def _clone(self):
        """
        Return a clone of the current search request. Performs a shallow copy
        of all the underlying objects. Used internally by most state modifying
        APIs.
        """
        s = super(AsyncSearch, self)._clone()

        s._response_class = self._response_class
        s._sort = self._sort[:]
        s._source = copy.copy(self._source) if self._source is not None else None
        s._highlight = self._highlight.copy()
        s._highlight_opts = self._highlight_opts.copy()
        s._suggest = self._suggest.copy()
        s._script_fields = self._script_fields.copy()
        for x in ("query", "post_filter"):
            getattr(s, x)._proxied = getattr(self, x)._proxied

        # copy top-level bucket definitions
        if self.aggs._params.get("aggs"):
            s.aggs._params = {"aggs": self.aggs._params["aggs"].copy()}
        return s

    def response_class(self, cls):
        """
        Override the default wrapper used for the response.
        """
        s = self._clone()
        s._response_class = cls
        return s

    def update_from_dict(self, d):
        """
        Apply options from a serialized body to the current instance. Modifies
        the object in-place. Used mostly by ``from_dict``.

        The passed-in dictionary is left unmodified: the top level, the
        ``highlight`` section and the ``suggest`` section are copied before
        anything is popped from or added to them.
        """
        d = d.copy()
        if "query" in d:
            self.query._proxied = Q(d.pop("query"))
        if "post_filter" in d:
            self.post_filter._proxied = Q(d.pop("post_filter"))

        aggs = d.pop("aggs", d.pop("aggregations", {}))
        if aggs:
            self.aggs._params = {
                "aggs": {name: A(value) for (name, value) in iteritems(aggs)}
            }
        if "sort" in d:
            self._sort = d.pop("sort")
        if "_source" in d:
            self._source = d.pop("_source")
        if "highlight" in d:
            high = d.pop("highlight").copy()
            self._highlight = high.pop("fields")
            self._highlight_opts = high
        if "suggest" in d:
            # ``d`` is only a shallow copy, so the suggest section still
            # belongs to the caller; copy it before popping the global text.
            suggest = dict(d.pop("suggest"))
            if "text" in suggest:
                text = suggest.pop("text")
                # push the global ``text`` down into every suggestion that
                # does not define its own, working on copies of the entries
                suggest = {name: dict(body) for name, body in suggest.items()}
                for body in suggest.values():
                    body.setdefault("text", text)
            self._suggest = suggest
        if "script_fields" in d:
            self._script_fields = d.pop("script_fields")
        self._extra.update(d)
        return self

    def script_fields(self, **kwargs):
        """
        Define script fields to be calculated on hits.

        Example::

            s = AsyncSearch()
            s = s.script_fields(times_two="doc['field'].value * 2")
            s = s.script_fields(
                times_three={
                    'script': {
                        'lang': 'painless',
                        'source': "doc['field'].value * params.n",
                        'params': {'n': 3}
                    }
                }
            )
        """
        s = self._clone()
        for name in kwargs:
            # a bare string is shorthand for {"script": <string>}
            if isinstance(kwargs[name], string_types):
                kwargs[name] = {"script": kwargs[name]}
        s._script_fields.update(kwargs)
        return s

    def source(self, fields=None, **kwargs):
        """
        Selectively control how the _source field is returned.

        :arg fields: wildcard string, array of wildcards, or dictionary of includes and excludes

        If ``fields`` is None, the entire document will be returned for
        each hit. If fields is a dictionary with keys of 'includes' and/or
        'excludes' the fields will be either included or excluded appropriately.

        Calling this multiple times with the same named parameter will override the
        previous values with the new ones. Passing ``None`` as the value of a
        keyword argument removes that key from the source filter.

        Example::

            s = AsyncSearch()
            s = s.source(includes=['obj1.*'], excludes=["*.description"])

            s = AsyncSearch()
            s = s.source(includes=['obj1.*']).source(excludes=["*.description"])

        :raises ValueError: if both ``fields`` and keyword arguments are given
        """
        s = self._clone()

        if fields and kwargs:
            raise ValueError("You cannot specify fields and kwargs at the same time.")

        if fields is not None:
            s._source = fields
            return s

        if kwargs and not isinstance(s._source, dict):
            s._source = {}

        for key, value in kwargs.items():
            if value is None:
                try:
                    del s._source[key]
                except KeyError:
                    pass
            else:
                s._source[key] = value

        return s

    def sort(self, *keys):
        """
        Add sorting information to the search request. If called without
        arguments it will remove all sort requirements. Otherwise it will
        replace them. Acceptable arguments are::

            'some.field'
            '-some.other.field'
            {'different.field': {'any': 'dict'}}

        so for example::

            s = AsyncSearch().sort(
                'category',
                '-title',
                {"price" : {"order" : "asc", "mode" : "avg"}}
            )

        will sort by ``category``, ``title`` (in descending order) and
        ``price`` in ascending order using the ``avg`` mode.

        The API returns a copy of the AsyncSearch object and can thus be chained.

        :raises IllegalOperation: when asked to sort by ``-_score``
        """
        s = self._clone()
        s._sort = []
        for k in keys:
            if isinstance(k, string_types) and k.startswith("-"):
                if k[1:] == "_score":
                    raise IllegalOperation("Sorting by `-_score` is not allowed.")
                k = {k[1:]: {"order": "desc"}}
            s._sort.append(k)
        return s

    def highlight_options(self, **kwargs):
        """
        Update the global highlighting options used for this request. For
        example::

            s = AsyncSearch()
            s = s.highlight_options(order='score')
        """
        s = self._clone()
        s._highlight_opts.update(kwargs)
        return s

    def highlight(self, *fields, **kwargs):
        """
        Request highlighting of some fields. All keyword arguments passed in will be
        used as parameters for all the fields in the ``fields`` parameter. Example::

            AsyncSearch().highlight('title', 'body', fragment_size=50)

        will produce the equivalent of::

            {
                "highlight": {
                    "fields": {
                        "body": {"fragment_size": 50},
                        "title": {"fragment_size": 50}
                    }
                }
            }

        If you want to have different options for different fields
        you can call ``highlight`` twice::

            AsyncSearch().highlight('title', fragment_size=50).highlight('body', fragment_size=100)

        which will produce::

            {
                "highlight": {
                    "fields": {
                        "body": {"fragment_size": 100},
                        "title": {"fragment_size": 50}
                    }
                }
            }
        """
        s = self._clone()
        for f in fields:
            s._highlight[f] = kwargs
        return s

    def suggest(self, name, text, **kwargs):
        """
        Add a suggestions request to the search.

        :arg name: name of the suggestion
        :arg text: text to suggest on

        All keyword arguments will be added to the suggestions body. For example::

            s = AsyncSearch()
            s = s.suggest('suggestion-1', 'AsyncOpenSearch', term={'field': 'body'})
        """
        s = self._clone()
        s._suggest[name] = {"text": text}
        s._suggest[name].update(kwargs)
        return s

    def to_dict(self, count=False, **kwargs):
        """
        Serialize the search into the dictionary that will be sent over as the
        request's body.

        :arg count: a flag to specify if we are interested in a body for count -
            no aggregations, no pagination bounds etc.

        All additional keyword arguments will be included into the dictionary.
        """
        d = {}

        if self.query:
            d["query"] = self.query.to_dict()

        # count request doesn't care for sorting and other things
        if not count:
            if self.post_filter:
                d["post_filter"] = self.post_filter.to_dict()

            if self.aggs.aggs:
                d.update(self.aggs.to_dict())

            if self._sort:
                d["sort"] = self._sort

            d.update(recursive_to_dict(self._extra))

            if self._source not in (None, {}):
                d["_source"] = self._source

            if self._highlight:
                d["highlight"] = {"fields": self._highlight}
                d["highlight"].update(self._highlight_opts)

            if self._suggest:
                d["suggest"] = self._suggest

            if self._script_fields:
                d["script_fields"] = self._script_fields

        d.update(recursive_to_dict(kwargs))
        return d

    async def count(self):
        """
        Return the number of hits matching the query and filters. Note that
        only the actual number is returned.

        A cached response is reused when its total is exact (relation
        ``eq``); otherwise a count request is sent.
        """
        if hasattr(self, "_response") and self._response.hits.total.relation == "eq":
            return self._response.hits.total.value

        opensearch = await get_connection(self._using)

        d = self.to_dict(count=True)
        # TODO: failed shards detection
        return (await opensearch.count(index=self._index, body=d, **self._params))[
            "count"
        ]

    async def execute(self, ignore_cache=False):
        """
        Execute the search and return an instance of ``Response`` wrapping all
        the data.

        :arg ignore_cache: if set to ``True``, consecutive calls will hit
            AsyncOpenSearch, while cached result will be ignored. Defaults to `False`
        """
        if ignore_cache or not hasattr(self, "_response"):
            opensearch = await get_connection(self._using)

            self._response = self._response_class(
                self,
                await opensearch.search(
                    index=self._index, body=self.to_dict(), **self._params
                ),
            )
        return self._response

    async def scan(self):
        """
        Turn the search into a scan search and return a generator that will
        iterate over all the documents matching the query.

        Use ``params`` method to specify any additional arguments you wish to
        pass to the underlying ``async_scan`` helper from ``opensearchpy``
        """
        opensearch = await get_connection(self._using)

        async for hit in aiter(
            async_scan(
                opensearch, query=self.to_dict(), index=self._index, **self._params
            )
        ):
            yield self._get_result(hit)

    async def delete(self):
        """
        delete() executes the query by delegating to delete_by_query()
        and returns the raw response wrapped in an ``AttrDict``.
        """
        opensearch = await get_connection(self._using)

        return AttrDict(
            await opensearch.delete_by_query(
                index=self._index, body=self.to_dict(), **self._params
            )
        )
class AsyncMultiSearch(Request):
    """
    Bundle several :class:`~opensearchpy.AsyncSearch` objects so they are
    sent to the cluster as one multi-search request.
    """

    def __init__(self, **kwargs):
        super(AsyncMultiSearch, self).__init__(**kwargs)
        self._searches = []

    def __getitem__(self, key):
        return self._searches[key]

    def __iter__(self):
        return iter(self._searches)

    def _clone(self):
        """Clone the request; the list of searches is copied shallowly."""
        clone = super(AsyncMultiSearch, self)._clone()
        clone._searches = list(self._searches)
        return clone

    def add(self, search):
        """
        Return a copy of this request with ``search`` appended::

            ms = AsyncMultiSearch(index='my-index')
            ms = ms.add(AsyncSearch(doc_type=Category).filter('term', category='python'))
            ms = ms.add(AsyncSearch(doc_type=Blog))
        """
        clone = self._clone()
        clone._searches.append(search)
        return clone

    def to_dict(self):
        """
        Serialize into a flat list alternating a header dictionary (index
        and params of a search) with that search's body.
        """
        lines = []
        for search in self._searches:
            header = {"index": search._index} if search._index else {}
            header.update(search._params)
            lines.extend((header, search.to_dict()))
        return lines

    async def execute(self, ignore_cache=False, raise_on_error=True):
        """
        Run the multi-search and return a list with one entry per search.

        The list is cached on the instance and reused unless
        ``ignore_cache`` is set. A failed search raises ``TransportError``
        when ``raise_on_error`` is true and is reported as ``None`` in the
        result list otherwise.
        """
        if not ignore_cache and hasattr(self, "_response"):
            return self._response

        client = await get_connection(self._using)
        raw = await client.msearch(
            index=self._index, body=self.to_dict(), **self._params
        )

        results = []
        for search, item in zip(self._searches, raw["responses"]):
            if item.get("error", False):
                if raise_on_error:
                    raise TransportError("N/A", item["error"]["type"], item["error"])
                results.append(None)
            else:
                results.append(Response(search, item))

        self._response = results
        return self._response
+13
View File
@@ -0,0 +1,13 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
from opensearchpy.helpers.search import Request
# Stub declaration only: AsyncSearch is declared as a ``Request`` subclass
# with no members of its own listed here.
class AsyncSearch(Request): ...
# Stub declaration only: AsyncMultiSearch is declared as a ``Request``
# subclass with no members of its own listed here.
class AsyncMultiSearch(Request): ...
+45
View File
@@ -0,0 +1,45 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
import os
import time
from unittest import SkipTest
from opensearchpy import AsyncOpenSearch
from opensearchpy.exceptions import ConnectionError
# Cluster address used by the test client; the OPENSEARCH_URL environment
# variable wins over the built-in local default.
OPENSEARCH_URL = os.environ.get(
    "OPENSEARCH_URL", "https://admin:admin@localhost:9200"
)
async def get_test_client(nowait=False, **kwargs):
    """
    Create an ``AsyncOpenSearch`` client for ``OPENSEARCH_URL`` and wait
    until the cluster reports at least ``yellow`` health.

    :arg nowait: when true only a single health check is attempted instead
        of up to 100
    :arg kwargs: extra client options; they override the defaults built here

    Returns the ready client. Raises ``SkipTest`` when the cluster could not
    be reached within the allowed attempts; the client is closed first so no
    open connections are left behind.
    """
    # asyncio is imported locally so this function does not depend on the
    # module-level import block
    import asyncio

    from opensearchpy import AsyncConnection

    # construct kwargs from the environment
    kw = {"timeout": 30}

    async_connection = AsyncConnection()
    if hasattr(async_connection, "AIOHttpConnection"):
        kw["connection_class"] = getattr(async_connection, "AIOHttpConnection")

    kw.update(kwargs)
    client = AsyncOpenSearch(OPENSEARCH_URL, **kw)

    # wait for yellow status
    for _ in range(1 if nowait else 100):
        try:
            await client.cluster.health(wait_for_status="yellow")
            return client
        except ConnectionError:
            # yield to the event loop while waiting; a blocking time.sleep()
            # here would stall every other task
            await asyncio.sleep(0.1)

    # timeout
    await client.close()
    raise SkipTest("OpenSearch failed to start.")
+19
View File
@@ -0,0 +1,19 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
from typing import Any
from _typeshed import Incomplete
from opensearchpy import AsyncOpenSearch as AsyncOpenSearch
from opensearchpy.exceptions import ConnectionError as ConnectionError
# Declared with the placeholder type ``Incomplete``, i.e. no precise type is
# given for this module-level name.
OPENSEARCH_URL: Incomplete
# Signature-only declaration: ``nowait`` is a bool with a default, extra
# keyword arguments are untyped (``Any``) and so is the awaited result.
async def get_test_client(nowait: bool = ..., **kwargs: Any) -> Any: ...
@@ -0,0 +1,151 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
from opensearchpy.connection.async_connections import get_connection
from opensearchpy.helpers.query import Bool, Q
from opensearchpy.helpers.response import UpdateByQueryResponse
from opensearchpy.helpers.search import ProxyDescriptor, QueryProxy, Request
from opensearchpy.helpers.utils import recursive_to_dict
class AsyncUpdateByQuery(Request):
    """
    Builder for an update-by-query request whose ``execute`` is a coroutine.
    """

    query = ProxyDescriptor("query")

    def __init__(self, **kwargs):
        """
        Update by query request to opensearch.

        :arg using: `AsyncOpenSearch` instance to use
        :arg index: limit the search to index
        :arg doc_type: only query this type.

        Whatever is supplied (or left out) here can be overridden later
        through the `using`, `index` and `doc_type` methods respectively.
        """
        super(AsyncUpdateByQuery, self).__init__(**kwargs)
        self._response_class = UpdateByQueryResponse
        self._script = {}
        self._query_proxy = QueryProxy(self, "query")

    def filter(self, *args, **kwargs):
        """Add ``Q(*args, **kwargs)`` to the query as a ``Bool`` filter clause."""
        condition = Q(*args, **kwargs)
        return self.query(Bool(filter=[condition]))

    def exclude(self, *args, **kwargs):
        """Add the negation of ``Q(*args, **kwargs)`` as a ``Bool`` filter clause."""
        negated = ~Q(*args, **kwargs)
        return self.query(Bool(filter=[negated]))

    @classmethod
    def from_dict(cls, d):
        """
        Build a new `AsyncUpdateByQuery` from a raw dict holding the request
        body. Handy when migrating from raw dictionaries.

        Example::

            ubq = AsyncUpdateByQuery.from_dict({
                "query": {
                    "bool": {
                        "must": [...]
                    }
                },
                "script": {...}
            })
            ubq = ubq.filter('term', published=True)
        """
        instance = cls()
        instance.update_from_dict(d)
        return instance

    def _clone(self):
        """
        Return a copy of this request. The script dictionary is copied
        shallowly and the proxied query object is shared with the original.
        Used internally by the state modifying APIs.
        """
        clone = super(AsyncUpdateByQuery, self)._clone()
        clone._response_class = self._response_class
        clone._script = self._script.copy()
        clone.query._proxied = self.query._proxied
        return clone

    def response_class(self, cls):
        """
        Return a copy of this request that wraps its response in ``cls``.
        """
        clone = self._clone()
        clone._response_class = cls
        return clone

    def update_from_dict(self, d):
        """
        Load options from a serialized body into this instance, in place.
        ``query`` and ``script`` are handled specially; every other key is
        merged into the extra body options. Returns ``self``.
        """
        body = d.copy()
        if "query" in body:
            self.query._proxied = Q(body.pop("query"))
        if "script" in body:
            self._script = body.pop("script")
        self._extra.update(body)
        return self

    def script(self, **kwargs):
        """
        Define the update action to take.

        Note: the API only accepts a single script, so every call replaces
        the script defined by a previous one.

        Example::

            ubq = AsyncUpdateByQuery()
            ubq = ubq.script(source="ctx._source.likes++")
            ubq = ubq.script(source="ctx._source.likes += params.f",
                             lang="expression",
                             params={'f': 3})
        """
        clone = self._clone()
        if clone._script:
            # drop the previously configured script instead of merging
            clone._script = {}
        clone._script.update(kwargs)
        return clone

    def to_dict(self, **kwargs):
        """
        Serialize the request into the dictionary that will be sent over as
        the request's body.

        All additional keyword arguments will be included into the dictionary.
        """
        body = {}
        if self.query:
            body["query"] = self.query.to_dict()
        if self._script:
            body["script"] = self._script

        # extra body options first, call-time keyword arguments last so the
        # latter take precedence
        for extra in (self._extra, kwargs):
            body.update(recursive_to_dict(extra))
        return body

    async def execute(self):
        """
        Send the update-by-query request and return the raw result wrapped
        in the configured response class; the wrapper is also stored on
        ``self._response``.
        """
        client = await get_connection(self._using)
        wrap = self._response_class
        raw = await client.update_by_query(
            index=self._index, body=self.to_dict(), **self._params
        )
        self._response = wrap(self, raw)
        return self._response
@@ -0,0 +1,12 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
from opensearchpy.helpers.search import Request
# Stub declaration only: AsyncUpdateByQuery is declared as a ``Request``
# subclass with no members of its own listed here.
class AsyncUpdateByQuery(Request): ...
@@ -0,0 +1,113 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
from six import string_types
import opensearchpy
from opensearchpy._async.helpers.actions import aiter
from opensearchpy.serializer import serializer
class AsyncConnections(object):
    """
    Class responsible for holding connections to different clusters. Used as a
    singleton in this module.

    ``_kwargs`` maps an alias to the keyword arguments used to build its
    client and ``_conns`` maps an alias to an already constructed client.
    """

    def __init__(self):
        self._kwargs = {}
        self._conns = {}

    async def configure(self, **kwargs):
        """
        Configure multiple connections at once, useful for passing in config
        dictionaries obtained from other sources, like Django's settings or a
        configuration management tool.

        Example::

            async_connections.configure(
                default={'hosts': 'localhost'},
                dev={'hosts': ['opensearchdev1.example.com:9200'], 'sniff_on_start': True},
            )

        Connections will only be constructed lazily when requested through
        ``get_connection``.
        """
        # iterate over a snapshot because entries are deleted inside the loop
        for alias in list(self._conns):
            # try and preserve existing client to keep the persistent connections alive
            if alias in self._kwargs and kwargs.get(alias, None) == self._kwargs[alias]:
                continue

            del self._conns[alias]
        self._kwargs = kwargs

    async def add_connection(self, alias, conn):
        """
        Add a connection object, it will be passed through as-is.
        """
        self._conns[alias] = conn

    async def remove_connection(self, alias):
        """
        Remove connection from the registry. Raises ``KeyError`` if connection
        wasn't found.
        """
        errors = 0
        # the alias may live in either registry; only a miss in both is an error
        for registry in (self._conns, self._kwargs):
            try:
                del registry[alias]
            except KeyError:
                errors += 1

        if errors == 2:
            raise KeyError("There is no connection with alias %r." % alias)

    async def create_connection(self, alias="default", **kwargs):
        """
        Construct an instance of ``opensearchpy.AsyncOpenSearch`` and register
        it under given alias.
        """
        kwargs.setdefault("serializer", serializer)
        conn = self._conns[alias] = opensearchpy.AsyncOpenSearch(**kwargs)
        return conn

    async def get_connection(self, alias="default"):
        """
        Retrieve a connection, construct it if necessary (only configuration
        was passed to us). If a non-string alias has been passed through we
        assume it's already a client instance and will just return it as-is.

        Raises ``KeyError`` if no client (or its definition) is registered
        under the alias.
        """
        # do not check isinstance(AsyncOpenSearch) so that people can wrap their
        # clients
        if not isinstance(alias, string_types):
            return alias

        # connection already established
        try:
            return self._conns[alias]
        except KeyError:
            pass

        # if not, try to create it; only the configuration lookup is guarded
        # so that a KeyError raised while building the client is not
        # misreported as a missing alias
        try:
            config = self._kwargs[alias]
        except KeyError:
            # no connection and no kwargs to set one up
            raise KeyError("There is no connection with alias %r." % alias)
        return await self.create_connection(alias, **config)
# Module-level singleton registry.
async_connections = AsyncConnections()
# Expose the singleton's bound methods as module-level callables.
configure = async_connections.configure
add_connection = async_connections.add_connection
remove_connection = async_connections.remove_connection
create_connection = async_connections.create_connection
get_connection = async_connections.get_connection
@@ -0,0 +1,10 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
# Stub declaration of ``AsyncConnections``; the body is elided (``...``) here.
class AsyncConnections: ...
+1 -1
View File
@@ -59,7 +59,7 @@ __all__ = [
# Asyncio only supported on Python 3.6+
if sys.version_info >= (3, 6):
from .._async.helpers import (
from .._async.helpers.actions import (
async_bulk,
async_reindex,
async_scan,
+4 -4
View File
@@ -42,10 +42,10 @@ try:
if sys.version_info < (3, 6):
raise ImportError
from .._async.helpers import async_bulk as async_bulk
from .._async.helpers import async_reindex as async_reindex
from .._async.helpers import async_scan as async_scan
from .._async.helpers import async_streaming_bulk as async_streaming_bulk
from .._async.helpers.actions import async_bulk as async_bulk
from .._async.helpers.actions import async_reindex as async_reindex
from .._async.helpers.actions import async_scan as async_scan
from .._async.helpers.actions import async_streaming_bulk as async_streaming_bulk
from .asyncsigner import AWSV4SignerAsyncAuth as AWSV4SignerAsyncAuth
from .signer import AWSV4SignerAuth as AWSV4SignerAuth
except (ImportError, SyntaxError):
+1 -2
View File
@@ -27,8 +27,7 @@
import six
from opensearchpy.connection.connections import get_connection
from .utils import AttrDict, DslBase, merge
from opensearchpy.helpers.utils import AttrDict, DslBase, merge
__all__ = ["tokenizer", "analyzer", "char_filter", "token_filter", "normalizer"]
@@ -37,10 +37,11 @@ import aiohttp
import pytest
from mock import patch
from multidict import CIMultiDict
from pytest import raises
from opensearchpy import AIOHttpConnection, __versionstr__
from opensearchpy import AIOHttpConnection, AsyncOpenSearch, __versionstr__, serializer
from opensearchpy.compat import reraise_exceptions
from opensearchpy.connection import Connection
from opensearchpy.connection import Connection, async_connections
from opensearchpy.exceptions import ConnectionError
pytestmark = pytest.mark.asyncio
@@ -373,3 +374,78 @@ class TestConnectionHttpbin:
conn = AIOHttpConnection("not.a.host.name")
with pytest.raises(ConnectionError):
await conn.perform_request("GET", "/")
async def test_default_connection_is_returned_by_default():
    """``get_connection()`` without an alias returns the "default" entry."""
    registry = async_connections.AsyncConnections()
    default_conn = object()
    other_conn = object()

    await registry.add_connection("default", default_conn)
    await registry.add_connection("not-default", other_conn)

    fetched = await registry.get_connection()
    assert fetched is default_conn
async def test_get_connection_created_connection_if_needed():
    """Configured aliases yield ``AsyncOpenSearch`` clients when requested."""
    registry = async_connections.AsyncConnections()
    await registry.configure(
        default={"hosts": ["opensearch.com"]}, local={"hosts": ["localhost"]}
    )

    default_client = await registry.get_connection()
    local_client = await registry.get_connection("local")

    for client in (default_client, local_client):
        assert isinstance(client, AsyncOpenSearch)

    assert [{"host": "opensearch.com"}] == default_client.transport.hosts
    assert [{"host": "localhost"}] == local_client.transport.hosts
async def test_configure_preserves_unchanged_connections():
    """Re-configuring keeps clients whose settings did not change."""
    registry = async_connections.AsyncConnections()

    await registry.configure(
        default={"hosts": ["opensearch.com"]}, local={"hosts": ["localhost"]}
    )
    old_default = await registry.get_connection()
    old_local = await registry.get_connection("local")

    # Only the "default" alias gets different settings this time.
    await registry.configure(
        default={"hosts": ["not-opensearch.com"]}, local={"hosts": ["localhost"]}
    )
    fresh_default = await registry.get_connection()
    fresh_local = await registry.get_connection("local")

    assert fresh_local is old_local
    assert fresh_default is not old_default
async def test_remove_connection_removes_both_conn_and_conf():
    """
    Removing an alias drops both its connection object and its configuration,
    so a later lookup of that alias raises ``KeyError``.
    """
    c = async_connections.AsyncConnections()

    await c.configure(
        default={"hosts": ["opensearch.com"]}, local={"hosts": ["localhost"]}
    )
    await c.add_connection("local2", object())

    await c.remove_connection("default")
    await c.get_connection("local2")
    await c.remove_connection("local2")

    # Each lookup gets its own ``raises`` block: inside a shared block the
    # second call would never execute once the first one raised.
    with raises(KeyError):
        await c.get_connection("local2")
    with raises(KeyError):
        await c.get_connection("default")
async def test_create_connection_constructs_client():
    """``create_connection`` registers a client built from the given kwargs."""
    registry = async_connections.AsyncConnections()
    await registry.create_connection("testing", hosts=["opensearch.com"])

    client = await registry.get_connection("testing")
    expected_hosts = [{"host": "opensearch.com"}]
    assert expected_hosts == client.transport.hosts
async def test_create_connection_adds_our_serializer():
    """Clients built by ``create_connection`` use ``serializer.serializer``."""
    registry = async_connections.AsyncConnections()
    await registry.create_connection("testing", hosts=["opensearch.com"])

    client = await registry.get_connection("testing")
    assert client.transport.serializer is serializer.serializer
@@ -0,0 +1,227 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
import pytest
from mock import Mock
from pytest import fixture
from opensearchpy.connection.async_connections import add_connection, async_connections
pytestmark = pytest.mark.asyncio
@fixture
async def mock_client(dummy_response):
    """
    Register a ``Mock`` client under the "mock" alias and yield it.

    ``client.search`` returns ``dummy_response``. After the test the shared
    ``async_connections`` registry is reset.
    """
    client = Mock()
    client.search.return_value = dummy_response
    await add_connection("mock", client)
    yield client
    # The registry stores clients in ``_conns`` (plural); assigning ``_conn``
    # would only create an unused attribute and leave "mock" registered.
    async_connections._conns = {}
    async_connections._kwargs = {}
@fixture
def dummy_response():
    """Canned search response containing four hits."""
    hits = [
        {
            "_index": "test-index",
            "_id": "opensearch",
            "_score": 12.0,
            "_source": {"city": "Amsterdam", "name": "OpenSearch"},
        },
        {
            "_index": "test-index",
            "_id": "42",
            "_score": 11.123,
            "_routing": "opensearch",
            "_source": {
                "name": {"first": "Shay", "last": "Bannon"},
                "lang": "java",
                "twitter": "kimchy",
            },
        },
        {
            "_index": "test-index",
            "_id": "47",
            "_score": 1,
            "_routing": "opensearch",
            "_source": {
                "name": {"first": "Honza", "last": "Král"},
                "lang": "python",
                "twitter": "honzakral",
            },
        },
        {
            "_index": "test-index",
            "_id": "53",
            "_score": 16.0,
            "_routing": "opensearch",
        },
    ]
    return {
        "_shards": {"failed": 0, "successful": 10, "total": 10},
        "hits": {"hits": hits, "max_score": 12.0, "total": 123},
        "timed_out": False,
        "took": 123,
    }
@fixture
def aggs_search():
    """
    Build an ``AsyncSearch`` on "flat-git" with three top-level aggregations:
    ``popular_files`` (with two nested metrics), ``per_month`` and
    ``sum_lines``.
    """
    from opensearchpy._async.helpers.search import AsyncSearch

    search = AsyncSearch(index="flat-git")

    popular_files = search.aggs.bucket(
        "popular_files", "terms", field="files", size=2
    )
    # Keep chaining on each call's return value, exactly like a fluent chain.
    with_line_stats = popular_files.metric("line_stats", "stats", field="stats.lines")
    with_line_stats.metric(
        "top_commits", "top_hits", size=2, _source=["stats.*", "committed_date"]
    )

    search.aggs.bucket(
        "per_month", "date_histogram", interval="month", field="info.committed_date"
    )
    search.aggs.metric("sum_lines", "sum", field="stats.lines")
    return search
@fixture
def aggs_data():
    """
    Canned aggregation response with ``sum_lines``, ``per_month`` and
    ``popular_files`` results and no search hits.
    """

    def commit_hit(sha, files, deletions, lines, insertions, committed_date):
        # One ``top_commits`` hit; every call builds fresh dicts.
        return {
            "_id": sha,
            "_type": "doc",
            "_source": {
                "stats": {
                    "files": files,
                    "deletions": deletions,
                    "lines": lines,
                    "insertions": insertions,
                },
                "committed_date": committed_date,
            },
            "_score": 1.0,
            "_index": "flat-git",
        }

    def month_bucket(doc_count, key, key_as_string):
        # One ``per_month`` date-histogram bucket.
        return {"doc_count": doc_count, "key": key, "key_as_string": key_as_string}

    per_month = {
        "buckets": [
            month_bucket(38, 1393632000000, "2014-03-01T00:00:00.000Z"),
            month_bucket(11, 1396310400000, "2014-04-01T00:00:00.000Z"),
            month_bucket(3, 1398902400000, "2014-05-01T00:00:00.000Z"),
        ]
    }

    opensearchpy_bucket = {
        "key": "opensearchpy",
        "line_stats": {
            "count": 40,
            "max": 228.0,
            "min": 2.0,
            "sum": 2151.0,
            "avg": 53.775,
        },
        "doc_count": 40,
        "top_commits": {
            "hits": {
                "total": 40,
                "hits": [
                    commit_hit(
                        "3ca6e1e73a071a705b4babd2f581c91a2a3e5037",
                        4,
                        7,
                        30,
                        23,
                        "2014-05-02T13:47:19",
                    ),
                    commit_hit(
                        "eb3e543323f189fd7b698e66295427204fff5755",
                        1,
                        0,
                        18,
                        18,
                        "2014-05-01T13:32:14",
                    ),
                ],
                "max_score": 1.0,
            }
        },
    }

    test_dsl_bucket = {
        "key": "test_opensearchpy/test_dsl",
        "line_stats": {
            "count": 35,
            "max": 228.0,
            "min": 2.0,
            "sum": 1939.0,
            "avg": 55.4,
        },
        "doc_count": 35,
        "top_commits": {
            "hits": {
                "total": 35,
                "hits": [
                    commit_hit(
                        "3ca6e1e73a071a705b4babd2f581c91a2a3e5037",
                        4,
                        7,
                        30,
                        23,
                        "2014-05-02T13:47:19",
                    ),
                    commit_hit(
                        "dd15b6ba17dd9ba16363a51f85b31f66f1fb1157",
                        3,
                        18,
                        62,
                        44,
                        "2014-05-01T13:30:44",
                    ),
                ],
                "max_score": 1.0,
            }
        },
    }

    popular_files = {
        "buckets": [opensearchpy_bucket, test_dsl_bucket],
        "doc_count_error_upper_bound": 0,
        "sum_other_doc_count": 120,
    }

    return {
        "took": 4,
        "timed_out": False,
        "_shards": {"total": 1, "successful": 1, "failed": 0},
        "hits": {"total": 52, "hits": [], "max_score": 0.0},
        "aggregations": {
            "sum_lines": {"value": 25052.0},
            "per_month": per_month,
            "popular_files": popular_files,
        },
    }
@@ -0,0 +1,629 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
from __future__ import unicode_literals
import codecs
import ipaddress
import pickle
from datetime import datetime
from hashlib import sha256
import pytest
from pytest import raises
from opensearchpy import InnerDoc, MetaField, Range, analyzer
from opensearchpy._async.helpers import document
from opensearchpy._async.helpers.index import AsyncIndex
from opensearchpy._async.helpers.mapping import AsyncMapping
from opensearchpy.exceptions import IllegalOperation, ValidationException
from opensearchpy.helpers import field, utils
pytestmark = pytest.mark.asyncio
# Inner document with a single text field.
class MyInner(InnerDoc):
old_field = field.Text()
# Base test document: keyword title, text name, date, and a ``MyInner`` object.
class MyDoc(document.AsyncDocument):
title = field.Keyword()
name = field.Text()
created_at = field.Date()
inner = field.Object(MyInner)
# Subclass of ``MyDoc`` that redefines ``name`` as a keyword and sets the
# index name to "default-index".
class MySubDoc(MyDoc):
name = field.Keyword()
class Index:
name = "default-index"
# Second base document contributing a single long field.
class MyDoc2(document.AsyncDocument):
extra = field.Long()
# Document inheriting from both ``MyDoc2`` and ``MySubDoc``; adds nothing itself.
class MyMultiSubDoc(MyDoc2, MySubDoc):
pass
# Inner document with a text title and a multi-valued keyword ``tags`` field.
class Comment(InnerDoc):
title = field.Text()
tags = field.Keyword(multi=True)
# Document with a nested ``Comment`` field, stored in "test-doc-with-nested".
class DocWithNested(document.AsyncDocument):
comments = field.Nested(Comment)
class Index:
name = "test-doc-with-nested"
# Document with a multi-valued text ``files`` field, stored in "test-git".
class SimpleCommit(document.AsyncDocument):
files = field.Text(multi=True)
class Index:
name = "test-git"
class Secret(str):
    """Marker ``str`` subclass; ``SecretField`` returns such values unchanged."""
class SecretField(field.CustomField):
    """Custom text field that stores its value ROT13-encoded."""

    builtin_type = "text"

    def _serialize(self, data):
        """Return *data* encoded with the "rot_13" codec."""
        encoded = codecs.encode(data, "rot_13")
        return encoded

    def _deserialize(self, data):
        """Return *data* as a ``Secret``, decoding it unless it already is one."""
        if not isinstance(data, Secret):
            data = Secret(codecs.decode(data, "rot_13"))
        return data
# Document whose ``title`` is a ``SecretField``; stored in "test-secret-doc".
class SecretDoc(document.AsyncDocument):
title = SecretField(index="no")
class Index:
name = "test-secret-doc"
# Document with a nested ``SecretDoc`` field; stored in "test-nested-secret".
class NestedSecret(document.AsyncDocument):
secrets = field.Nested(SecretDoc)
class Index:
name = "test-nested-secret"
# Document whose nested ``comments`` field declares a required keyword
# ``title``; stored in "test-required".
class OptionalObjectWithRequiredField(document.AsyncDocument):
comments = field.Nested(properties={"title": field.Keyword(required=True)})
class Index:
name = "test-required"
# Document with a single IP field; stored in "test-host".
class Host(document.AsyncDocument):
ip = field.Ip()
class Index:
name = "test-host"
async def test_range_serializes_properly():
    """A ``LongRange`` value serializes the same from a ``Range`` or a dict."""

    class D(document.AsyncDocument):
        lr = field.LongRange()

    expected = {"lr": {"lt": 42}}

    from_range = D(lr=Range(lt=42))
    assert 40 in from_range.lr
    assert 47 not in from_range.lr
    assert expected == from_range.to_dict()

    from_dict = D(lr={"lt": 42})
    assert expected == from_dict.to_dict()
async def test_range_deserializes_properly():
    """``from_opensearch`` turns a range dict into a ``Range`` instance."""

    class D(InnerDoc):
        lr = field.LongRange()

    loaded = D.from_opensearch({"lr": {"lt": 42}}, True)
    assert isinstance(loaded.lr, Range)
    assert 40 in loaded.lr
    assert 47 not in loaded.lr
async def test_resolve_nested():
    """``resolve_nested`` returns the nested path and the resolved field."""
    resolved = NestedSecret._index.resolve_nested("secrets.title")
    nested_path, resolved_field = resolved
    assert nested_path == ["secrets"]
    assert resolved_field is NestedSecret._doc_type.mapping["secrets"]["title"]
async def test_conflicting_mapping_raises_error_in_index_to_dict():
    """Two documents mapping ``name`` differently make ``to_dict`` raise."""

    class A(document.AsyncDocument):
        name = field.Text()

    class B(document.AsyncDocument):
        name = field.Keyword()

    index = AsyncIndex("i")
    for doc_class in (A, B):
        index.document(doc_class)

    with raises(ValueError):
        index.to_dict()
async def test_ip_address_serializes_properly():
    """An ``IPv4Address`` value is serialized as its string form."""
    address = ipaddress.IPv4Address("10.0.0.1")
    host = Host(ip=address)
    expected = {"ip": "10.0.0.1"}
    assert expected == host.to_dict()
async def test_matches_uses_index():
    """``_matches`` accepts hits from the document's index only."""
    own_hit = {"_index": "test-git"}
    foreign_hit = {"_index": "not-test-git"}
    assert SimpleCommit._matches(own_hit)
    assert not SimpleCommit._matches(foreign_hit)
async def test_matches_with_no_name_always_matches():
    """A document without an index name matches any hit."""

    class D(document.AsyncDocument):
        pass

    for hit in ({}, {"_index": "whatever"}):
        assert D._matches(hit)
async def test_matches_accepts_wildcards():
    """A wildcard index name is matched as a pattern."""

    class MyDoc(document.AsyncDocument):
        class Index:
            name = "my-*"

    matching_hit = {"_index": "my-index"}
    other_hit = {"_index": "not-my-index"}
    assert MyDoc._matches(matching_hit)
    assert not MyDoc._matches(other_hit)
async def test_assigning_attrlist_to_field():
    """Assigning an ``AttrList`` stores the very list it wraps."""
    commit = SimpleCommit()
    raw_files = ["README", "README.rst"]
    commit.files = utils.AttrList(raw_files)

    serialized = commit.to_dict()
    assert serialized["files"] is raw_files
async def test_optional_inner_objects_are_not_validated_if_missing():
    """``full_clean`` returns ``None`` when the optional nested field is absent."""
    doc = OptionalObjectWithRequiredField()
    result = doc.full_clean()
    assert result is None
async def test_custom_field():
    """``SecretField`` encodes on ``to_dict`` and decodes on load."""
    created = SecretDoc(title=Secret("Hello"))
    assert {"title": "Uryyb"} == created.to_dict()
    assert created.title == "Hello"

    loaded = SecretDoc.from_opensearch({"_source": {"title": "Uryyb"}})
    assert loaded.title == "Hello"
    assert isinstance(loaded.title, Secret)
async def test_custom_field_mapping():
    """The custom field is mapped using its ``builtin_type`` and parameters."""
    expected = {"properties": {"title": {"index": "no", "type": "text"}}}
    assert expected == SecretDoc._doc_type.mapping.to_dict()
async def test_custom_field_in_nested():
    """The custom field also works inside a nested document."""
    doc = NestedSecret()
    secret_doc = SecretDoc(title=Secret("Hello"))
    doc.secrets.append(secret_doc)

    expected = {"secrets": [{"title": "Uryyb"}]}
    assert expected == doc.to_dict()
    assert doc.secrets[0].title == "Hello"
async def test_multi_works_after_doc_has_been_saved():
    """A multi field can still be appended to after ``full_clean``."""
    commit = SimpleCommit()
    commit.full_clean()
    commit.files.append("setup.py")

    expected = {"files": ["setup.py"]}
    assert commit.to_dict() == expected
async def test_multi_works_in_nested_after_doc_has_been_serialized():
    """Issue #359: an empty nested multi field stays ``[]`` after ``to_dict``."""
    doc = DocWithNested(comments=[Comment(title="First!")])
    expected = {"comments": [{"title": "First!"}]}

    assert [] == doc.comments[0].tags
    assert expected == doc.to_dict()
    assert [] == doc.comments[0].tags
async def test_null_value_for_object():
    """An object field explicitly set to ``None`` reads back as ``None``."""
    doc = MyDoc(inner=None)
    value = doc.inner
    assert value is None
async def test_inherited_doc_types_can_override_index():
    """A subclass may replace the inherited ``Index`` definition entirely."""

    class MyDocDifferentIndex(MySubDoc):
        class Index:
            name = "not-default-index"
            settings = {"number_of_replicas": 0}
            aliases = {"a": {}}
            analyzers = [analyzer("my_analizer", tokenizer="keyword")]

    expected_mappings = {
        "properties": {
            "created_at": {"type": "date"},
            "inner": {
                "type": "object",
                "properties": {"old_field": {"type": "text"}},
            },
            "name": {"type": "keyword"},
            "title": {"type": "keyword"},
        }
    }
    expected_settings = {
        "analysis": {
            "analyzer": {"my_analizer": {"tokenizer": "keyword", "type": "custom"}}
        },
        "number_of_replicas": 0,
    }

    assert MyDocDifferentIndex._index._name == "not-default-index"
    assert MyDocDifferentIndex()._get_index() == "not-default-index"
    assert MyDocDifferentIndex._index.to_dict() == {
        "aliases": {"a": {}},
        "mappings": expected_mappings,
        "settings": expected_settings,
    }
async def test_to_dict_with_meta():
    """``to_dict(True)`` includes the index and routing metadata."""
    doc = MySubDoc(title="hello")
    doc.meta.routing = "some-parent"

    expected = {
        "_index": "default-index",
        "_routing": "some-parent",
        "_source": {"title": "hello"},
    }
    assert expected == doc.to_dict(True)
async def test_to_dict_with_meta_includes_custom_index():
    """An index set on ``meta`` is the one reported by ``to_dict(True)``."""
    doc = MySubDoc(title="hello")
    doc.meta.index = "other-index"

    expected = {"_index": "other-index", "_source": {"title": "hello"}}
    assert expected == doc.to_dict(True)
async def test_to_dict_without_skip_empty_will_include_empty_fields():
    """Empty values are dropped by default and kept with ``skip_empty=False``."""
    doc = MySubDoc(tags=[], title=None, inner={})

    assert {} == doc.to_dict()

    everything = {"tags": [], "title": None, "inner": {}}
    assert everything == doc.to_dict(skip_empty=False)
async def test_attribute_can_be_removed():
    """Deleting an attribute removes it from the document's ``_d_`` storage."""
    doc = MyDoc(title="hello")
    del doc.title
    assert "title" not in doc._d_
async def test_doc_type_can_be_correctly_pickled():
    """A document survives a pickle round trip with meta and nested data."""
    original = DocWithNested(
        title="Hello World!", comments=[Comment(title="hellp")], meta={"id": 42}
    )

    payload = pickle.dumps(original)
    restored = pickle.loads(payload)

    assert restored == original
    assert 42 == restored.meta.id
    assert "Hello World!" == restored.title
    assert [{"title": "hellp"}] == restored.comments
    assert isinstance(restored.comments[0], Comment)
async def test_meta_is_accessible_even_on_empty_doc():
    """Reading ``meta`` does not raise, with or without fields set."""
    empty_doc = MyDoc()
    empty_doc.meta

    titled_doc = MyDoc(title="aaa")
    titled_doc.meta
async def test_meta_field_mapping():
    """``MetaField`` entries declared in ``Meta`` end up in the mapping."""

    class User(document.AsyncDocument):
        username = field.Text()

        class Meta:
            all = MetaField(enabled=False)
            _index = MetaField(enabled=True)
            dynamic = MetaField("strict")
            dynamic_templates = MetaField([42])

    expected = {
        "properties": {"username": {"type": "text"}},
        "_all": {"enabled": False},
        "_index": {"enabled": True},
        "dynamic": "strict",
        "dynamic_templates": [42],
    }
    assert expected == User._doc_type.mapping.to_dict()
async def test_multi_value_fields():
    """A multi field starts as an empty list and can be appended to."""

    class Blog(document.AsyncDocument):
        tags = field.Keyword(multi=True)

    blog = Blog()
    assert [] == blog.tags

    for tag in ("search", "python"):
        blog.tags.append(tag)
    assert ["search", "python"] == blog.tags
async def test_docs_with_properties():
    """Python properties on a document work and are not stored as fields."""

    class User(document.AsyncDocument):
        pwd_hash = field.Text()

        def check_password(self, pwd):
            return sha256(pwd).hexdigest() == self.pwd_hash

        @property
        def password(self):
            raise AttributeError("readonly")

        @password.setter
        def password(self, pwd):
            self.pwd_hash = sha256(pwd).hexdigest()

    user = User(pwd_hash=sha256(b"secret").hexdigest())
    assert user.check_password(b"secret")
    assert not user.check_password(b"not-secret")

    # Setting the property replaces the stored hash.
    user.password = b"not-secret"
    assert "password" not in user._d_
    assert not user.check_password(b"secret")
    assert user.check_password(b"not-secret")

    with raises(AttributeError):
        user.password
async def test_nested_can_be_assigned_to():
    """Nested values taken from one document can be assigned to another."""
    source_doc = DocWithNested(comments=[Comment(title="First!")])
    target_doc = DocWithNested()

    target_doc.comments = source_doc.comments

    assert isinstance(source_doc.comments[0], Comment)
    assert target_doc.comments == [{"title": "First!"}]
    assert {"comments": [{"title": "First!"}]} == target_doc.to_dict()
    assert isinstance(target_doc.comments[0], Comment)
async def test_nested_can_be_none():
    """A nested field set to ``None`` is left out of ``to_dict``."""
    doc = DocWithNested(comments=None, title="Hello World!")
    expected = {"title": "Hello World!"}
    assert expected == doc.to_dict()
async def test_nested_defaults_to_list_and_can_be_updated():
    """A nested field defaults to ``[]`` and accepts appended dicts."""
    doc = DocWithNested()
    assert [] == doc.comments

    doc.comments.append({"title": "hello World!"})
    expected = {"comments": [{"title": "hello World!"}]}
    assert expected == doc.to_dict()
async def test_to_dict_is_recursive_and_can_cope_with_multi_values():
    """``to_dict`` serializes lists of values and lists of inner documents."""
    doc = MyDoc(name=["a", "b", "c"])
    doc.inner = [MyInner(old_field="of1"), MyInner(old_field="of2")]

    assert isinstance(doc.inner[0], MyInner)

    expected = {
        "name": ["a", "b", "c"],
        "inner": [{"old_field": "of1"}, {"old_field": "of2"}],
    }
    assert expected == doc.to_dict()
async def test_to_dict_ignores_empty_collections():
    """Empty dicts and lists are skipped; ``""``, ``0`` and ``False`` are kept."""
    doc = MySubDoc(name="", address={}, count=0, valid=False, tags=[])
    expected = {"name": "", "count": 0, "valid": False}
    assert expected == doc.to_dict()
async def test_declarative_mapping_definition():
    """Fields declared on ``MyDoc`` produce the expected mapping."""
    assert issubclass(MyDoc, document.AsyncDocument)
    assert hasattr(MyDoc, "_doc_type")

    expected_properties = {
        "created_at": {"type": "date"},
        "name": {"type": "text"},
        "title": {"type": "keyword"},
        "inner": {"type": "object", "properties": {"old_field": {"type": "text"}}},
    }
    assert {"properties": expected_properties} == MyDoc._doc_type.mapping.to_dict()
async def test_you_can_supply_own_mapping_instance():
    """A mapping instance given in ``Meta`` is extended with declared fields."""

    class MyD(document.AsyncDocument):
        title = field.Text()

        class Meta:
            mapping = AsyncMapping()
            mapping.meta("_all", enabled=False)

    expected = {
        "_all": {"enabled": False},
        "properties": {"title": {"type": "text"}},
    }
    assert expected == MyD._doc_type.mapping.to_dict()
async def test_document_can_be_created_dynamically():
    """Fields, including undeclared inner ones, can be set after creation."""
    now = datetime.now()
    doc = MyDoc(title="hello")
    doc.name = "My Fancy Document!"
    doc.created_at = now

    inner_obj = doc.inner
    # consistent returns
    assert inner_obj is doc.inner
    inner_obj.old_field = "Already defined."

    doc.inner.new_field = ["undefined", "field"]

    expected = {
        "title": "hello",
        "name": "My Fancy Document!",
        "created_at": now,
        "inner": {"old_field": "Already defined.", "new_field": ["undefined", "field"]},
    }
    assert expected == doc.to_dict()
async def test_invalid_date_will_raise_exception():
    """``full_clean`` rejects a non-date value in a date field."""
    doc = MyDoc()
    doc.created_at = "not-a-date"

    with raises(ValidationException):
        doc.full_clean()
async def test_document_inheritance():
    """``MySubDoc`` inherits the parent's fields and overrides ``name``."""
    for base in (MyDoc, document.AsyncDocument):
        assert issubclass(MySubDoc, base)
    assert hasattr(MySubDoc, "_doc_type")

    expected_properties = {
        "created_at": {"type": "date"},
        "name": {"type": "keyword"},
        "title": {"type": "keyword"},
        "inner": {"type": "object", "properties": {"old_field": {"type": "text"}}},
    }
    assert {"properties": expected_properties} == MySubDoc._doc_type.mapping.to_dict()
async def test_child_class_can_override_parent():
    """A child's object field overrides options and merges properties."""

    class A(document.AsyncDocument):
        o = field.Object(dynamic=False, properties={"a": field.Text()})

    class B(A):
        o = field.Object(dynamic="strict", properties={"b": field.Text()})

    expected_object = {
        "dynamic": "strict",
        "properties": {"a": {"type": "text"}, "b": {"type": "text"}},
        "type": "object",
    }
    assert {"properties": {"o": expected_object}} == B._doc_type.mapping.to_dict()
async def test_meta_fields_are_stored_in_meta_and_ignored_by_to_dict():
    """Meta values live on ``meta`` and stay out of the document body."""
    doc = MySubDoc(meta={"id": 42}, name="My First doc!")
    doc.meta.index = "my-index"

    assert doc.meta.index == "my-index"
    assert doc.meta.id == 42

    expected_body = {"name": "My First doc!"}
    expected_meta = {"id": 42, "index": "my-index"}
    assert expected_body == doc.to_dict()
    assert expected_meta == doc.meta.to_dict()
async def test_index_inheritance():
    """``MyMultiSubDoc`` combines the fields of both of its bases."""
    for base in (MySubDoc, MyDoc2, document.AsyncDocument):
        assert issubclass(MyMultiSubDoc, base)
    for attribute in ("_doc_type", "_index"):
        assert hasattr(MyMultiSubDoc, attribute)

    expected_properties = {
        "created_at": {"type": "date"},
        "name": {"type": "keyword"},
        "title": {"type": "keyword"},
        "inner": {"type": "object", "properties": {"old_field": {"type": "text"}}},
        "extra": {"type": "long"},
    }
    assert {
        "properties": expected_properties
    } == MyMultiSubDoc._doc_type.mapping.to_dict()
async def test_meta_fields_can_be_set_directly_in_init():
    """An ``_id`` keyword passed to the constructor is stored as ``meta.id``."""
    marker = object()
    doc = MyDoc(_id=marker, title="Hello World!")
    assert doc.meta.id is marker
async def test_save_no_index(mock_client):
    """Saving a ``MyDoc`` through the mock client raises a validation error."""
    doc = MyDoc()
    with raises(ValidationException):
        await doc.save(using="mock")
async def test_delete_no_index(mock_client):
    """Deleting a ``MyDoc`` through the mock client raises a validation error."""
    doc = MyDoc()
    with raises(ValidationException):
        await doc.delete(using="mock")
async def test_update_no_fields():
    """Calling ``update`` without any fields raises ``IllegalOperation``."""
    doc = MyDoc()
    with raises(IllegalOperation):
        await doc.update()
async def test_search_with_custom_alias_and_index(mock_client):
    """``search`` keeps the connection alias and index list it was given."""
    indices = ["custom_index1", "custom_index2"]
    search = MyDoc.search(using="staging", index=indices)

    assert search._using == "staging"
    assert search._index == ["custom_index1", "custom_index2"]
async def test_from_opensearch_respects_underscored_non_meta_fields():
    """Underscored keys inside ``fields`` and ``_source`` are kept as data."""
    hit = {
        "_index": "test-index",
        "_id": "opensearch",
        "_score": 12.0,
        "fields": {"hello": "world", "_routing": "opensearch", "_tags": ["search"]},
        "_source": {
            "city": "Amsterdam",
            "name": "OpenSearch",
            "_tagline": "You know, for search",
        },
    }

    class Company(document.AsyncDocument):
        class Index:
            name = "test-company"

    company = Company.from_opensearch(hit)

    assert company.meta.fields._tags == ["search"]
    assert company.meta.fields._routing == "opensearch"
    assert company._tagline == "You know, for search"
async def test_nested_and_object_inner_doc():
    """The same inner document can back both an object and a nested field."""

    class MySubDocWithNested(MyDoc):
        nested_inner = field.Nested(MyInner)

    mapping = MySubDocWithNested._doc_type.mapping.to_dict()
    props = mapping["properties"]

    expected = {
        "created_at": {"type": "date"},
        "inner": {"properties": {"old_field": {"type": "text"}}, "type": "object"},
        "name": {"type": "text"},
        "nested_inner": {
            "properties": {"old_field": {"type": "text"}},
            "type": "nested",
        },
        "title": {"type": "keyword"},
    }
    assert props == expected
@@ -0,0 +1,186 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
from datetime import datetime
import pytest
from opensearchpy._async.helpers.faceted_search import AsyncFacetedSearch
from opensearchpy.helpers.faceted_search import DateHistogramFacet, TermsFacet
pytestmark = pytest.mark.asyncio
# Faceted search used by the tests below: two doc types, two query fields
# ("title" carries a ^5 boost suffix) and two terms facets.
class BlogSearch(AsyncFacetedSearch):
doc_types = ["user", "post"]
fields = (
"title^5",
"body",
)
facets = {
"category": TermsFacet(field="category.raw"),
"tags": TermsFacet(field="tags"),
}
async def test_query_is_created_properly():
    """Without filters every facet aggregation is wrapped in ``match_all``."""
    blog_search = BlogSearch("python search")
    search = blog_search.build_search()

    assert search._doc_type == ["user", "post"]

    expected_aggs = {
        "_filter_tags": {
            "filter": {"match_all": {}},
            "aggs": {"tags": {"terms": {"field": "tags"}}},
        },
        "_filter_category": {
            "filter": {"match_all": {}},
            "aggs": {"category": {"terms": {"field": "category.raw"}}},
        },
    }
    expected = {
        "aggs": expected_aggs,
        "query": {
            "multi_match": {"fields": ("title^5", "body"), "query": "python search"}
        },
        "highlight": {"fields": {"body": {}, "title": {}}},
    }
    assert expected == search.to_dict()
async def test_query_is_created_properly_with_sort_tuple():
    """A sort tuple is translated into the ``sort`` section of the body."""
    blog_search = BlogSearch("python search", sort=("category", "-title"))
    search = blog_search.build_search()

    assert search._doc_type == ["user", "post"]

    expected_aggs = {
        "_filter_tags": {
            "filter": {"match_all": {}},
            "aggs": {"tags": {"terms": {"field": "tags"}}},
        },
        "_filter_category": {
            "filter": {"match_all": {}},
            "aggs": {"category": {"terms": {"field": "category.raw"}}},
        },
    }
    expected = {
        "aggs": expected_aggs,
        "query": {
            "multi_match": {"fields": ("title^5", "body"), "query": "python search"}
        },
        "highlight": {"fields": {"body": {}, "title": {}}},
        "sort": ["category", {"title": {"order": "desc"}}],
    }
    assert expected == search.to_dict()
async def test_filter_is_applied_to_search_but_not_relevant_facet():
    """A facet's filter applies to the search and to the other facets only."""
    blog_search = BlogSearch("python search", filters={"category": "opensearch"})
    search = blog_search.build_search()

    expected_aggs = {
        "_filter_tags": {
            "filter": {"terms": {"category.raw": ["opensearch"]}},
            "aggs": {"tags": {"terms": {"field": "tags"}}},
        },
        "_filter_category": {
            "filter": {"match_all": {}},
            "aggs": {"category": {"terms": {"field": "category.raw"}}},
        },
    }
    expected = {
        "aggs": expected_aggs,
        "post_filter": {"terms": {"category.raw": ["opensearch"]}},
        "query": {
            "multi_match": {"fields": ("title^5", "body"), "query": "python search"}
        },
        "highlight": {"fields": {"body": {}, "title": {}}},
    }
    assert expected == search.to_dict()
async def test_filters_are_applied_to_search_ant_relevant_facets():
    """With two filters, each facet is filtered by the other facet's filter."""
    blog_search = BlogSearch(
        "python search",
        filters={"category": "opensearch", "tags": ["python", "django"]},
    )
    search = blog_search.build_search()
    body = search.to_dict()

    # we need to test post_filter without relying on order
    must_clauses = body["post_filter"]["bool"].pop("must")
    assert len(must_clauses) == 2
    assert {"terms": {"category.raw": ["opensearch"]}} in must_clauses
    assert {"terms": {"tags": ["python", "django"]}} in must_clauses

    expected_aggs = {
        "_filter_tags": {
            "filter": {"terms": {"category.raw": ["opensearch"]}},
            "aggs": {"tags": {"terms": {"field": "tags"}}},
        },
        "_filter_category": {
            "filter": {"terms": {"tags": ["python", "django"]}},
            "aggs": {"category": {"terms": {"field": "category.raw"}}},
        },
    }
    expected = {
        "aggs": expected_aggs,
        "query": {
            "multi_match": {"fields": ("title^5", "body"), "query": "python search"}
        },
        "post_filter": {"bool": {}},
        "highlight": {"fields": {"body": {}, "title": {}}},
    }
    assert expected == body
async def test_date_histogram_facet_with_1970_01_01_date():
    """Bucket keys ``None`` and ``0`` both map to 1970-01-01 00:00."""
    facet = DateHistogramFacet()
    epoch = datetime(1970, 1, 1, 0, 0)

    assert facet.get_value({"key": None}) == epoch
    assert facet.get_value({"key": 0}) == epoch
@pytest.mark.parametrize(
    ["interval_type", "interval"],
    [
        ("interval", "year"),
        ("calendar_interval", "year"),
        ("interval", "month"),
        ("calendar_interval", "month"),
        ("interval", "week"),
        ("calendar_interval", "week"),
        ("interval", "day"),
        ("calendar_interval", "day"),
        ("fixed_interval", "day"),
        ("interval", "hour"),
        ("fixed_interval", "hour"),
        ("interval", "1Y"),
        ("calendar_interval", "1Y"),
        ("interval", "1M"),
        ("calendar_interval", "1M"),
        ("interval", "1w"),
        ("calendar_interval", "1w"),
        ("interval", "1d"),
        ("calendar_interval", "1d"),
        ("fixed_interval", "1d"),
        ("interval", "1h"),
        ("fixed_interval", "1h"),
    ],
)
async def test_date_histogram_interval_types(interval_type, interval):
    """Each interval keyword is passed through to the aggregation as given."""
    facet = DateHistogramFacet(field="@timestamp", **{interval_type: interval})

    expected = {
        "date_histogram": {
            "field": "@timestamp",
            interval_type: interval,
            "min_doc_count": 0,
        }
    }
    assert facet.get_aggregation().to_dict() == expected

    # Must not raise for any of the interval spellings above.
    facet.get_value_filter(datetime.now())
async def test_date_histogram_no_interval_keyerror():
    """Without any interval, ``get_value_filter`` raises ``KeyError('interval')``."""
    facet = DateHistogramFacet(field="@timestamp")

    with pytest.raises(KeyError) as exc_info:
        facet.get_value_filter(datetime.now())

    message = str(exc_info.value)
    assert message == "'interval'"
@@ -0,0 +1,178 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
import string
from random import choice
import pytest
from pytest import raises
from opensearchpy import Date, Text, analyzer
from opensearchpy._async.helpers.document import AsyncDocument
from opensearchpy._async.helpers.index import AsyncIndex
pytestmark = pytest.mark.asyncio
# Document with a text title and a date field, shared by the index tests below.
class Post(AsyncDocument):
title = Text()
published_from = Date()
async def test_multiple_doc_types_will_combine_mappings():
    """Documents registered on one index have their mappings merged."""

    class User(AsyncDocument):
        username = Text()

    index = AsyncIndex("i")
    for doc_class in (Post, User):
        index.document(doc_class)

    expected_properties = {
        "title": {"type": "text"},
        "username": {"type": "text"},
        "published_from": {"type": "date"},
    }
    assert {"mappings": {"properties": expected_properties}} == index.to_dict()
async def test_search_is_limited_to_index_name():
    """A search created from an index targets exactly that index."""
    index = AsyncIndex("my-index")
    search = index.search()
    assert search._index == ["my-index"]
async def test_cloned_index_has_copied_settings_and_using():
    """``clone`` keeps the client and copies, rather than shares, settings."""
    client = object()
    original = AsyncIndex("my-index", using=client)
    original.settings(number_of_shards=1)

    clone = original.clone("my-other-index")

    assert "my-other-index" == clone._name
    assert client is clone._using
    assert original._settings == clone._settings
    assert original._settings is not clone._settings
async def test_cloned_index_has_analysis_attribute():
    """
    Regression test for Issue #582, where `Index.clone()` did not copy the
    `_analysis` attribute over to the clone.
    """
    client = object()
    original = AsyncIndex("my-index", using=client)

    analyzer_name = "".join(choice(string.ascii_letters) for _ in range(100))
    custom_analyzer = analyzer(analyzer_name, tokenizer="standard", filter="standard")
    original.analyzer(custom_analyzer)

    clone = original.clone("my-clone-index")

    original_analysis = original.to_dict()["settings"]["analysis"]
    clone_analysis = clone.to_dict()["settings"]["analysis"]
    assert original_analysis == clone_analysis
async def test_settings_are_saved():
    """
    Settings from successive ``settings()`` calls accumulate in ``to_dict``.

    Declared ``async`` like every other test in this module, which is marked
    with ``pytest.mark.asyncio`` at module level.
    """
    i = AsyncIndex("i")
    i.settings(number_of_replicas=0)
    i.settings(number_of_shards=1)

    assert {"settings": {"number_of_shards": 1, "number_of_replicas": 0}} == i.to_dict()
async def test_registered_doc_type_included_in_to_dict():
    """A registered document contributes its mapping to ``to_dict``."""
    index = AsyncIndex("i", using="alias")
    index.document(Post)

    expected_properties = {
        "title": {"type": "text"},
        "published_from": {"type": "date"},
    }
    assert {"mappings": {"properties": expected_properties}} == index.to_dict()
async def test_registered_doc_type_included_in_search():
    """A search created from the index carries the registered document class."""
    index = AsyncIndex("i", using="alias")
    index.document(Post)

    index_search = index.search()

    assert index_search._doc_type == [Post]
async def test_aliases_add_to_object():
    """aliases(**kwargs) stores the given aliases on the index object."""
    # A random 100-letter alias name.
    alias_name = "".join([choice(string.ascii_letters) for _ in range(100)])
    aliases = {alias_name: {}}

    target = AsyncIndex("i", using="alias")
    target.aliases(**aliases)

    assert target._aliases == aliases
async def test_aliases_returned_from_to_dict():
    """Aliases set on the index are also present under "aliases" in to_dict()."""
    # A random 100-letter alias name.
    alias_name = "".join([choice(string.ascii_letters) for _ in range(100)])
    aliases = {alias_name: {}}

    target = AsyncIndex("i", using="alias")
    target.aliases(**aliases)

    stored = target._aliases
    serialized = target.to_dict()["aliases"]
    assert stored == serialized == aliases
async def test_analyzers_added_to_object():
    """analyzer() records the custom analyzer definition in the index's _analysis."""
    # A random 100-letter analyzer name.
    analyzer_name = "".join([choice(string.ascii_letters) for _ in range(100)])
    custom_analyzer = analyzer(analyzer_name, tokenizer="standard", filter="standard")

    target = AsyncIndex("i", using="alias")
    target.analyzer(custom_analyzer)

    recorded = target._analysis["analyzer"][analyzer_name]
    assert recorded == {
        "filter": ["standard"],
        "type": "custom",
        "tokenizer": "standard",
    }
async def test_analyzers_returned_from_to_dict():
    """A registered analyzer shows up under settings.analysis.analyzer in to_dict()."""
    # A random 100-letter analyzer name.
    analyzer_name = "".join([choice(string.ascii_letters) for _ in range(100)])
    custom_analyzer = analyzer(analyzer_name, tokenizer="standard", filter="standard")

    target = AsyncIndex("i", using="alias")
    target.analyzer(custom_analyzer)

    serialized_analyzers = target.to_dict()["settings"]["analysis"]["analyzer"]
    assert serialized_analyzers[analyzer_name] == {
        "filter": ["standard"],
        "type": "custom",
        "tokenizer": "standard",
    }
async def test_conflicting_analyzer_raises_error():
    """Registering a second, different analyzer under the same name raises ValueError."""
    target = AsyncIndex("i")

    first_definition = dict(tokenizer="whitespace", filter=["lowercase", "stop"])
    target.analyzer("my_analyzer", **first_definition)

    # Same name, different tokenizer.
    conflicting_definition = dict(tokenizer="keyword", filter=["lowercase", "stop"])
    with raises(ValueError):
        target.analyzer("my_analyzer", **conflicting_definition)
async def test_index_template_can_have_order():
    """as_template(..., order=N) puts the order into the template body."""
    template = AsyncIndex("i-*").as_template("i", order=2)

    expected = {"index_patterns": ["i-*"], "order": 2}
    assert expected == template.to_dict()
@@ -0,0 +1,216 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
import json
from opensearchpy import Keyword, Nested, Text
from opensearchpy._async.helpers import mapping
from opensearchpy.helpers import analysis
async def test_mapping_can_has_fields():
    """field() calls can be chained and each adds a property to the mapping."""
    field_map = mapping.AsyncMapping()
    after_first = field_map.field("name", "text")
    after_first.field("tags", "keyword")

    expected_properties = {"name": {"type": "text"}, "tags": {"type": "keyword"}}
    assert {"properties": expected_properties} == field_map.to_dict()
async def test_mapping_update_is_recursive():
    """Updating one mapping from another merges nested object properties and meta."""
    base = mapping.AsyncMapping()
    base.field("title", "text")
    base.field("author", "object")
    base.field("author", "object", properties={"name": {"type": "text"}})
    base.meta("_all", enabled=False)
    base.meta("dynamic", False)

    incoming = mapping.AsyncMapping()
    incoming.field("published_from", "date")
    incoming.field("author", "object", properties={"email": {"type": "text"}})
    incoming.field("title", "text")
    incoming.field("lang", "keyword")
    incoming.meta("_analyzer", path="lang")

    base.update(incoming, update_only=True)

    # The nested "author" object must contain the properties of both mappings.
    merged_author = {
        "type": "object",
        "properties": {"name": {"type": "text"}, "email": {"type": "text"}},
    }
    expected = {
        "_all": {"enabled": False},
        "_analyzer": {"path": "lang"},
        "dynamic": False,
        "properties": {
            "published_from": {"type": "date"},
            "title": {"type": "text"},
            "lang": {"type": "keyword"},
            "author": merged_author,
        },
    }
    assert expected == base.to_dict()
async def test_properties_can_iterate_over_all_the_fields():
    """_collect_fields() yields top-level fields, multi-fields and nested properties."""
    field_map = mapping.AsyncMapping()
    field_map.field(
        "f1", "text", test_attr="f1", fields={"f2": Keyword(test_attr="f2")}
    )
    field_map.field(
        "f3", Nested(test_attr="f3", properties={"f4": Text(test_attr="f4")})
    )

    collected = set()
    for collected_field in field_map.properties._collect_fields():
        collected.add(collected_field.test_attr)

    assert {"f1", "f2", "f3", "f4"} == collected
async def test_mapping_can_collect_all_analyzers_and_normalizers():
    """_collect_analysis() gathers analyzers, normalizers, filters and tokenizers used by the mapping."""
    # --- analyzers -----------------------------------------------------
    stop_ab = analysis.token_filter("my_filter1", "stop", stopwords=["a", "b"])
    keyword_analyzer = analysis.analyzer(
        "my_analyzer1", tokenizer="keyword", filter=["lowercase", stop_ab]
    )
    # These two are referenced by name only; the expected result below does
    # not list a definition for either of them.
    english_analyzer = analysis.analyzer("english")
    unknown_analyzer = analysis.analyzer("unknown_custom")
    trigram_tokenizer = analysis.tokenizer("trigram", "nGram", min_gram=3, max_gram=3)
    stop_cd = analysis.token_filter("my_filter2", "stop", stopwords=["c", "d"])
    trigram_analyzer = analysis.analyzer(
        "my_analyzer2", tokenizer=trigram_tokenizer, filter=[stop_cd]
    )
    meta_analyzer = analysis.analyzer("my_analyzer3", tokenizer="keyword")

    # --- normalizers ---------------------------------------------------
    lowercase_normalizer = analysis.normalizer("my_normalizer1", filter=["lowercase"])
    stop_ef = analysis.token_filter("my_filter3", "stop", stopwords=["e", "f"])
    combined_normalizer = analysis.normalizer(
        "my_normalizer2", filter=["my_filter1", "my_filter2", stop_ef]
    )
    unknown_normalizer = analysis.normalizer("unknown_custom")

    # --- mapping using all of the above --------------------------------
    field_map = mapping.AsyncMapping()
    field_map.field(
        "title",
        "text",
        analyzer=keyword_analyzer,
        fields={
            "english": Text(analyzer=english_analyzer),
            "unknown": Keyword(search_analyzer=unknown_analyzer),
        },
    )
    field_map.field(
        "comments", Nested(properties={"author": Text(analyzer=trigram_analyzer)})
    )
    field_map.field("normalized_title", "keyword", normalizer=lowercase_normalizer)
    field_map.field("normalized_comment", "keyword", normalizer=combined_normalizer)
    field_map.field("unknown", "keyword", normalizer=unknown_normalizer)
    field_map.meta("_all", analyzer=meta_analyzer)

    expected_analyzers = {
        "my_analyzer1": {
            "filter": ["lowercase", "my_filter1"],
            "tokenizer": "keyword",
            "type": "custom",
        },
        "my_analyzer2": {
            "filter": ["my_filter2"],
            "tokenizer": "trigram",
            "type": "custom",
        },
        "my_analyzer3": {"tokenizer": "keyword", "type": "custom"},
    }
    expected_normalizers = {
        "my_normalizer1": {"filter": ["lowercase"], "type": "custom"},
        "my_normalizer2": {
            "filter": ["my_filter1", "my_filter2", "my_filter3"],
            "type": "custom",
        },
    }
    expected_filters = {
        "my_filter1": {"stopwords": ["a", "b"], "type": "stop"},
        "my_filter2": {"stopwords": ["c", "d"], "type": "stop"},
        "my_filter3": {"stopwords": ["e", "f"], "type": "stop"},
    }
    expected_tokenizers = {
        "trigram": {"max_gram": 3, "min_gram": 3, "type": "nGram"}
    }
    expected = {
        "analyzer": expected_analyzers,
        "normalizer": expected_normalizers,
        "filter": expected_filters,
        "tokenizer": expected_tokenizers,
    }
    assert expected == field_map._collect_analysis()

    # The serialized mapping must survive a JSON round trip unchanged.
    round_tripped = json.loads(json.dumps(field_map.to_dict()))
    assert round_tripped == field_map.to_dict()
async def test_mapping_can_collect_multiple_analyzers():
    """Analyzers reused across several fields are each collected once by name."""
    stop_ab = analysis.token_filter("my_filter1", "stop", stopwords=["a", "b"])
    index_analyzer = analysis.analyzer(
        "my_analyzer1", tokenizer="keyword", filter=["lowercase", stop_ab]
    )
    trigram_tokenizer = analysis.tokenizer("trigram", "nGram", min_gram=3, max_gram=3)
    stop_cd = analysis.token_filter("my_filter2", "stop", stopwords=["c", "d"])
    query_analyzer = analysis.analyzer(
        "my_analyzer2", tokenizer=trigram_tokenizer, filter=[stop_cd]
    )

    field_map = mapping.AsyncMapping()
    field_map.field(
        "title", "text", analyzer=index_analyzer, search_analyzer=query_analyzer
    )
    field_map.field(
        "text",
        "text",
        analyzer=index_analyzer,
        fields={
            "english": Text(analyzer=index_analyzer),
            "unknown": Keyword(
                analyzer=index_analyzer, search_analyzer=query_analyzer
            ),
        },
    )

    expected = {
        "analyzer": {
            "my_analyzer1": {
                "filter": ["lowercase", "my_filter1"],
                "tokenizer": "keyword",
                "type": "custom",
            },
            "my_analyzer2": {
                "filter": ["my_filter2"],
                "tokenizer": "trigram",
                "type": "custom",
            },
        },
        "filter": {
            "my_filter1": {"stopwords": ["a", "b"], "type": "stop"},
            "my_filter2": {"stopwords": ["c", "d"], "type": "stop"},
        },
        "tokenizer": {"trigram": {"max_gram": 3, "min_gram": 3, "type": "nGram"}},
    }
    assert expected == field_map._collect_analysis()
async def test_even_non_custom_analyzers_can_have_params():
    """An analyzer given an explicit type and parameters is collected with them."""
    pattern_analyzer = analysis.analyzer("whitespace", type="pattern", pattern=r"\\s+")

    field_map = mapping.AsyncMapping()
    field_map.field("title", "text", analyzer=pattern_analyzer)

    expected = {"analyzer": {"whitespace": {"type": "pattern", "pattern": r"\\s+"}}}
    assert expected == field_map._collect_analysis()
async def test_resolve_field_can_resolve_multifields():
    """resolve_field() follows a dotted path into a field's multi-fields."""
    field_map = mapping.AsyncMapping()
    field_map.field("title", "text", fields={"keyword": Keyword()})

    resolved = field_map.resolve_field("title.keyword")

    assert isinstance(resolved, Keyword)
async def test_resolve_nested():
    """resolve_nested() returns the enclosing nested paths together with the field."""
    field_map = mapping.AsyncMapping()
    inner_nested = Nested(properties={"k1": Keyword()})
    field_map.field("n1", "nested", properties={"n2": inner_nested})
    field_map.field("k2", "keyword")

    # A field two nested levels deep reports both enclosing paths.
    nested_paths, resolved = field_map.resolve_nested("n1.n2.k1")
    assert nested_paths == ["n1", "n1.n2"]
    assert isinstance(resolved, Keyword)

    # A top-level field has no enclosing nested paths.
    nested_paths, resolved = field_map.resolve_nested("k2")
    assert nested_paths == []
    assert isinstance(resolved, Keyword)
@@ -0,0 +1,546 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
from copy import deepcopy
import pytest
from pytest import raises
from opensearchpy._async.helpers import search
from opensearchpy._async.helpers.document import AsyncDocument
from opensearchpy.exceptions import IllegalOperation
from opensearchpy.helpers import query
from opensearchpy.helpers.query import Q
pytestmark = pytest.mark.asyncio
async def test_expand__to_dot_is_respected():
    """With _expand__to_dot=False the double underscore in a field name is kept."""
    srch = search.AsyncSearch()
    srch = srch.query("match", a__b=42, _expand__to_dot=False)

    expected = {"query": {"match": {"a__b": 42}}}
    assert expected == srch.to_dict()
async def test_execute_uses_cache():
    """execute() returns the response already stored on the search object."""
    srch = search.AsyncSearch()
    cached = object()
    srch._response = cached

    result = await srch.execute()

    assert cached is result
async def test_cache_isnt_cloned():
    """A clone does not inherit the cached response of its source."""
    srch = search.AsyncSearch()
    srch._response = object()

    clone = srch._clone()

    assert not hasattr(clone, "_response")
async def test_search_starts_with_no_query():
    """A fresh search has no query behind its query proxy."""
    fresh = search.AsyncSearch()
    assert fresh.query._proxied is None
async def test_search_query_combines_query():
    """query() returns a new search; successive calls are combined into a bool/must."""
    empty = search.AsyncSearch()

    single = empty.query("match", f=42)
    assert single.query._proxied == query.Match(f=42)
    # The search that query() was called on is left untouched.
    assert empty.query._proxied is None

    double = single.query("match", f=43)
    assert single.query._proxied == query.Match(f=42)
    expected_bool = query.Bool(must=[query.Match(f=42), query.Match(f=43)])
    assert double.query._proxied == expected_bool
async def test_query_can_be_assigned_to():
    """Assigning to .query stores that exact query object."""
    srch = search.AsyncSearch()
    match_query = Q("match", title="python")

    srch.query = match_query

    assert srch.query._proxied is match_query
async def test_query_can_be_wrapped():
    """The current query can be wrapped in another query and assigned back."""
    srch = search.AsyncSearch().query("match", title="python")

    wrapped = Q(
        "function_score",
        query=srch.query,
        field_value_factor={"field": "rating"},
    )
    srch.query = wrapped

    expected_function_score = {
        "functions": [{"field_value_factor": {"field": "rating"}}],
        "query": {"match": {"title": "python"}},
    }
    expected = {"query": {"function_score": expected_function_score}}
    assert expected == srch.to_dict()
async def test_using():
    """using() returns a new search bound to the new connection; the original is unchanged."""
    first_conn = object()
    second_conn = object()

    original = search.AsyncSearch(using=first_conn)
    assert original._using is first_conn

    rebound = original.using(second_conn)
    assert original._using is first_conn
    assert rebound._using is second_conn
async def test_methods_are_proxied_to_the_query():
    """Calling to_dict() on the query proxy returns the underlying query's dict."""
    srch = search.AsyncSearch().query("match_all")

    query_body = srch.query.to_dict()

    assert query_body == {"match_all": {}}
async def test_query_always_returns_search():
    """query() returns an AsyncSearch instance."""
    srch = search.AsyncSearch()

    returned = srch.query("match", f=42)

    assert isinstance(returned, search.AsyncSearch)
async def test_source_copied_on_clone():
    """The _source setting (False, empty list or field list) survives _clone()."""
    # _source disabled
    disabled = search.AsyncSearch().source(False)
    first_clone = disabled._clone()
    assert first_clone._source == disabled._source
    second_clone = disabled._clone()
    assert second_clone._source is False

    # empty field list
    empty_fields = search.AsyncSearch().source([])
    empty_clone = empty_fields._clone()
    assert empty_clone._source == empty_fields._source
    assert empty_fields._source == []

    # explicit field list
    some_fields = search.AsyncSearch().source(["some", "fields"])
    first_clone = some_fields._clone()
    assert first_clone._source == some_fields._source
    second_clone = some_fields._clone()
    assert second_clone._source == ["some", "fields"]
async def test_copy_clones():
    """copy.copy() of a search yields an equal but distinct object."""
    from copy import copy

    original = search.AsyncSearch().source(["some", "fields"])

    duplicate = copy(original)

    assert original == duplicate
    assert original is not duplicate
async def test_aggs_allow_two_metric():
    """Two metric() calls chained on aggs both end up in the request body."""
    srch = search.AsyncSearch()
    after_first = srch.aggs.metric("a", "max", field="a")
    after_first.metric("b", "max", field="b")

    body = srch.to_dict()

    assert body == {
        "aggs": {"a": {"max": {"field": "a"}}, "b": {"max": {"field": "b"}}}
    }
async def test_aggs_get_copied_on_change():
    """Aggregations added to a derived search do not leak back into its ancestors."""

    def max_of_score():
        # A fresh dict every time so the expected structure shares no objects.
        return {"max": {"field": "score"}}

    base = search.AsyncSearch().query("match_all")
    per_tag = base.aggs.bucket("per_tag", "terms", field="f")
    per_tag.metric("max_score", "max", field="score")

    with_month = base.query("match_all")
    with_month.aggs.bucket(
        "per_month", "date_histogram", field="date", interval="month"
    )

    with_month_metric = with_month.query("match_all")
    with_month_metric.aggs["per_month"].metric("max_score", "max", field="score")

    with_top_metric = with_month_metric._clone()
    with_top_metric.aggs.metric("max_score", "max", field="score")

    # The expected body is grown step by step, mirroring the searches above.
    expected = {
        "query": {"match_all": {}},
        "aggs": {
            "per_tag": {
                "terms": {"field": "f"},
                "aggs": {"max_score": max_of_score()},
            }
        },
    }
    assert expected == base.to_dict()

    expected["aggs"]["per_month"] = {
        "date_histogram": {"field": "date", "interval": "month"}
    }
    assert expected == with_month.to_dict()

    expected["aggs"]["per_month"]["aggs"] = {"max_score": max_of_score()}
    assert expected == with_month_metric.to_dict()

    expected["aggs"]["max_score"] = max_of_score()
    assert expected == with_top_metric.to_dict()
async def test_search_index():
    """index() accumulates names, accepts lists/tuples, and resets when called empty."""
    # Names are appended one call at a time.
    srch = search.AsyncSearch(index="i")
    assert srch._index == ["i"]
    srch = srch.index("i2")
    assert srch._index == ["i", "i2"]
    srch = srch.index("i3")
    assert srch._index == ["i", "i2", "i3"]

    # Calling index() with no arguments clears the list.
    srch = srch.index()
    assert srch._index is None

    # The constructor accepts a tuple or a list.
    srch = search.AsyncSearch(index=("i", "i2"))
    assert srch._index == ["i", "i2"]
    srch = search.AsyncSearch(index=["i", "i2"])
    assert srch._index == ["i", "i2"]

    # Several positional names at once; the source search is not modified.
    srch = search.AsyncSearch()
    srch = srch.index("i", "i2")
    assert srch._index == ["i", "i2"]
    derived = srch.index("i3")
    assert srch._index == ["i", "i2"]
    assert derived._index == ["i", "i2", "i3"]

    # A list mixed with a plain name.
    srch = search.AsyncSearch()
    srch = srch.index(["i", "i2"], "i3")
    assert srch._index == ["i", "i2", "i3"]
    derived = srch.index("i4")
    assert srch._index == ["i", "i2", "i3"]
    assert derived._index == ["i", "i2", "i3", "i4"]

    # A list or a tuple passed to index().
    derived = srch.index(["i4"])
    assert derived._index == ["i", "i2", "i3", "i4"]
    derived = srch.index(("i4", "i5"))
    assert derived._index == ["i", "i2", "i3", "i4", "i5"]
async def test_doc_type_document_class():
    """A document class given as doc_type is stored as-is, with an empty doc-type map."""

    class MyDocument(AsyncDocument):
        pass

    # Through the constructor.
    from_constructor = search.AsyncSearch(doc_type=MyDocument)
    assert from_constructor._doc_type == [MyDocument]
    assert from_constructor._doc_type_map == {}

    # Through the doc_type() method.
    from_method = search.AsyncSearch().doc_type(MyDocument)
    assert from_method._doc_type == [MyDocument]
    assert from_method._doc_type_map == {}
async def test_sort():
    """sort() expands a "-field" prefix to a descending order; sort() with no args resets."""
    srch = search.AsyncSearch()
    srch = srch.sort("fielda", "-fieldb")

    assert ["fielda", {"fieldb": {"order": "desc"}}] == srch._sort
    assert {"sort": ["fielda", {"fieldb": {"order": "desc"}}]} == srch.to_dict()

    srch = srch.sort()
    assert [] == srch._sort
    # After the reset the body equals that of a brand-new search.
    pristine_body = search.AsyncSearch().to_dict()
    assert pristine_body == srch.to_dict()
async def test_sort_by_score():
    """Sorting by "_score" is allowed; "-_score" raises IllegalOperation."""
    ascending = search.AsyncSearch()
    ascending = ascending.sort("_score")
    assert {"sort": ["_score"]} == ascending.to_dict()

    fresh = search.AsyncSearch()
    with raises(IllegalOperation):
        fresh.sort("-_score")
async def test_slice():
    """Slicing a search is translated into "from" / "size" in the request body."""
    srch = search.AsyncSearch()

    bounded = srch[3:10]
    assert {"from": 3, "size": 7} == bounded.to_dict()

    head = srch[:5]
    assert {"from": 0, "size": 5} == head.to_dict()

    open_ended = srch[3:]
    assert {"from": 3, "size": 10} == open_ended.to_dict()

    empty = srch[0:0]
    assert {"from": 0, "size": 0} == empty.to_dict()

    reversed_bounds = srch[20:0]
    assert {"from": 20, "size": 0} == reversed_bounds.to_dict()
async def test_index():
    """Indexing a search with an integer selects a single hit at that offset."""
    srch = search.AsyncSearch()

    single_hit = srch[3]

    assert {"from": 3, "size": 1} == single_hit.to_dict()
async def test_search_to_dict():
    """to_dict() reflects query, aggregations, extra kwargs and constructor extras."""
    srch = search.AsyncSearch()
    assert {} == srch.to_dict()

    srch = srch.query("match", f=42)
    match_body = {"query": {"match": {"f": 42}}}
    assert match_body == srch.to_dict()
    # Keyword arguments to to_dict() are merged into the body.
    assert {"query": {"match": {"f": 42}}, "size": 10} == srch.to_dict(size=10)

    per_tag = srch.aggs.bucket("per_tag", "terms", field="f")
    per_tag.metric("max_score", "max", field="score")
    expected_with_aggs = {
        "aggs": {
            "per_tag": {
                "terms": {"field": "f"},
                "aggs": {"max_score": {"max": {"field": "score"}}},
            }
        },
        "query": {"match": {"f": 42}},
    }
    assert expected_with_aggs == srch.to_dict()

    srch = search.AsyncSearch(extra={"size": 5})
    assert {"size": 5} == srch.to_dict()

    # "from_" is emitted as "from".
    srch = srch.extra(from_=42)
    assert {"size": 5, "from": 42} == srch.to_dict()
async def test_complex_example():
    """A search using queries, filters, aggs, highlight and script fields serializes fully."""
    srch = search.AsyncSearch()
    srch = srch.query("match", title="python")
    srch = srch.query(~Q("match", title="ruby"))
    category_filter = Q("term", category="meetup") | Q("term", category="conference")
    srch = srch.filter(category_filter)
    srch = srch.post_filter("terms", tags=["prague", "czech"])
    srch = srch.script_fields(more_attendees="doc['attendees'].value + 42")

    per_country = srch.aggs.bucket("per_country", "terms", field="country")
    per_country.metric("avg_attendees", "avg", field="attendees")

    srch.query.minimum_should_match = 2

    srch = srch.highlight_options(order="score")
    srch = srch.highlight("title", "body", fragment_size=50)

    expected_query = {
        "bool": {
            "filter": [
                {
                    "bool": {
                        "should": [
                            {"term": {"category": "meetup"}},
                            {"term": {"category": "conference"}},
                        ]
                    }
                }
            ],
            "must": [{"match": {"title": "python"}}],
            "must_not": [{"match": {"title": "ruby"}}],
            "minimum_should_match": 2,
        }
    }
    expected_aggs = {
        "per_country": {
            "terms": {"field": "country"},
            "aggs": {"avg_attendees": {"avg": {"field": "attendees"}}},
        }
    }
    expected_highlight = {
        "order": "score",
        "fields": {"title": {"fragment_size": 50}, "body": {"fragment_size": 50}},
    }
    expected = {
        "query": expected_query,
        "post_filter": {"terms": {"tags": ["prague", "czech"]}},
        "aggs": expected_aggs,
        "highlight": expected_highlight,
        "script_fields": {"more_attendees": {"script": "doc['attendees'].value + 42"}},
    }
    assert expected == srch.to_dict()
async def test_reverse():
    """from_dict() followed by to_dict() reproduces the body and leaves the input untouched."""
    filtered_query = {
        "filter": {
            "bool": {
                "should": [
                    {"term": {"category": "meetup"}},
                    {"term": {"category": "conference"}},
                ]
            }
        },
        "query": {
            "bool": {
                "must": [{"match": {"title": "python"}}],
                "must_not": [{"match": {"title": "ruby"}}],
                "minimum_should_match": 2,
            }
        },
    }
    body = {
        "query": {"filtered": filtered_query},
        "post_filter": {"bool": {"must": [{"terms": {"tags": ["prague", "czech"]}}]}},
        "aggs": {
            "per_country": {
                "terms": {"field": "country"},
                "aggs": {"avg_attendees": {"avg": {"field": "attendees"}}},
            }
        },
        "sort": ["title", {"category": {"order": "desc"}}, "_score"],
        "size": 5,
        "highlight": {"order": "score", "fields": {"title": {"fragment_size": 50}}},
        "suggest": {
            "my-title-suggestions-1": {
                "text": "devloping distibutd saerch engies",
                "term": {"size": 3, "field": "title"},
            }
        },
        "script_fields": {"more_attendees": {"script": "doc['attendees'].value + 42"}},
    }
    snapshot = deepcopy(body)

    srch = search.AsyncSearch.from_dict(body)

    # from_dict() must not have modified its argument in place.
    assert body == snapshot
    assert {"size": 5} == srch._extra
    assert body == srch.to_dict()
async def test_from_dict_doesnt_need_query():
    """from_dict() accepts a body that has no "query" key."""
    body = {"size": 5}

    srch = search.AsyncSearch.from_dict(body)

    assert {"size": 5} == srch.to_dict()
async def test_source():
    """source() serializes includes/excludes, False, or a field list; a later call replaces an earlier one."""
    untouched = search.AsyncSearch().source()
    assert {} == untouched.to_dict()

    expected_filtered = {
        "_source": {"includes": ["foo.bar.*"], "excludes": ["foo.one"]}
    }
    filtered = search.AsyncSearch().source(
        includes=["foo.bar.*"], excludes=["foo.one"]
    )
    assert expected_filtered == filtered.to_dict()

    disabled = search.AsyncSearch().source(False)
    assert {"_source": False} == disabled.to_dict()

    # A plain field list given afterwards replaces the includes/excludes.
    expected_fields = {"_source": ["f1", "f2"]}
    replaced = search.AsyncSearch().source(
        includes=["foo.bar.*"], excludes=["foo.one"]
    )
    replaced = replaced.source(["f1", "f2"])
    assert expected_fields == replaced.to_dict()
async def test_source_on_clone():
    """The _source setting is kept when further calls derive new searches."""
    expected_filtered = {
        "_source": {"includes": ["foo.bar.*"], "excludes": ["foo.one"]},
        "query": {"bool": {"filter": [{"term": {"title": "python"}}]}},
    }
    filtered = search.AsyncSearch().source(includes=["foo.bar.*"])
    filtered = filtered.source(excludes=["foo.one"])
    filtered = filtered.filter("term", title="python")
    assert expected_filtered == filtered.to_dict()

    expected_disabled = {
        "_source": False,
        "query": {"bool": {"filter": [{"term": {"title": "python"}}]}},
    }
    disabled = search.AsyncSearch().source(False)
    disabled = disabled.filter("term", title="python")
    assert expected_disabled == disabled.to_dict()
async def test_source_on_clear():
    """Passing includes=None and excludes=None removes a previous _source setting."""
    srch = search.AsyncSearch()
    srch = srch.source(includes=["foo.bar.*"])
    srch = srch.source(includes=None, excludes=None)

    assert {} == srch.to_dict()
async def test_suggest_accepts_global_text():
    """A top-level suggest "text" is applied to suggesters that lack their own text."""
    body = {
        "suggest": {
            "text": "the amsterdma meetpu",
            "my-suggest-1": {"term": {"field": "title"}},
            "my-suggest-2": {"text": "other", "term": {"field": "body"}},
        }
    }

    srch = search.AsyncSearch.from_dict(body)

    expected_suggest = {
        # Received the global text.
        "my-suggest-1": {
            "term": {"field": "title"},
            "text": "the amsterdma meetpu",
        },
        # Kept its own text.
        "my-suggest-2": {"term": {"field": "body"}, "text": "other"},
    }
    assert {"suggest": expected_suggest} == srch.to_dict()
async def test_suggest():
    """suggest() adds a named suggester together with its text to the body."""
    srch = search.AsyncSearch()
    srch = srch.suggest("my_suggestion", "pyhton", term={"field": "title"})

    expected_suggester = {"term": {"field": "title"}, "text": "pyhton"}
    assert {"suggest": {"my_suggestion": expected_suggester}} == srch.to_dict()
async def test_exclude():
    """exclude() is serialized as a bool filter wrapping a must_not clause."""
    srch = search.AsyncSearch()
    srch = srch.exclude("match", title="python")

    negated = {"bool": {"must_not": [{"match": {"title": "python"}}]}}
    expected = {"query": {"bool": {"filter": [negated]}}}
    assert expected == srch.to_dict()
async def test_update_from_dict():
    """Successive update_from_dict() calls accumulate on the same search object."""
    srch = search.AsyncSearch()
    for fragment in (
        {"indices_boost": [{"important-documents": 2}]},
        {"_source": ["id", "name"]},
    ):
        srch.update_from_dict(fragment)

    expected = {
        "indices_boost": [{"important-documents": 2}],
        "_source": ["id", "name"],
    }
    assert expected == srch.to_dict()
async def test_rescore_query_to_dict():
    """Query objects nested inside "rescore" (via extra() or to_dict kwargs) are serialized."""
    srch = search.AsyncSearch(index="index-name")

    positive_query = Q(
        "function_score",
        query=Q("term", tags="a"),
        script_score={"script": "_score * 1"},
    )
    negative_query = Q(
        "function_score",
        query=Q("term", tags="b"),
        script_score={"script": "_score * -100"},
    )

    srch = srch.query(positive_query)
    srch = srch.extra(
        rescore={"window_size": 100, "query": {"rescore_query": negative_query}}
    )

    # Rescore supplied through extra().
    body_from_extra = srch.to_dict()
    assert body_from_extra == {
        "query": {
            "function_score": {
                "query": {"term": {"tags": "a"}},
                "functions": [{"script_score": {"script": "_score * 1"}}],
            }
        },
        "rescore": {
            "window_size": 100,
            "query": {
                "rescore_query": {
                    "function_score": {
                        "query": {"term": {"tags": "b"}},
                        "functions": [{"script_score": {"script": "_score * -100"}}],
                    }
                }
            },
        },
    }

    # Rescore supplied as a to_dict() keyword takes the place of the extra() one.
    body_from_kwarg = srch.to_dict(
        rescore={"window_size": 10, "query": {"rescore_query": positive_query}}
    )
    assert body_from_kwarg == {
        "query": {
            "function_score": {
                "query": {"term": {"tags": "a"}},
                "functions": [{"script_score": {"script": "_score * 1"}}],
            }
        },
        "rescore": {
            "window_size": 10,
            "query": {
                "rescore_query": {
                    "function_score": {
                        "query": {"term": {"tags": "a"}},
                        "functions": [{"script_score": {"script": "_score * 1"}}],
                    }
                }
            },
        },
    }
@@ -0,0 +1,162 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
from copy import deepcopy
import pytest
from opensearchpy import Q
from opensearchpy._async.helpers import update_by_query
from opensearchpy.helpers.response import UpdateByQueryResponse
pytestmark = pytest.mark.asyncio
async def test_ubq_starts_with_no_query():
    """A fresh update-by-query has no query behind its query proxy."""
    fresh = update_by_query.AsyncUpdateByQuery()
    assert fresh.query._proxied is None
async def test_ubq_to_dict():
    """to_dict() reflects the query, keyword overrides and constructor extras."""
    request = update_by_query.AsyncUpdateByQuery()
    assert {} == request.to_dict()

    request = request.query("match", f=42)
    assert {"query": {"match": {"f": 42}}} == request.to_dict()
    # Keyword arguments to to_dict() are merged into the body.
    assert {"query": {"match": {"f": 42}}, "size": 10} == request.to_dict(size=10)

    request = update_by_query.AsyncUpdateByQuery(extra={"size": 5})
    assert {"size": 5} == request.to_dict()

    # Query objects placed in extra are serialized as well.
    conference_term = Q("term", category="conference")
    request = update_by_query.AsyncUpdateByQuery(extra={"extra_q": conference_term})
    assert {"extra_q": {"term": {"category": "conference"}}} == request.to_dict()
async def test_complex_example():
    """An update-by-query with queries, a filter and a script serializes fully."""
    request = update_by_query.AsyncUpdateByQuery()
    request = request.query("match", title="python")
    request = request.query(~Q("match", title="ruby"))
    category_filter = Q("term", category="meetup") | Q("term", category="conference")
    request = request.filter(category_filter)
    request = request.script(
        source="ctx._source.likes += params.f", lang="painless", params={"f": 3}
    )

    request.query.minimum_should_match = 2

    expected_query = {
        "bool": {
            "filter": [
                {
                    "bool": {
                        "should": [
                            {"term": {"category": "meetup"}},
                            {"term": {"category": "conference"}},
                        ]
                    }
                }
            ],
            "must": [{"match": {"title": "python"}}],
            "must_not": [{"match": {"title": "ruby"}}],
            "minimum_should_match": 2,
        }
    }
    expected_script = {
        "source": "ctx._source.likes += params.f",
        "lang": "painless",
        "params": {"f": 3},
    }
    expected = {"query": expected_query, "script": expected_script}
    assert expected == request.to_dict()
async def test_exclude():
    """exclude() is serialized as a bool filter wrapping a must_not clause."""
    request = update_by_query.AsyncUpdateByQuery()
    request = request.exclude("match", title="python")

    negated = {"bool": {"must_not": [{"match": {"title": "python"}}]}}
    expected = {"query": {"bool": {"filter": [negated]}}}
    assert expected == request.to_dict()
async def test_reverse():
    """from_dict() followed by to_dict() reproduces the body and leaves the input untouched."""
    filtered_query = {
        "filter": {
            "bool": {
                "should": [
                    {"term": {"category": "meetup"}},
                    {"term": {"category": "conference"}},
                ]
            }
        },
        "query": {
            "bool": {
                "must": [{"match": {"title": "python"}}],
                "must_not": [{"match": {"title": "ruby"}}],
                "minimum_should_match": 2,
            }
        },
    }
    body = {
        "query": {"filtered": filtered_query},
        "script": {
            "source": "ctx._source.likes += params.f",
            "lang": "painless",
            "params": {"f": 3},
        },
    }
    snapshot = deepcopy(body)

    request = update_by_query.AsyncUpdateByQuery.from_dict(body)

    # from_dict() must not have modified its argument in place.
    assert body == snapshot
    assert body == request.to_dict()
async def test_from_dict_doesnt_need_query():
    """from_dict() accepts a body that has no "query" key."""
    body = {"script": {"source": "test"}}

    request = update_by_query.AsyncUpdateByQuery.from_dict(body)

    assert {"script": {"source": "test"}} == request.to_dict()
async def test_overwrite_script():
    """A second script() call replaces the earlier script entirely."""
    request = update_by_query.AsyncUpdateByQuery()
    request = request.script(
        source="ctx._source.likes += params.f", lang="painless", params={"f": 3}
    )
    full_script = {
        "source": "ctx._source.likes += params.f",
        "lang": "painless",
        "params": {"f": 3},
    }
    assert {"script": full_script} == request.to_dict()

    # Neither "lang" nor "params" from the first call remain.
    request = request.script(source="ctx._source.likes++")
    assert {"script": {"source": "ctx._source.likes++"}} == request.to_dict()
async def test_update_by_query_response_success():
    """success() is true only when the response neither timed out nor has failures."""
    clean = UpdateByQueryResponse({}, {"timed_out": False, "failures": []})
    assert clean.success()

    timed_out = UpdateByQueryResponse({}, {"timed_out": True, "failures": []})
    assert not timed_out.success()

    with_failures = UpdateByQueryResponse({}, {"timed_out": False, "failures": [{}]})
    assert not with_failures.success()
@@ -0,0 +1,110 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
import asyncio
import re
from datetime import datetime
import pytest
from pytest import fixture
from test_data import (
DATA,
FLAT_DATA,
TEST_GIT_DATA,
create_flat_git_index,
create_git_index,
)
from opensearchpy._async.helpers.actions import async_bulk
from opensearchpy._async.helpers.test import get_test_client
from opensearchpy.connection.async_connections import add_connection
from test_opensearchpy.test_server.test_helpers.test_document import (
Comment,
History,
PullRequest,
User,
)
pytestmark = pytest.mark.asyncio
@pytest.fixture(scope="session")
def event_loop():
    """Provide a single event loop for the whole test session.

    The session-scoped async fixtures in this file (``client``,
    ``opensearch_version``) need a loop that lives as long as they do.

    A dedicated loop is created here rather than fetched with
    ``asyncio.get_event_loop()``: calling that function with no running loop
    is deprecated, and it returns the process-wide default loop, which this
    fixture would then close for everyone else.
    """
    loop = asyncio.new_event_loop()
    yield loop
    # Teardown: runs once, after the last test of the session.
    loop.close()
@fixture(scope="session")
async def client():
    """Create the test client once per session and register it as the "default" connection."""
    test_client = await get_test_client(
        verify_certs=False, http_auth=("admin", "admin")
    )
    await add_connection("default", test_client)
    return test_client
@fixture(scope="session")
async def opensearch_version(client):
    """Yield the server version as a tuple of ints, parsed from ``client.info()``.

    Only the leading run of digits and dots in the version number is used.
    """
    info = await client.info()
    print(info)
    version_number = info["version"]["number"]
    numeric_prefix = re.match(r"^([0-9.]+)", version_number).group(1)
    yield tuple(int(part) for part in numeric_prefix.split("."))
@fixture
async def write_client(client):
    """Yield the shared client, then delete "test-*" indices and the "test-template" template."""
    yield client

    # Teardown: a 404 is ignored, so missing indices/templates are not an error.
    indices_api = client.indices
    await indices_api.delete("test-*", ignore=404)
    await indices_api.delete_template("test-template", ignore=404)
@fixture
async def data_client(client):
    """Yield the shared client with the "git" and "flat-git" indices created and loaded.

    Both indices are deleted afterwards. The setup is wrapped in
    ``try/finally`` so the deletion also runs when index creation or bulk
    loading raises before the ``yield``; otherwise a half-built index would
    be left behind on the server.
    """
    try:
        # create mappings
        await create_git_index(client, "git")
        await create_flat_git_index(client, "flat-git")
        # load data
        await async_bulk(client, DATA, raise_on_error=True, refresh=True)
        await async_bulk(client, FLAT_DATA, raise_on_error=True, refresh=True)
        yield client
    finally:
        # A 404 is ignored, so an index that was never created is not an error.
        await client.indices.delete("git", ignore=404)
        await client.indices.delete("flat-git", ignore=404)
@fixture
def pull_request(write_client):
    """Create, save and return a PullRequest document (id 42) with a single comment."""
    # NOTE(review): PullRequest and friends are imported from the synchronous
    # test_server suite, and init()/save() are called without ``await`` --
    # confirm this is intended for the async test package.
    PullRequest.init()

    edit_history = History(
        timestamp=datetime(2012, 1, 1),
        diff="-Ahoj Svete!\n+Hello World!",
    )
    comment = Comment(
        content="Hello World!",
        author=User(name="honzakral"),
        created_at=datetime(2018, 1, 9, 10, 17, 3, 21184),
        history=[edit_history],
    )
    pull_req = PullRequest(
        _id=42,
        comments=[comment],
        created_at=datetime(2018, 1, 9, 9, 17, 3, 21184),
    )
    pull_req.save(refresh=True)
    return pull_req
@fixture
async def setup_ubq_tests(client):
    """Create the "test-git" index, bulk-load TEST_GIT_DATA and return the index name."""
    index_name = "test-git"
    await create_git_index(client, index_name)
    await async_bulk(client, TEST_GIT_DATA, raise_on_error=True, refresh=True)
    return index_name
@@ -25,16 +25,13 @@
# under the License.
# Licensed to Elasticsearch B.V. under one or more agreements.
# Elasticsearch B.V. licenses this file to you under the Apache 2.0 License.
# See the LICENSE file in the project root for more information
import asyncio
import pytest
from mock import MagicMock, patch
from opensearchpy import TransportError, helpers
from opensearchpy import TransportError
from opensearchpy._async.helpers import actions
from opensearchpy.helpers import BulkIndexError, ScanError
pytestmark = pytest.mark.asyncio
@@ -67,16 +64,16 @@ class FailingBulkClient(object):
class TestStreamingBulk(object):
async def test_actions_remain_unchanged(self, async_client):
actions = [{"_id": 1}, {"_id": 2}]
async for ok, item in helpers.async_streaming_bulk(
async_client, actions, index="test-index"
actions1 = [{"_id": 1}, {"_id": 2}]
async for ok, item in actions.async_streaming_bulk(
async_client, actions1, index="test-index"
):
assert ok
assert [{"_id": 1}, {"_id": 2}] == actions
assert [{"_id": 1}, {"_id": 2}] == actions1
async def test_all_documents_get_inserted(self, async_client):
docs = [{"answer": x, "_id": x} for x in range(100)]
async for ok, item in helpers.async_streaming_bulk(
async for ok, item in actions.async_streaming_bulk(
async_client, docs, index="test-index", refresh=True
):
assert ok
@@ -96,7 +93,7 @@ class TestStreamingBulk(object):
for x in range(100):
yield {"answer": x, "_id": x}
async for ok, item in helpers.async_streaming_bulk(
async for ok, item in actions.async_streaming_bulk(
async_client, async_gen(), index="test-index", refresh=True
):
assert ok
@@ -110,7 +107,7 @@ class TestStreamingBulk(object):
index="test-index", body={"query": {"match_all": {}}}
)
async for ok, item in helpers.async_streaming_bulk(
async for ok, item in actions.async_streaming_bulk(
async_client, sync_gen(), index="test-index", refresh=True
):
assert ok
@@ -131,7 +128,7 @@ class TestStreamingBulk(object):
await async_client.cluster.health(wait_for_status="yellow")
try:
async for ok, item in helpers.async_streaming_bulk(
async for ok, item in actions.async_streaming_bulk(
async_client, [{"a": "b"}, {"a": "c"}], index="i", raise_on_error=True
):
assert ok
@@ -148,7 +145,7 @@ class TestStreamingBulk(object):
{"_op_type": "delete", "_index": "i", "_id": 45},
{"_op_type": "update", "_index": "i", "_id": 42, "doc": {"answer": 42}},
]
async for ok, item in helpers.async_streaming_bulk(async_client, docs):
async for ok, item in actions.async_streaming_bulk(async_client, docs):
assert ok
assert not await async_client.exists(index="i", id=45)
@@ -165,7 +162,7 @@ class TestStreamingBulk(object):
results = [
x
async for x in helpers.async_streaming_bulk(
async for x in actions.async_streaming_bulk(
failing_client,
docs,
raise_on_exception=False,
@@ -200,7 +197,7 @@ class TestStreamingBulk(object):
]
results = [
x
async for x in helpers.async_streaming_bulk(
async for x in actions.async_streaming_bulk(
failing_client,
docs,
raise_on_exception=False,
@@ -231,7 +228,7 @@ class TestStreamingBulk(object):
]
results = [
x
async for x in helpers.async_streaming_bulk(
async for x in actions.async_streaming_bulk(
failing_client,
docs,
raise_on_exception=False,
@@ -258,7 +255,7 @@ class TestStreamingBulk(object):
async def streaming_bulk():
results = [
x
async for x in helpers.async_streaming_bulk(
async for x in actions.async_streaming_bulk(
failing_client,
[{"a": 42}, {"a": 39}],
raise_on_exception=True,
@@ -276,7 +273,7 @@ class TestStreamingBulk(object):
class TestBulk(object):
async def test_bulk_works_with_single_item(self, async_client):
docs = [{"answer": 42, "_id": 1}]
success, failed = await helpers.async_bulk(
success, failed = await actions.async_bulk(
async_client, docs, index="test-index", refresh=True
)
@@ -289,7 +286,7 @@ class TestBulk(object):
async def test_all_documents_get_inserted(self, async_client):
docs = [{"answer": x, "_id": x} for x in range(100)]
success, failed = await helpers.async_bulk(
success, failed = await actions.async_bulk(
async_client, docs, index="test-index", refresh=True
)
@@ -302,7 +299,7 @@ class TestBulk(object):
async def test_stats_only_reports_numbers(self, async_client):
docs = [{"answer": x} for x in range(100)]
success, failed = await helpers.async_bulk(
success, failed = await actions.async_bulk(
async_client, docs, index="test-index", refresh=True, stats_only=True
)
@@ -320,7 +317,7 @@ class TestBulk(object):
)
await async_client.cluster.health(wait_for_status="yellow")
success, failed = await helpers.async_bulk(
success, failed = await actions.async_bulk(
async_client,
[{"a": 42}, {"a": "c", "_id": 42}],
index="i",
@@ -347,16 +344,16 @@ class TestBulk(object):
await async_client.cluster.health(wait_for_status="yellow")
with pytest.raises(BulkIndexError):
await helpers.async_bulk(async_client, [{"a": 42}, {"a": "c"}], index="i")
await actions.async_bulk(async_client, [{"a": 42}, {"a": "c"}], index="i")
async def test_ignore_error_if_raised(self, async_client):
# ignore the status code 400 in tuple
await helpers.async_bulk(
await actions.async_bulk(
async_client, [{"a": 42}, {"a": "c"}], index="i", ignore_status=(400,)
)
# ignore the status code 400 in list
await helpers.async_bulk(
await actions.async_bulk(
async_client,
[{"a": 42}, {"a": "c"}],
index="i",
@@ -366,19 +363,19 @@ class TestBulk(object):
)
# ignore the status code 400
await helpers.async_bulk(
await actions.async_bulk(
async_client, [{"a": 42}, {"a": "c"}], index="i", ignore_status=400
)
# ignore only the status code in the `ignore_status` argument
with pytest.raises(BulkIndexError):
await helpers.async_bulk(
await actions.async_bulk(
async_client, [{"a": 42}, {"a": "c"}], index="i", ignore_status=(444,)
)
# ignore transport error exception
failing_client = FailingBulkClient(async_client)
await helpers.async_bulk(
await actions.async_bulk(
failing_client, [{"a": 42}], index="i", ignore_status=(599,)
)
@@ -392,7 +389,7 @@ class TestBulk(object):
)
await async_client.cluster.health(wait_for_status="yellow")
success, failed = await helpers.async_bulk(
success, failed = await actions.async_bulk(
async_client,
[{"a": 42}, {"a": "c"}],
index="i",
@@ -452,7 +449,7 @@ class TestScan(object):
docs = [
doc
async for doc in helpers.async_scan(
async for doc in actions.async_scan(
async_client,
index="test_index",
query={"sort": "answer"},
@@ -473,7 +470,7 @@ class TestScan(object):
docs = [
x
async for x in helpers.async_scan(async_client, index="test_index", size=2)
async for x in actions.async_scan(async_client, index="test_index", size=2)
]
assert 100 == len(docs)
@@ -490,7 +487,7 @@ class TestScan(object):
with patch.object(async_client, "scroll", MockScroll()):
data = [
x
async for x in helpers.async_scan(
async for x in actions.async_scan(
async_client,
index="test_index",
size=2,
@@ -505,7 +502,7 @@ class TestScan(object):
with pytest.raises(ScanError):
data = [
x
async for x in helpers.async_scan(
async for x in actions.async_scan(
async_client,
index="test_index",
size=2,
@@ -532,7 +529,7 @@ class TestScan(object):
with patch.object(async_client, "scroll", MockScroll()):
data = [
x
async for x in helpers.async_scan(
async for x in actions.async_scan(
async_client,
index="test_index",
size=2,
@@ -556,7 +553,7 @@ class TestScan(object):
with pytest.raises(ScanError):
data = [
x
async for x in helpers.async_scan(
async for x in actions.async_scan(
async_client,
index="test_index",
size=2,
@@ -572,7 +569,7 @@ class TestScan(object):
with patch.object(async_client, "clear_scroll") as clear_mock:
data = [
x
async for x in helpers.async_scan(
async for x in actions.async_scan(
async_client, index="test_index"
)
]
@@ -581,7 +578,7 @@ class TestScan(object):
scroll_mock.assert_not_called()
clear_mock.assert_not_called()
@patch("opensearchpy._async.helpers.logger")
@patch("opensearchpy._async.helpers.actions.logger")
async def test_logger(self, logger_mock, async_client, scan_teardown):
bulk = []
for x in range(4):
@@ -592,7 +589,7 @@ class TestScan(object):
with patch.object(async_client, "scroll", MockScroll()):
_ = [
x
async for x in helpers.async_scan(
async for x in actions.async_scan(
async_client,
index="test_index",
size=2,
@@ -606,7 +603,7 @@ class TestScan(object):
try:
_ = [
x
async for x in helpers.async_scan(
async for x in actions.async_scan(
async_client,
index="test_index",
size=2,
@@ -635,7 +632,7 @@ class TestScan(object):
) as spy:
_ = [
x
async for x in helpers.async_scan(
async for x in actions.async_scan(
async_client, index="test_index", size=2
)
]
@@ -644,7 +641,7 @@ class TestScan(object):
spy.reset_mock()
_ = [
x
async for x in helpers.async_scan(
async for x in actions.async_scan(
async_client, index="test_index", size=2, clear_scroll=True
)
]
@@ -653,7 +650,7 @@ class TestScan(object):
spy.reset_mock()
_ = [
x
async for x in helpers.async_scan(
async for x in actions.async_scan(
async_client, index="test_index", size=2, clear_scroll=False
)
]
@@ -699,7 +696,7 @@ class TestScan(object):
) as clear_mock:
data = [
x
async for x in helpers.async_scan(
async for x in actions.async_scan(
async_client, index="test_index", **kwargs
)
]
@@ -739,7 +736,7 @@ class TestScan(object):
):
data = [
x
async for x in helpers.async_scan(
async for x in actions.async_scan(
async_client,
index="test_index",
headers={"not scroll": "kwargs"},
@@ -779,7 +776,7 @@ class TestReindex(object):
async def test_reindex_passes_kwargs_to_scan_and_bulk(
self, async_client, reindex_setup
):
await helpers.async_reindex(
await actions.async_reindex(
async_client,
"test_index",
"prod_index",
@@ -798,7 +795,7 @@ class TestReindex(object):
)["_source"]
async def test_reindex_accepts_a_query(self, async_client, reindex_setup):
await helpers.async_reindex(
await actions.async_reindex(
async_client,
"test_index",
"prod_index",
@@ -817,7 +814,7 @@ class TestReindex(object):
)["_source"]
async def test_all_documents_get_moved(self, async_client, reindex_setup):
await helpers.async_reindex(async_client, "test_index", "prod_index")
await actions.async_reindex(async_client, "test_index", "prod_index")
await async_client.indices.refresh()
assert await async_client.indices.exists("prod_index")
@@ -869,7 +866,7 @@ class TestParentChildReindex:
async def test_children_are_reindexed_correctly(
self, async_client, parent_reindex_setup
):
await helpers.async_reindex(async_client, "test-index", "real-index")
await actions.async_reindex(async_client, "test-index", "real-index")
assert {"question_answer": "question"} == (
await async_client.get(index="real-index", id=42)
)["_source"]
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,555 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
from datetime import datetime
from ipaddress import ip_address
import pytest
from pytest import raises
from pytz import timezone
from opensearchpy import (
Binary,
Boolean,
ConflictError,
Date,
Double,
InnerDoc,
Ip,
Keyword,
Long,
MetaField,
Nested,
NotFoundError,
Object,
Q,
RankFeatures,
Text,
analyzer,
)
from opensearchpy._async.helpers.actions import aiter
from opensearchpy._async.helpers.document import AsyncDocument
from opensearchpy._async.helpers.mapping import AsyncMapping
from opensearchpy.helpers.utils import AttrList
# Apply the `asyncio` pytest mark to every test in this module.
pytestmark = pytest.mark.asyncio
# Custom analyzer (standard tokenizer, lowercase + snowball filters) used by
# the `description` fields of the documents declared below.
snowball = analyzer("my_snow", tokenizer="standard", filter=["lowercase", "snowball"])
class User(InnerDoc):
    """Inner document with a text ``name`` field carrying a keyword ``raw`` sub-field."""

    name = Text(fields={"raw": Keyword()})
class Wiki(AsyncDocument):
    """Async document bound to the ``test-wiki`` index."""

    owner = Object(User)
    views = Long()
    ranked = RankFeatures()

    class Index:
        name = "test-wiki"
class Repository(AsyncDocument):
    """Async document bound to the ``git`` index whose searches are pre-filtered."""

    owner = Object(User)
    created_at = Date()
    description = Text(analyzer=snowball)
    tags = Keyword()

    @classmethod
    def search(cls):
        """Return the inherited search with a ``commit_repo == "repo"`` term filter."""
        base_search = super(Repository, cls).search()
        return base_search.filter("term", commit_repo="repo")

    class Index:
        name = "git"
class Commit(AsyncDocument):
    """Async document bound to the ``flat-git`` index with an explicit ``AsyncMapping``."""

    committed_date = Date()
    authored_date = Date()
    description = Text(analyzer=snowball)

    class Index:
        name = "flat-git"

    class Meta:
        mapping = AsyncMapping()
class History(InnerDoc):
    """Inner document holding a ``timestamp`` date and a ``diff`` text."""

    timestamp = Date()
    diff = Text()
class Comment(InnerDoc):
    """Inner document for a comment, with a nested list of ``History`` entries."""

    content = Text()
    created_at = Date()
    author = Object(User)
    history = Nested(History)

    class Meta:
        dynamic = MetaField(False)
class PullRequest(AsyncDocument):
    """Async document bound to the ``test-prs`` index with nested ``Comment`` objects."""

    comments = Nested(Comment)
    created_at = Date()

    class Index:
        name = "test-prs"
class SerializationDoc(AsyncDocument):
    """Async document bound to ``test-serialization`` with one field per scalar type under test."""

    i = Long()
    b = Boolean()
    d = Double()
    bin = Binary()
    ip = Ip()

    class Index:
        name = "test-serialization"
async def test_serialization(write_client):
    """Check that raw indexed values are coerced on load and serialized back by ``to_dict``."""
    await SerializationDoc.init()

    # Index loosely typed values directly through the client.
    raw_body = {
        "i": [1, 2, "3", None],
        "b": [True, False, "true", "false", None],
        "d": [0.1, "-0.1", None],
        "bin": ["SGVsbG8gV29ybGQ=", None],
        "ip": ["::1", "127.0.0.1", None],
    }
    await write_client.index(index="test-serialization", id=42, body=raw_body)

    loaded = await SerializationDoc.get(id=42)
    assert loaded.i == [1, 2, 3, None]
    assert loaded.b == [True, False, True, False, None]
    assert loaded.d == [0.1, -0.1, None]
    assert loaded.bin == [b"Hello World", None]
    assert loaded.ip == [ip_address("::1"), ip_address("127.0.0.1"), None]

    assert loaded.to_dict() == {
        "b": [True, False, True, False, None],
        "bin": ["SGVsbG8gV29ybGQ=", None],
        "d": [0.1, -0.1, None],
        "i": [1, 2, 3, None],
        "ip": ["::1", "127.0.0.1", None],
    }
async def test_nested_inner_hits_are_wrapped_properly(pull_request):
    """Check that nested inner hits are returned as the declared document classes."""
    history_query = Q(
        "nested",
        path="comments.history",
        inner_hits={},
        query=Q("match", comments__history__diff="ahoj"),
    )
    search = PullRequest.search().query(
        "nested", inner_hits={}, path="comments", query=history_query
    )

    response = await search.execute()
    pr = response.hits[0]
    assert isinstance(pr, PullRequest)
    assert isinstance(pr.comments[0], Comment)
    assert isinstance(pr.comments[0].history[0], History)

    # First level of inner hits: the matching comment.
    inner_comment = pr.meta.inner_hits.comments.hits[0]
    assert isinstance(inner_comment, Comment)
    assert inner_comment.author.name == "honzakral"
    assert isinstance(inner_comment.history[0], History)

    # Second level of inner hits: the matching history entry.
    inner_history = inner_comment.meta.inner_hits["comments.history"].hits[0]
    assert isinstance(inner_history, History)
    assert inner_history.timestamp == datetime(2012, 1, 1)
    assert "score" in inner_history.meta
async def test_nested_inner_hits_are_deserialized_properly(pull_request):
    """Check that date fields on the hit and its nested comments load as ``datetime``."""
    comment_query = Q("match", comments__content="hello")
    search = PullRequest.search().query(
        "nested",
        inner_hits={},
        path="comments",
        query=comment_query,
    )

    response = await search.execute()
    pr = response.hits[0]
    assert isinstance(pr.created_at, datetime)
    assert isinstance(pr.comments[0], Comment)
    assert isinstance(pr.comments[0].created_at, datetime)
async def test_nested_top_hits_are_wrapped_properly(pull_request):
    """Check that ``top_hits`` under a nested aggregation yields ``Comment`` objects."""
    s = PullRequest.search()
    s.aggs.bucket("comments", "nested", path="comments").metric(
        "hits", "top_hits", size=1
    )

    r = await s.execute()

    # The leftover debug print of the raw response was removed; the assertion
    # below is the only check this test performs.
    assert isinstance(r.aggregations.comments.hits.hits[0], Comment)
async def test_update_object_field(write_client):
    """Check that updating an object field with a list is reflected locally and after reload."""
    await Wiki.init()
    wiki = Wiki(
        owner=User(name="Honza Kral"),
        _id="opensearch-py",
        ranked={"test1": 0.1, "topic2": 0.2},
    )
    await wiki.save()

    result = await wiki.update(owner=[{"name": "Honza"}, {"name": "Nick"}])
    assert "updated" == result
    assert wiki.owner[0].name == "Honza"
    assert wiki.owner[1].name == "Nick"

    # Reload from the cluster and verify the stored state matches.
    reloaded = await Wiki.get(id="opensearch-py")
    assert reloaded.owner[0].name == "Honza"
    assert reloaded.owner[1].name == "Nick"
    assert reloaded.ranked == {"test1": 0.1, "topic2": 0.2}
async def test_update_script(write_client):
    """Check that a scripted update with an ``inc`` parameter increments ``views``."""
    await Wiki.init()
    wiki = Wiki(owner=User(name="Honza Kral"), _id="opensearch-py", views=42)
    await wiki.save()

    await wiki.update(script="ctx._source.views += params.inc", inc=5)

    reloaded = await Wiki.get(id="opensearch-py")
    assert reloaded.views == 47
async def test_update_retry_on_conflict(write_client):
    """Check that two copies fetched up front can both increment with ``retry_on_conflict=1``."""
    await Wiki.init()
    original = Wiki(owner=User(name="Honza Kral"), _id="opensearch-py", views=42)
    await original.save()

    # Both copies are loaded before either of them is updated.
    first_copy = await Wiki.get(id="opensearch-py")
    second_copy = await Wiki.get(id="opensearch-py")
    for copy in (first_copy, second_copy):
        await copy.update(
            script="ctx._source.views += params.inc", inc=5, retry_on_conflict=1
        )

    reloaded = await Wiki.get(id="opensearch-py")
    assert reloaded.views == 52
@pytest.mark.parametrize("retry_on_conflict", [None, 0])
async def test_update_conflicting_version(write_client, retry_on_conflict):
    """Expect ``ConflictError`` when a copy fetched before another update is updated."""
    await Wiki.init()
    original = Wiki(owner=User(name="Honza Kral"), _id="opensearch-py", views=42)
    await original.save()

    first_copy = await Wiki.get(id="opensearch-py")
    second_copy = await Wiki.get(id="opensearch-py")
    await first_copy.update(script="ctx._source.views += params.inc", inc=5)

    with raises(ConflictError):
        await second_copy.update(
            script="ctx._source.views += params.inc",
            inc=5,
            retry_on_conflict=retry_on_conflict,
        )
async def test_save_and_update_return_doc_meta(write_client):
    """Check that ``return_doc_meta=True`` makes save/update return the response metadata."""
    # Metadata keys expected in both the save and the update response.
    meta_keys = ("_id", "_primary_term", "_seq_no", "_shards", "_version")

    await Wiki.init()
    w = Wiki(owner=User(name="Honza Kral"), _id="opensearch-py", views=42)

    resp = await w.save(return_doc_meta=True)
    assert resp["_index"] == "test-wiki"
    assert resp["result"] == "created"
    # Use the membership operator instead of calling ``__contains__`` directly.
    for key in meta_keys:
        assert key in resp

    resp = await w.update(
        script="ctx._source.views += params.inc", inc=5, return_doc_meta=True
    )
    assert resp["_index"] == "test-wiki"
    assert resp["result"] == "updated"
    for key in meta_keys:
        assert key in resp
async def test_init(write_client):
    """Check that ``Repository.init`` with an explicit index creates that index."""
    await Repository.init(index="test-git")

    index_exists = await write_client.indices.exists(index="test-git")
    assert index_exists
async def test_get_raises_404_on_index_missing(data_client):
    """Expect ``NotFoundError`` when getting a document from the ``not-there`` index."""
    with raises(NotFoundError):
        await Repository.get("opensearch-dsl-php", index="not-there")
async def test_get_raises_404_on_non_existent_id(data_client):
    """Expect ``NotFoundError`` when getting the id ``opensearch-dsl-php``."""
    with raises(NotFoundError):
        await Repository.get("opensearch-dsl-php")
async def test_get_returns_none_if_404_ignored(data_client):
    """Check that ``ignore=404`` makes ``get`` return ``None`` instead of raising."""
    result = await Repository.get("opensearch-dsl-php", ignore=404)
    assert result is None
async def test_get_returns_none_if_404_ignored_and_index_doesnt_exist(data_client):
    """Check that ``ignore=404`` also yields ``None`` for the ``not-there`` index."""
    result = await Repository.get("42", index="not-there", ignore=404)
    assert result is None
async def test_get(data_client):
    """Check that ``get`` returns a ``Repository`` with the expected owner and date."""
    repo = await Repository.get("opensearch-py")

    assert isinstance(repo, Repository)
    assert repo.owner.name == "opensearch"
    assert datetime(2014, 3, 3) == repo.created_at
async def test_exists_return_true(data_client):
    """Check that ``exists`` is truthy for the id ``opensearch-py``."""
    found = await Repository.exists("opensearch-py")
    assert found
async def test_exists_false(data_client):
    """Check that ``exists`` is falsy for the id ``opensearch-dsl-php``."""
    found = await Repository.exists("opensearch-dsl-php")
    assert not found
async def test_get_with_tz_date(data_client):
    """Check that ``authored_date`` loads as the expected Europe/Prague-localized datetime."""
    first_commit = await Commit.get(
        id="3ca6e1e73a071a705b4babd2f581c91a2a3e5037", routing="opensearch-py"
    )

    tzinfo = timezone("Europe/Prague")
    expected = tzinfo.localize(datetime(2014, 5, 2, 13, 47, 19, 123000))
    assert expected == first_commit.authored_date
async def test_save_with_tz_date(data_client):
    """Check that a timezone-aware ``committed_date`` survives a save/reload round trip."""
    commit_id = "3ca6e1e73a071a705b4babd2f581c91a2a3e5037"
    tzinfo = timezone("Europe/Prague")

    first_commit = await Commit.get(id=commit_id, routing="opensearch-py")
    first_commit.committed_date = tzinfo.localize(
        datetime(2014, 5, 2, 13, 47, 19, 123456)
    )
    await first_commit.save()

    # Fetch the document again and compare against a freshly built expected value.
    first_commit = await Commit.get(id=commit_id, routing="opensearch-py")
    expected = tzinfo.localize(datetime(2014, 5, 2, 13, 47, 19, 123456))
    assert expected == first_commit.committed_date
# Document specs passed to `Commit.mget` by the tests below; per the inline
# markers, the entries alternate between ids that are missing and ids that exist.
COMMIT_DOCS_WITH_MISSING = [
    {"_id": "0"},  # Missing
    {"_id": "3ca6e1e73a071a705b4babd2f581c91a2a3e5037"},  # Existing
    {"_id": "f"},  # Missing
    {"_id": "eb3e543323f189fd7b698e66295427204fff5755"},  # Existing
]
async def test_mget(data_client):
    """Check that ``mget`` returns ``None`` placeholders at the positions of missing docs."""
    results = await Commit.mget(COMMIT_DOCS_WITH_MISSING)

    assert results[0] is None
    assert results[1].meta.id == "3ca6e1e73a071a705b4babd2f581c91a2a3e5037"
    assert results[2] is None
    assert results[3].meta.id == "eb3e543323f189fd7b698e66295427204fff5755"
async def test_mget_raises_exception_when_missing_param_is_invalid(data_client):
    """Expect ``ValueError`` for an unsupported ``missing`` value (``"raj"``)."""
    with raises(ValueError):
        await Commit.mget(COMMIT_DOCS_WITH_MISSING, missing="raj")
async def test_mget_raises_404_when_missing_param_is_raise(data_client):
    """Expect ``NotFoundError`` from ``mget`` when ``missing="raise"``."""
    with raises(NotFoundError):
        await Commit.mget(COMMIT_DOCS_WITH_MISSING, missing="raise")
async def test_mget_ignores_missing_docs_when_missing_param_is_skip(data_client):
    """Check that ``missing="skip"`` drops missing docs from the ``mget`` result."""
    results = await Commit.mget(COMMIT_DOCS_WITH_MISSING, missing="skip")

    assert results[0].meta.id == "3ca6e1e73a071a705b4babd2f581c91a2a3e5037"
    assert results[1].meta.id == "eb3e543323f189fd7b698e66295427204fff5755"
async def test_update_works_from_search_response(data_client):
    """Check that a document obtained from a search response can be updated."""
    response = await Repository.search().execute()
    repo = response[0]

    await repo.update(owner={"other_name": "opensearchpy"})
    assert "opensearchpy" == repo.owner.other_name

    # The stored document has the new key and still has the original owner name.
    reloaded = await Repository.get("opensearch-py")
    assert "opensearchpy" == reloaded.owner.other_name
    assert "opensearch" == reloaded.owner.name
async def test_update(data_client):
    """Check that ``update`` changes local fields, bumps the version and alters ``seq_no``."""
    repo = await Repository.get("opensearch-py")
    version_before = repo.meta.version
    seq_no_before = repo.meta.seq_no

    await repo.update(
        owner={"new_name": "opensearchpy"}, new_field="testing-update"
    )

    assert "opensearchpy" == repo.owner.new_name
    assert "testing-update" == repo.new_field

    # assert version has been updated
    assert repo.meta.version == version_before + 1

    reloaded = await Repository.get("opensearch-py")
    assert "testing-update" == reloaded.new_field
    assert "opensearchpy" == reloaded.owner.new_name
    assert "opensearch" == reloaded.owner.name
    assert "seq_no" in reloaded.meta
    assert reloaded.meta.seq_no != seq_no_before
    assert "primary_term" in reloaded.meta
async def test_save_updates_existing_doc(data_client):
    """Check that saving a loaded document updates it and refreshes its ``seq_no``."""
    repo = await Repository.get("opensearch-py")
    repo.new_field = "testing-save"
    seq_no_before = repo.meta.seq_no

    save_result = await repo.save()
    assert "updated" == save_result

    # Read the raw document back through the client.
    raw_doc = await data_client.get(index="git", id="opensearch-py")
    assert "testing-save" == raw_doc["_source"]["new_field"]
    assert raw_doc["_seq_no"] != seq_no_before
    assert raw_doc["_seq_no"] == repo.meta.seq_no
async def test_save_automatically_uses_seq_no_and_primary_term(data_client):
    """Expect ``ConflictError`` from ``save`` after ``meta.seq_no`` is bumped locally."""
    repo = await Repository.get("opensearch-py")
    repo.meta.seq_no += 1

    with raises(ConflictError):
        await repo.save()
async def test_delete_automatically_uses_seq_no_and_primary_term(data_client):
    """Expect ``ConflictError`` from ``delete`` after ``meta.seq_no`` is bumped locally."""
    repo = await Repository.get("opensearch-py")
    repo.meta.seq_no += 1

    with raises(ConflictError):
        await repo.delete()
async def assert_doc_equals(expected, actual):
    """Assert that every key of ``expected`` is in ``actual`` with an equal value.

    Keys present only in ``actual`` are not checked.
    """
    async for key in aiter(expected):
        assert key in actual
        assert actual[key] == expected[key]
async def test_can_save_to_different_index(write_client):
    """Check that ``save(index=...)`` stores the document in the given index."""
    repo = Repository(description="testing", meta={"id": 42})
    assert await repo.save(index="test-document")

    expected = {
        "found": True,
        "_index": "test-document",
        "_id": "42",
        "_source": {"description": "testing"},
    }
    stored = await write_client.get(index="test-document", id=42)
    await assert_doc_equals(expected, stored)
async def test_save_without_skip_empty_will_include_empty_fields(write_client):
    """Check that ``skip_empty=False`` keeps empty list, ``None`` and empty dict fields."""
    repo = Repository(field_1=[], field_2=None, field_3={}, meta={"id": 42})
    assert await repo.save(index="test-document", skip_empty=False)

    expected = {
        "found": True,
        "_index": "test-document",
        "_id": "42",
        "_source": {"field_1": [], "field_2": None, "field_3": {}},
    }
    stored = await write_client.get(index="test-document", id=42)
    await assert_doc_equals(expected, stored)
async def test_delete(write_client):
    """Check that ``delete`` removes a document addressed through ``meta.id``/``meta.index``."""
    source = {
        "organization": "opensearch",
        "created_at": "2014-03-03",
        "owner": {"name": "opensearch"},
    }
    await write_client.create(
        index="test-document",
        id="opensearch-py",
        body=source,
    )

    repo = Repository(meta={"id": "opensearch-py"})
    repo.meta.index = "test-document"
    await repo.delete()

    still_there = await write_client.exists(
        index="test-document",
        id="opensearch-py",
    )
    assert not still_there
async def test_search(data_client):
    """Check that the filtered ``Repository`` search counts exactly one document."""
    total = await Repository.search().count()
    assert total == 1
async def test_search_returns_proper_doc_classes(data_client):
    """Check that search hits are instances of ``Repository``."""
    response = await Repository.search().execute()
    first_hit = response.hits[0]

    assert isinstance(first_hit, Repository)
    assert first_hit.owner.name == "opensearch"
async def test_refresh_mapping(data_client):
    """Check that ``load_mappings`` populates the index mapping of a field-less document."""

    class Commit(AsyncDocument):
        class Index:
            name = "git"

    await Commit._index.load_mappings()

    loaded_mapping = Commit._index._mapping
    for field_name in ("stats", "committer", "description", "committed_date"):
        assert field_name in loaded_mapping
    assert isinstance(loaded_mapping["committed_date"], Date)
async def test_highlight_in_meta(data_client):
    """Check that highlight fragments are exposed on ``meta.highlight`` as an ``AttrList``."""
    search = (
        Commit.search()
        .query("match", description="inverting")
        .highlight("description")
    )
    response = await search.execute()
    commit = response[0]

    assert isinstance(commit, Commit)
    assert "description" in commit.meta.highlight
    fragments = commit.meta.highlight["description"]
    assert isinstance(fragments, AttrList)
    assert len(fragments) > 0
@@ -0,0 +1,274 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
from datetime import datetime
import pytest
from opensearchpy import A, Boolean, Date, Keyword
from opensearchpy._async.helpers.document import AsyncDocument
from opensearchpy._async.helpers.faceted_search import AsyncFacetedSearch
from opensearchpy.helpers.faceted_search import (
DateHistogramFacet,
NestedFacet,
RangeFacet,
TermsFacet,
)
from test_opensearchpy.test_async.test_server.test_helpers.test_document import (
PullRequest,
)
# Apply the `asyncio` pytest mark to every test in this module.
pytestmark = pytest.mark.asyncio
class Repos(AsyncDocument):
    """Async document bound to the ``git`` index with the fields used for repo facets."""

    is_public = Boolean()
    created_at = Date()

    class Index:
        name = "git"
class Commit(AsyncDocument):
    """Async document bound to the ``git`` index with the fields used for commit facets."""

    files = Keyword()
    committed_date = Date()

    class Index:
        name = "git"
class MetricSearch(AsyncFacetedSearch):
    """Faceted search over ``git`` whose ``files`` facet uses a max-``committed_date`` metric."""

    index = "git"
    doc_types = [Commit]

    facets = {
        "files": TermsFacet(field="files", metric=A("max", field="committed_date")),
    }
@pytest.fixture(scope="session")
def commit_search_cls(opensearch_version):
    """Session fixture returning a faceted-search class over the ``flat-git`` index."""

    class CommitSearch(AsyncFacetedSearch):
        index = "flat-git"
        fields = (
            "description",
            "files",
        )

        facets = {
            "files": TermsFacet(field="files"),
            "frequency": DateHistogramFacet(
                field="authored_date", min_doc_count=1, fixed_interval="1d"
            ),
            "deletions": RangeFacet(
                field="stats.deletions",
                ranges=[("ok", (None, 1)), ("good", (1, 5)), ("better", (5, None))],
            ),
        }

    return CommitSearch
@pytest.fixture(scope="session")
def repo_search_cls(opensearch_version):
    """Session fixture returning a faceted-search class over repo documents in ``git``."""

    class RepoSearch(AsyncFacetedSearch):
        index = "git"
        doc_types = [Repos]
        facets = {
            "public": TermsFacet(field="is_public"),
            "created": DateHistogramFacet(
                field="created_at", calendar_interval="month"
            ),
        }

        def search(self):
            """Return the inherited search with a ``commit_repo == "repo"`` term filter."""
            base_search = super(RepoSearch, self).search()
            return base_search.filter("term", commit_repo="repo")

    return RepoSearch
@pytest.fixture(scope="session")
def pr_search_cls(opensearch_version):
    """Session fixture returning a faceted-search class with a nested ``comments`` facet."""

    class PRSearch(AsyncFacetedSearch):
        index = "test-prs"
        doc_types = [PullRequest]
        facets = {
            "comments": NestedFacet(
                "comments",
                DateHistogramFacet(
                    field="comments.created_at", calendar_interval="month"
                ),
            )
        }

    return PRSearch
async def test_facet_with_custom_metric(data_client):
    """Check that the ``files`` facet is ordered by its custom metric, descending."""
    ms = MetricSearch()
    r = await ms.execute()

    # The second element of each facet tuple is compared as the metric value.
    dates = [f[1] for f in r.facets.files]
    # ``sorted`` already returns a new list, so no extra ``list()`` is needed.
    assert dates == sorted(dates, reverse=True)
    assert dates[0] == 1399038439000
async def test_nested_facet(pull_request, pr_search_cls):
    """Check the nested ``comments`` facet of an unfiltered pull-request search."""
    search = pr_search_cls()
    response = await search.execute()

    assert response.hits.total.value == 1
    assert [(datetime(2018, 1, 1, 0, 0), 1, False)] == response.facets.comments
async def test_nested_facet_with_filter(pull_request, pr_search_cls):
    """Check that a nested facet filter marks its bucket selected and filters the hits."""
    matching = pr_search_cls(filters={"comments": datetime(2018, 1, 1, 0, 0)})
    response = await matching.execute()

    assert response.hits.total.value == 1
    assert [(datetime(2018, 1, 1, 0, 0), 1, True)] == response.facets.comments

    # A filter on a different month is expected to leave no hits.
    non_matching = pr_search_cls(filters={"comments": datetime(2018, 2, 1, 0, 0)})
    response = await non_matching.execute()
    assert not response.hits
async def test_datehistogram_facet(data_client, repo_search_cls):
    """Check the monthly ``created`` date-histogram facet of the repo search."""
    search = repo_search_cls()
    response = await search.execute()

    assert response.hits.total.value == 1
    assert [(datetime(2014, 3, 1, 0, 0), 1, False)] == response.facets.created
async def test_boolean_facet(data_client, repo_search_cls):
    """Check that the ``public`` terms facet returns a real boolean value."""
    search = repo_search_cls()
    response = await search.execute()

    assert response.hits.total.value == 1
    assert [(True, 1, False)] == response.facets.public

    # Unpack the single bucket; only the value's identity is checked.
    value, _count, _selected = response.facets.public[0]
    assert value is True
async def test_empty_search_finds_everything(
    data_client, opensearch_version, commit_search_cls
):
    """Check hit count and all three facets of an unfiltered commit search."""
    search = commit_search_cls()
    response = await search.execute()

    assert response.hits.total.value == 52

    expected_files = [
        ("opensearchpy", 39, False),
        ("test_opensearchpy", 35, False),
        ("test_opensearchpy/test_dsl", 35, False),
        ("opensearchpy/query.py", 18, False),
        ("test_opensearchpy/test_dsl/test_search.py", 15, False),
        ("opensearchpy/utils.py", 14, False),
        ("test_opensearchpy/test_dsl/test_query.py", 13, False),
        ("opensearchpy/search.py", 12, False),
        ("opensearchpy/aggs.py", 11, False),
        ("test_opensearchpy/test_dsl/test_result.py", 5, False),
    ]
    assert expected_files == response.facets.files

    expected_frequency = [
        (datetime(2014, 3, 3, 0, 0), 2, False),
        (datetime(2014, 3, 4, 0, 0), 1, False),
        (datetime(2014, 3, 5, 0, 0), 3, False),
        (datetime(2014, 3, 6, 0, 0), 3, False),
        (datetime(2014, 3, 7, 0, 0), 9, False),
        (datetime(2014, 3, 10, 0, 0), 2, False),
        (datetime(2014, 3, 15, 0, 0), 4, False),
        (datetime(2014, 3, 21, 0, 0), 2, False),
        (datetime(2014, 3, 23, 0, 0), 2, False),
        (datetime(2014, 3, 24, 0, 0), 10, False),
        (datetime(2014, 4, 20, 0, 0), 2, False),
        (datetime(2014, 4, 22, 0, 0), 2, False),
        (datetime(2014, 4, 25, 0, 0), 3, False),
        (datetime(2014, 4, 26, 0, 0), 2, False),
        (datetime(2014, 4, 27, 0, 0), 2, False),
        (datetime(2014, 5, 1, 0, 0), 2, False),
        (datetime(2014, 5, 2, 0, 0), 1, False),
    ]
    assert expected_frequency == response.facets.frequency

    expected_deletions = [
        ("ok", 19, False),
        ("good", 14, False),
        ("better", 19, False),
    ]
    assert expected_deletions == response.facets.deletions
async def test_term_filters_are_shown_as_selected_and_data_is_filtered(
    data_client, commit_search_cls
):
    """Check hits and facets when the ``files`` facet is filtered to one path."""
    search = commit_search_cls(filters={"files": "test_opensearchpy/test_dsl"})
    response = await search.execute()

    assert 35 == response.hits.total.value

    # The filtered value is the only bucket flagged as selected.
    expected_files = [
        ("opensearchpy", 39, False),
        ("test_opensearchpy", 35, False),
        ("test_opensearchpy/test_dsl", 35, True),
        ("opensearchpy/query.py", 18, False),
        ("test_opensearchpy/test_dsl/test_search.py", 15, False),
        ("opensearchpy/utils.py", 14, False),
        ("test_opensearchpy/test_dsl/test_query.py", 13, False),
        ("opensearchpy/search.py", 12, False),
        ("opensearchpy/aggs.py", 11, False),
        ("test_opensearchpy/test_dsl/test_result.py", 5, False),
    ]
    assert expected_files == response.facets.files

    expected_frequency = [
        (datetime(2014, 3, 3, 0, 0), 1, False),
        (datetime(2014, 3, 5, 0, 0), 2, False),
        (datetime(2014, 3, 6, 0, 0), 3, False),
        (datetime(2014, 3, 7, 0, 0), 6, False),
        (datetime(2014, 3, 10, 0, 0), 1, False),
        (datetime(2014, 3, 15, 0, 0), 3, False),
        (datetime(2014, 3, 21, 0, 0), 2, False),
        (datetime(2014, 3, 23, 0, 0), 1, False),
        (datetime(2014, 3, 24, 0, 0), 7, False),
        (datetime(2014, 4, 20, 0, 0), 1, False),
        (datetime(2014, 4, 25, 0, 0), 3, False),
        (datetime(2014, 4, 26, 0, 0), 2, False),
        (datetime(2014, 4, 27, 0, 0), 1, False),
        (datetime(2014, 5, 1, 0, 0), 1, False),
        (datetime(2014, 5, 2, 0, 0), 1, False),
    ]
    assert expected_frequency == response.facets.frequency

    expected_deletions = [
        ("ok", 12, False),
        ("good", 10, False),
        ("better", 13, False),
    ]
    assert expected_deletions == response.facets.deletions
async def test_range_filters_are_shown_as_selected_and_data_is_filtered(
    data_client, commit_search_cls
):
    """Check the hit count when the ``deletions`` range facet is filtered to ``better``."""
    search = commit_search_cls(filters={"deletions": "better"})
    response = await search.execute()

    assert 19 == response.hits.total.value
async def test_pagination(data_client, commit_search_cls):
    """Check that slicing limits the returned hits while ``count`` still reports the total."""
    first_page = commit_search_cls()[0:20]

    assert 52 == await first_page.count()
    assert 20 == len(await first_page.execute())
@@ -0,0 +1,114 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
import pytest
from opensearchpy import Date, Text
from opensearchpy._async.helpers.document import AsyncDocument
from opensearchpy._async.helpers.index import AsyncIndex, AsyncIndexTemplate
from opensearchpy.helpers import analysis
# Apply the `asyncio` pytest mark to every test in this module.
pytestmark = pytest.mark.asyncio
class Post(AsyncDocument):
    """Async document whose ``title`` uses a custom keyword-tokenizer analyzer."""

    title = Text(analyzer=analysis.analyzer("my_analyzer", tokenizer="keyword"))
    published_from = Date()
async def test_index_template_works(write_client):
    """Check that an index matching a saved template's pattern gets the template's mappings."""
    template = AsyncIndexTemplate("test-template", "test-*")
    template.document(Post)
    template.settings(number_of_replicas=0, number_of_shards=1)
    await template.save()

    # "test-blog" matches the "test-*" pattern of the template.
    blog_index = AsyncIndex("test-blog")
    await blog_index.create()

    expected_mapping = {
        "test-blog": {
            "mappings": {
                "properties": {
                    "title": {"type": "text", "analyzer": "my_analyzer"},
                    "published_from": {"type": "date"},
                }
            }
        }
    }
    assert expected_mapping == await write_client.indices.get_mapping(
        index="test-blog"
    )
async def test_index_can_be_saved_even_with_settings(write_client):
    """Check that saving an index a second time applies a changed replica setting."""
    blog_index = AsyncIndex("test-blog", using=write_client)
    blog_index.settings(number_of_shards=3, number_of_replicas=0)
    await blog_index.save()

    blog_index.settings(number_of_replicas=1)
    await blog_index.save()

    current = await blog_index.get_settings()
    assert "1" == current["test-blog"]["settings"]["index"]["number_of_replicas"]
async def test_index_exists(data_client):
    """Check ``exists`` for the ``git`` index and for the ``not-there`` index."""
    git_exists = await AsyncIndex("git").exists()
    assert git_exists

    missing_exists = await AsyncIndex("not-there").exists()
    assert not missing_exists
async def test_index_can_be_created_with_settings_and_mappings(write_client):
    """Check that ``create`` applies the document's mappings, settings and analysis."""
    blog_index = AsyncIndex("test-blog", using=write_client)
    blog_index.document(Post)
    blog_index.settings(number_of_replicas=0, number_of_shards=1)
    await blog_index.create()

    expected_mapping = {
        "test-blog": {
            "mappings": {
                "properties": {
                    "title": {"type": "text", "analyzer": "my_analyzer"},
                    "published_from": {"type": "date"},
                }
            }
        }
    }
    assert expected_mapping == await write_client.indices.get_mapping(
        index="test-blog"
    )

    settings = await write_client.indices.get_settings(index="test-blog")
    index_settings = settings["test-blog"]["settings"]["index"]
    assert index_settings["number_of_replicas"] == "0"
    assert index_settings["number_of_shards"] == "1"
    assert index_settings["analysis"] == {
        "analyzer": {"my_analyzer": {"type": "custom", "tokenizer": "keyword"}}
    }
async def test_delete(write_client):
    """Check that ``AsyncIndex.delete`` removes an index created through the client."""
    index_body = {"settings": {"number_of_replicas": 0, "number_of_shards": 1}}
    await write_client.indices.create(
        index="test-index",
        body=index_body,
    )

    target = AsyncIndex("test-index", using=write_client)
    await target.delete()

    still_there = await write_client.indices.exists(index="test-index")
    assert not still_there
async def test_multiple_indices_with_same_doc_type_work(write_client):
    """Check that one document class can be attached to two indices, each getting its analysis."""
    i1 = AsyncIndex("test-index-1", using=write_client)
    i2 = AsyncIndex("test-index-2", using=write_client)

    for index in (i1, i2):
        index.document(Post)
        await index.create()

    for index_name in ("test-index-1", "test-index-2"):
        settings = await write_client.indices.get_settings(index=index_name)
        assert settings[index_name]["settings"]["index"]["analysis"] == {
            "analyzer": {"my_analyzer": {"type": "custom", "tokenizer": "keyword"}}
        }
@@ -0,0 +1,158 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
import pytest
from pytest import raises
from opensearchpy import exceptions
from opensearchpy._async.helpers import mapping
from opensearchpy.helpers import analysis
# Apply the `asyncio` pytest mark to every test in this module.
pytestmark = pytest.mark.asyncio
async def test_mapping_saved_into_opensearch(write_client):
    """Check that saving a mapping to a new index stores both declared fields."""
    custom_analyzer = analysis.analyzer("my_analyzer", tokenizer="keyword")
    m = mapping.AsyncMapping()
    m.field("name", "text", analyzer=custom_analyzer)
    m.field("tags", "keyword")

    await m.save("test-mapping", using=write_client)

    expected_mapping = {
        "test-mapping": {
            "mappings": {
                "properties": {
                    "name": {"type": "text", "analyzer": "my_analyzer"},
                    "tags": {"type": "keyword"},
                }
            }
        }
    }
    assert expected_mapping == await write_client.indices.get_mapping(
        index="test-mapping"
    )
async def test_mapping_saved_into_opensearch_when_index_already_exists_closed(
    write_client,
):
    """Check that saving raises ``IllegalOperation`` on the open index and works once closed."""
    custom_analyzer = analysis.analyzer("my_analyzer", tokenizer="keyword")
    m = mapping.AsyncMapping()
    m.field("name", "text", analyzer=custom_analyzer)
    await write_client.indices.create(index="test-mapping")

    # First attempt: the index exists and has not been closed yet.
    with raises(exceptions.IllegalOperation):
        await m.save("test-mapping", using=write_client)

    await write_client.cluster.health(index="test-mapping", wait_for_status="yellow")
    await write_client.indices.close(index="test-mapping")
    await m.save("test-mapping", using=write_client)

    expected_mapping = {
        "test-mapping": {
            "mappings": {
                "properties": {"name": {"type": "text", "analyzer": "my_analyzer"}}
            }
        }
    }
    assert expected_mapping == await write_client.indices.get_mapping(
        index="test-mapping"
    )
async def test_mapping_saved_into_opensearch_when_index_already_exists_with_analysis(
    write_client,
):
    """Check that a mapping saves into an index created with the analyzer already defined."""
    m = mapping.AsyncMapping()
    analyzer = analysis.analyzer("my_analyzer", tokenizer="keyword")
    m.field("name", "text", analyzer=analyzer)

    # Create the index with the mapping's analysis plus one extra analyzer.
    index_analysis = analyzer.get_analysis_definition()
    index_analysis["analyzer"]["other_analyzer"] = {
        "type": "custom",
        "tokenizer": "whitespace",
    }
    await write_client.indices.create(
        index="test-mapping", body={"settings": {"analysis": index_analysis}}
    )

    m.field("title", "text", analyzer=analyzer)
    await m.save("test-mapping", using=write_client)

    expected_mapping = {
        "test-mapping": {
            "mappings": {
                "properties": {
                    "name": {"type": "text", "analyzer": "my_analyzer"},
                    "title": {"type": "text", "analyzer": "my_analyzer"},
                }
            }
        }
    }
    assert expected_mapping == await write_client.indices.get_mapping(
        index="test-mapping"
    )
async def test_mapping_gets_updated_from_opensearch(write_client):
    """Load a mapping back from an existing index, then from an alias of it.

    The index is created directly through the client;
    ``AsyncMapping.from_opensearch`` must then reproduce the same field
    definitions whether it is given the index name or an alias pointing at it.
    """
    index_body = {
        "settings": {"number_of_shards": 1, "number_of_replicas": 0},
        "mappings": {
            "date_detection": False,
            "properties": {
                "title": {
                    "type": "text",
                    "analyzer": "snowball",
                    "fields": {"raw": {"type": "keyword"}},
                },
                "created_at": {"type": "date"},
                "comments": {
                    "type": "nested",
                    "properties": {
                        "created": {"type": "date"},
                        "author": {
                            "type": "text",
                            "analyzer": "snowball",
                            "fields": {"raw": {"type": "keyword"}},
                        },
                    },
                },
            },
        },
    }
    await write_client.indices.create(index="test-mapping", body=index_body)

    loaded = await mapping.AsyncMapping.from_opensearch(
        "test-mapping", using=write_client
    )

    top_level_fields = sorted(loaded.properties.properties._d_.keys())
    assert ["comments", "created_at", "title"] == top_level_fields

    expected = {
        "date_detection": False,
        "properties": {
            "comments": {
                "type": "nested",
                "properties": {
                    "created": {"type": "date"},
                    "author": {
                        "analyzer": "snowball",
                        "fields": {"raw": {"type": "keyword"}},
                        "type": "text",
                    },
                },
            },
            "created_at": {"type": "date"},
            "title": {
                "analyzer": "snowball",
                "fields": {"raw": {"type": "keyword"}},
                "type": "text",
            },
        },
    }
    assert expected == loaded.to_dict()

    # test same with alias
    await write_client.indices.put_alias(index="test-mapping", name="test-alias")
    loaded_via_alias = await mapping.AsyncMapping.from_opensearch(
        "test-alias", using=write_client
    )
    assert loaded_via_alias.to_dict() == loaded.to_dict()
@@ -0,0 +1,161 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
from __future__ import unicode_literals
import pytest
from pytest import raises
from opensearchpy import Date, Keyword, Q, Text, TransportError
from opensearchpy._async.helpers.document import AsyncDocument
from opensearchpy._async.helpers.search import AsyncMultiSearch, AsyncSearch
from opensearchpy.helpers.response import aggs
from test_opensearchpy.test_server.test_helpers.test_data import FLAT_DATA
pytestmark = pytest.mark.asyncio
class Repository(AsyncDocument):
    """Document on the ``git`` index whose searches are limited to ``commit_repo == "repo"``."""

    created_at = Date()
    description = Text(analyzer="snowball")
    tags = Keyword()

    @classmethod
    def search(cls):
        """Return the inherited search with a ``term`` filter on ``commit_repo`` applied."""
        base_search = super(Repository, cls).search()
        return base_search.filter("term", commit_repo="repo")

    class Index:
        name = "git"
class Commit(AsyncDocument):
    """Document bound to the ``flat-git`` index; declares no explicit fields."""

    class Index:
        # Index that ``Commit.search()`` and other document operations target.
        name = "flat-git"
async def test_filters_aggregation_buckets_are_accessible(data_client):
    """Named ``filters`` buckets nested under a ``terms`` agg are reachable by attribute."""
    has_tests_query = Q("term", files="test_opensearchpy/test_dsl")

    # Size-zero search: only the aggregations are of interest.
    s = Commit.search()[0:0]
    top_authors = s.aggs.bucket("top_authors", "terms", field="author.name.raw")
    has_tests = top_authors.bucket(
        "has_tests",
        "filters",
        filters={"yes": has_tests_query, "no": ~has_tests_query},
    )
    has_tests.metric("lines", "stats", field="stats.lines")

    response = await s.execute()

    first_author = response.aggregations.top_authors.buckets[0]
    yes_bucket = first_author.has_tests.buckets.yes
    assert isinstance(yes_bucket, aggs.Bucket)
    assert 35 == yes_bucket.doc_count
    assert 228 == yes_bucket.lines.max
async def test_top_hits_are_wrapped_in_response(data_client):
    """``top_hits`` results come back as ``TopHitsData`` yielding ``Commit`` objects."""
    s = Commit.search()[0:0]
    authors_agg = s.aggs.bucket("top_authors", "terms", field="author.name.raw")
    authors_agg.metric("top_commits", "top_hits", size=5)

    response = await s.execute()

    top_commits = response.aggregations.top_authors.buckets[0].top_commits
    assert isinstance(top_commits, aggs.TopHitsData)
    assert 5 == len(top_commits)

    # Iterating must yield the same number of hits, each wrapped as a Commit.
    hits = list(top_commits)
    assert 5 == len(hits)
    assert isinstance(hits[0], Commit)
async def test_inner_hits_are_wrapped_in_response(data_client):
    """Inner hits of a ``has_parent`` query are wrapped in the response class."""
    parent_query = {
        "parent_type": "repo",
        "inner_hits": {},
        "query": Q("match_all"),
    }
    s = AsyncSearch(index="git")[0:1].query("has_parent", **parent_query)

    response = await s.execute()

    first_hit = response.hits[0]
    inner_repo = first_hit.meta.inner_hits.repo
    assert isinstance(inner_repo, response.__class__)
    assert repr(first_hit.meta.inner_hits.repo[0]).startswith(
        "<Hit(git/opensearch-py): "
    )
async def test_scan_respects_doc_types(data_client):
    """Scanning a ``Repository`` search yields ``Repository`` instances."""
    scanned = Repository.search().scan()
    repos = await get_result(scanned)

    assert 1 == len(repos)
    only_repo = repos[0]
    assert isinstance(only_repo, Repository)
    assert only_repo.organization == "opensearch"
async def test_scan_iterates_through_all_docs(data_client):
    """``scan`` on ``flat-git`` returns every document listed in ``FLAT_DATA``."""
    commits = await get_result(AsyncSearch(index="flat-git").scan())

    assert 52 == len(commits)

    expected_ids = {d["_id"] for d in FLAT_DATA}
    scanned_ids = {c.meta.id for c in commits}
    assert expected_ids == scanned_ids
async def get_result(b):
    """Drain the async iterable ``b`` and return its items as a list, in order."""
    return [item async for item in b]
async def test_multi_search(data_client):
    """Two searches run through ``AsyncMultiSearch`` return one response each, in order."""
    s1 = Repository.search()
    s2 = AsyncSearch(index="flat-git")

    ms = AsyncMultiSearch().add(s1).add(s2)
    responses = await ms.execute()
    r1, r2 = responses

    # First response: the Repository search, tied back to its originating search.
    assert 1 == len(r1)
    assert isinstance(r1[0], Repository)
    assert r1._search is s1

    # Second response: the plain flat-git search.
    assert 52 == r2.hits.total.value
    assert r2._search is s2
async def test_multi_missing(data_client):
    """A search on a missing index raises by default, or yields ``None`` when errors are suppressed."""
    s1 = Repository.search()
    s2 = AsyncSearch(index="flat-git")
    s3 = AsyncSearch(index="does_not_exist")

    ms = AsyncMultiSearch().add(s1).add(s2).add(s3)

    # Default behaviour: the failing sub-search surfaces as a TransportError.
    with raises(TransportError):
        await ms.execute()

    responses = await ms.execute(raise_on_error=False)
    r1, r2, r3 = responses

    assert 1 == len(r1)
    assert isinstance(r1[0], Repository)
    assert r1._search is s1

    assert 52 == r2.hits.total.value
    assert r2._search is s2

    # The slot for the failed search is filled with None instead of a response.
    assert r3 is None
async def test_raw_subfield_can_be_used_in_aggs(data_client):
    """A ``terms`` aggregation on the ``author.name.raw`` sub-field returns a bucket."""
    search = AsyncSearch(index="git")[0:0]
    search.aggs.bucket("authors", "terms", field="author.name.raw", size=1)

    response = await search.execute()

    authors = response.aggregations.authors
    assert 1 == len(authors)

    expected_bucket = {"key": "Honza Král", "doc_count": 52}
    assert expected_bucket == authors[0]
@@ -0,0 +1,69 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
import pytest
from opensearchpy._async.helpers.update_by_query import AsyncUpdateByQuery
from opensearchpy.helpers.search import Q
pytestmark = pytest.mark.asyncio
async def test_update_by_query_no_script(write_client, setup_ubq_tests):
    """Run an update-by-query with a filter and no script, then check the counters."""
    missing_is_public = ~Q("exists", field="is_public")

    request = AsyncUpdateByQuery(using=write_client).index(setup_ubq_tests)
    request = request.filter(missing_is_public)
    response = await request.execute()

    assert response.total == 52
    # "took" is checked through item access here and attribute access below.
    assert response["took"] > 0
    assert not response.timed_out
    assert response.updated == 52
    assert response.deleted == 0
    assert response.took > 0
    assert response.success()
async def test_update_by_query_with_script(write_client, setup_ubq_tests):
    """Run an update-by-query whose script sets ``is_public`` on the matched documents."""
    missing_parent_shas = ~Q("exists", field="parent_shas")

    request = AsyncUpdateByQuery(using=write_client).index(setup_ubq_tests)
    request = request.filter(missing_parent_shas)
    request = request.script(source="ctx._source.is_public = false")
    request = request.params(conflicts="proceed")

    response = await request.execute()

    assert response.total == 2
    assert response.updated == 2
    assert response.version_conflicts == 0
async def test_delete_by_query_with_script(write_client, setup_ubq_tests):
    """Run an update-by-query whose script sets ``ctx.op`` to ``"delete"`` for one match."""
    single_parent = Q("match", parent_shas="1dd19210b5be92b960f7db6f66ae526288edccc3")

    request = AsyncUpdateByQuery(using=write_client).index(setup_ubq_tests)
    request = request.filter(single_parent)
    request = request.script(source='ctx.op = "delete"')
    request = request.params(conflicts="proceed")

    response = await request.execute()

    assert response.total == 1
    assert response.deleted == 1
    assert response.success()