Merging opensearch-dsl-py to opensearch-py (#287)

Signed-off-by: saimedhi <saimedhi@amazon.com>
This commit is contained in:
Sai Medhini Reddy Maryada
2023-02-14 15:03:56 -08:00
committed by GitHub
parent 93636399ec
commit c58375aa7b
98 changed files with 14190 additions and 26 deletions
+1 -1
View File
@@ -9,7 +9,7 @@ jobs:
strategy:
fail-fast: false
matrix:
stack_version: ['2.1.1']
stack_version: ['2.2.0']
steps:
- name: Checkout
+8 -1
View File
@@ -2,6 +2,7 @@
__pycache__/
*.py[cod]
*$py.class
*.py[co]
# C extensions
*.so
@@ -51,6 +52,7 @@ coverage.xml
.hypothesis/
.pytest_cache/
cover/
junit.xml
# Translations
*.mo
@@ -120,6 +122,7 @@ venv.bak/
# mkdocs documentation
/site
docs/_build
# mypy
.mypy_cache/
@@ -141,4 +144,8 @@ cython_debug/
# opensearch files
test_opensearch/cover
test_opensearch/local.py
.ci/output
.ci/output
#Vi text editor
.*.swp
*~
+1
View File
@@ -31,6 +31,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
- Added SigV4 support for Async Opensearch Client ([#254](https://github.com/opensearch-project/opensearch-py/pull/254))
- Compatibility with OpenSearch 2.1.0 - 2.4.1 ([#257](https://github.com/opensearch-project/opensearch-py/pull/257))
- Adding explicit parameters for AIOHttpConnection and AsyncTransport ([#276](https://github.com/opensearch-project/opensearch-py/pull/276))
- Merging opensearch-dsl-py into opensearch-py ([#287](https://github.com/opensearch-project/opensearch-py/pull/287))
### Changed
- Updated getting started to user guide ([#233](https://github.com/opensearch-project/opensearch-py/pull/233))
- Updated CA certificate handling to check OpenSSL environment variables before defaulting to certifi ([#196](https://github.com/opensearch-project/opensearch-py/pull/196))
+34 -1
View File
@@ -11,6 +11,8 @@
- [Deleting an index](#deleting-an-index)
- [Making API calls](#making-api-calls)
- [Point in time API](#point-in-time-api)
- [Using High-level Python client](#using-high-level-python-client)
- [Searching for documents with filters](#searching-for-documents-with-filters)
- [Using plugins](#using-plugins)
- [Alerting plugin](#alerting-plugin)
- [**Searching for monitors**](#searching-for-monitors)
@@ -228,6 +230,37 @@ print('\n The deleted point in time:')
print(response)
```
## Using High-level Python client
The high-level Python client is now merged into the low-level Python client, so opensearch-py supports creating and indexing documents, searching with and without filters, and updating documents using queries. See the [High-level Python client documentation](https://opensearch.org/docs/latest/clients/python-high-level/).
All the APIs newly added from opensearch-dsl-py are listed in [docs](https://github.com/opensearch-project/opensearch-py/tree/main/docs/source/api-ref).
The example below uses the `Search` API from the high-level Python client.
### Searching for documents with filters
```python
from opensearchpy import OpenSearch, Search
# Use the above mentioned examples for creating client.
# Then, create an index.
# Add a document to the index.
# Search for the document.
s = Search(using=client, index=index_name) \
.filter("term", category="search") \
.query("match", title="python")
response = s.execute()
print('\nSearch results:')
for hit in response:
print(hit.meta.score, hit.title)
# Delete the document.
# Delete the index.
```
## Using plugins
Plugin client definitions can be found here --
@@ -486,7 +519,7 @@ async def search():
search()
```
=======
### Using Kerberos
There are several python packages that provide Kerberos support over HTTP connections, such as [requests-kerberos](http://pypi.org/project/requests-kerberos) and [requests-gssapi](https://pypi.org/project/requests-gssapi). The following example shows how to setup the authentication. Note that some of the parameters, such as `mutual_authentication` might depend on the server settings.
+4
View File
@@ -11,3 +11,7 @@
```{eval-rst}
.. autoclass:: opensearchpy.Urllib3HttpConnection
```
```{eval-rst}
.. autoclass:: opensearchpy.connections
```
+16
View File
@@ -55,3 +55,19 @@
```{eval-rst}
.. autoclass:: opensearchpy.TransportError
```
```{eval-rst}
.. autoclass:: opensearchpy.OpenSearchDslException
```
```{eval-rst}
.. autoclass:: opensearchpy.IllegalOperation
```
```{eval-rst}
.. autoclass:: opensearchpy.UnknownDslObject
```
```{eval-rst}
.. autoclass:: opensearchpy.ValidationException
```
+23
View File
@@ -0,0 +1,23 @@
# helpers
```{toctree}
---
glob:
titlesonly:
maxdepth: 1
---
helpers/aggs
helpers/analysis
helpers/document
helpers/faceted_search
helpers/field
helpers/function
helpers/index
helpers/mapping
helpers/query
helpers/search
helpers/update_by_query
helpers/wrappers
```
+5
View File
@@ -0,0 +1,5 @@
# aggs
```{eval-rst}
.. autoclass:: opensearchpy.helpers.aggs.Agg
```
+5
View File
@@ -0,0 +1,5 @@
# analysis
```{eval-rst}
.. autoclass:: opensearchpy.helpers.analysis.Analyzer
```
+5
View File
@@ -0,0 +1,5 @@
# document
```{eval-rst}
.. autoclass:: opensearchpy.helpers.document.Document
```
@@ -0,0 +1,5 @@
# faceted_search
```{eval-rst}
.. autoclass:: opensearchpy.helpers.faceted_search.FacetedSearch
```
+5
View File
@@ -0,0 +1,5 @@
# field
```{eval-rst}
.. autoclass:: opensearchpy.helpers.field.Field
```
+5
View File
@@ -0,0 +1,5 @@
# function
```{eval-rst}
.. autoclass:: opensearchpy.helpers.function.ScoreFunction
```
+5
View File
@@ -0,0 +1,5 @@
# index
```{eval-rst}
.. autoclass:: opensearchpy.helpers.index.Index
```
+5
View File
@@ -0,0 +1,5 @@
# mapping
```{eval-rst}
.. autoclass:: opensearchpy.helpers.mapping.Mapping
```
+5
View File
@@ -0,0 +1,5 @@
# query
```{eval-rst}
.. autoclass:: opensearchpy.helpers.query.Query
```
+5
View File
@@ -0,0 +1,5 @@
# search
```{eval-rst}
.. autoclass:: opensearchpy.helpers.search.Search
```
@@ -0,0 +1,5 @@
# update_by_query
```{eval-rst}
.. autoclass:: opensearchpy.helpers.update_by_query.UpdateByQuery
```
+5
View File
@@ -0,0 +1,5 @@
# wrappers
```{eval-rst}
.. autoclass:: opensearchpy.helpers.wrappers.Range
```
+4
View File
@@ -3,3 +3,7 @@
```{eval-rst}
.. autoclass:: opensearchpy.JSONSerializer
```
```{eval-rst}
.. autoclass:: opensearchpy.AttrJSONSerializer
```
+143 -1
View File
@@ -44,7 +44,12 @@ logger = logging.getLogger("opensearch")
logger.addHandler(logging.NullHandler())
from .client import OpenSearch
from .connection import Connection, RequestsHttpConnection, Urllib3HttpConnection
from .connection import (
Connection,
RequestsHttpConnection,
Urllib3HttpConnection,
connections,
)
from .connection_pool import ConnectionPool, ConnectionSelector, RoundRobinSelector
from .exceptions import (
AuthenticationException,
@@ -52,17 +57,82 @@ from .exceptions import (
ConflictError,
ConnectionError,
ConnectionTimeout,
IllegalOperation,
ImproperlyConfigured,
NotFoundError,
OpenSearchDeprecationWarning,
OpenSearchDslException,
OpenSearchException,
OpenSearchWarning,
RequestError,
SerializationError,
SSLError,
TransportError,
UnknownDslObject,
ValidationException,
)
from .helpers import AWSV4SignerAsyncAuth, AWSV4SignerAuth
from .helpers.aggs import A
from .helpers.analysis import analyzer, char_filter, normalizer, token_filter, tokenizer
from .helpers.document import Document, InnerDoc, MetaField
from .helpers.faceted_search import (
DateHistogramFacet,
Facet,
FacetedResponse,
FacetedSearch,
HistogramFacet,
NestedFacet,
RangeFacet,
TermsFacet,
)
from .helpers.field import (
Binary,
Boolean,
Byte,
Completion,
CustomField,
Date,
DateRange,
DenseVector,
Double,
DoubleRange,
Field,
Float,
FloatRange,
GeoPoint,
GeoShape,
HalfFloat,
Integer,
IntegerRange,
Ip,
IpRange,
Join,
Keyword,
Long,
LongRange,
Murmur3,
Nested,
Object,
Percolator,
RangeField,
RankFeature,
RankFeatures,
ScaledFloat,
SearchAsYouType,
Short,
SparseVector,
Text,
TokenCount,
construct_field,
)
from .helpers.function import SF
from .helpers.index import Index, IndexTemplate
from .helpers.mapping import Mapping
from .helpers.query import Q
from .helpers.search import MultiSearch, Search
from .helpers.update_by_query import UpdateByQuery
from .helpers.utils import AttrDict, AttrList, DslBase
from .helpers.wrappers import Range
from .serializer import JSONSerializer
from .transport import Transport
@@ -97,6 +167,78 @@ __all__ = [
"OpenSearchDeprecationWarning",
"AWSV4SignerAuth",
"AWSV4SignerAsyncAuth",
"A",
"AttrDict",
"AttrList",
"Binary",
"Boolean",
"Byte",
"Completion",
"CustomField",
"Date",
"DateHistogramFacet",
"DateRange",
"DenseVector",
"Document",
"Double",
"DoubleRange",
"DslBase",
"Facet",
"FacetedResponse",
"FacetedSearch",
"Field",
"Float",
"FloatRange",
"GeoPoint",
"GeoShape",
"HalfFloat",
"HistogramFacet",
"IllegalOperation",
"Index",
"IndexTemplate",
"InnerDoc",
"Integer",
"IntegerRange",
"Ip",
"IpRange",
"Join",
"Keyword",
"Long",
"LongRange",
"Mapping",
"MetaField",
"MultiSearch",
"Murmur3",
"Nested",
"NestedFacet",
"Object",
"OpenSearchDslException",
"Percolator",
"Q",
"Range",
"RangeFacet",
"RangeField",
"RankFeature",
"RankFeatures",
"SF",
"ScaledFloat",
"Search",
"SearchAsYouType",
"Short",
"SparseVector",
"TermsFacet",
"Text",
"TokenCount",
"UnknownDslObject",
"UpdateByQuery",
"ValidationException",
"analyzer",
"char_filter",
"connections",
"construct_field",
"normalizer",
"token_filter",
"tokenizer",
]
try:
+70
View File
@@ -32,6 +32,7 @@ from .connection import AsyncHttpConnection as AsyncHttpConnection
from .connection import Connection as Connection
from .connection import RequestsHttpConnection as RequestsHttpConnection
from .connection import Urllib3HttpConnection as Urllib3HttpConnection
from .connection import connections as connections
from .connection_pool import ConnectionPool as ConnectionPool
from .connection_pool import ConnectionSelector as ConnectionSelector
from .connection_pool import RoundRobinSelector as RoundRobinSelector
@@ -40,14 +41,82 @@ from .exceptions import AuthorizationException as AuthorizationException
from .exceptions import ConflictError as ConflictError
from .exceptions import ConnectionError as ConnectionError
from .exceptions import ConnectionTimeout as ConnectionTimeout
from .exceptions import IllegalOperation as IllegalOperation
from .exceptions import ImproperlyConfigured as ImproperlyConfigured
from .exceptions import NotFoundError as NotFoundError
from .exceptions import OpenSearchDeprecationWarning as OpenSearchDeprecationWarning
from .exceptions import OpenSearchDslException as OpenSearchDslException
from .exceptions import OpenSearchException as OpenSearchException
from .exceptions import OpenSearchWarning as OpenSearchWarning
from .exceptions import RequestError as RequestError
from .exceptions import SerializationError as SerializationError
from .exceptions import SSLError as SSLError
from .exceptions import TransportError as TransportError
from .exceptions import UnknownDslObject as UnknownDslObject
from .exceptions import ValidationException as ValidationException
from .helpers.aggs import A as A
from .helpers.analysis import Analyzer, CharFilter, Normalizer, TokenFilter, Tokenizer
from .helpers.document import Document as Document
from .helpers.document import InnerDoc as InnerDoc
from .helpers.document import MetaField as MetaField
from .helpers.faceted_search import DateHistogramFacet as DateHistogramFacet
from .helpers.faceted_search import Facet as Facet
from .helpers.faceted_search import FacetedResponse as FacetedResponse
from .helpers.faceted_search import FacetedSearch as FacetedSearch
from .helpers.faceted_search import HistogramFacet as HistogramFacet
from .helpers.faceted_search import NestedFacet as NestedFacet
from .helpers.faceted_search import RangeFacet as RangeFacet
from .helpers.faceted_search import TermsFacet as TermsFacet
from .helpers.field import Binary as Binary
from .helpers.field import Boolean as Boolean
from .helpers.field import Byte as Byte
from .helpers.field import Completion as Completion
from .helpers.field import CustomField as CustomField
from .helpers.field import Date as Date
from .helpers.field import DateRange as DateRange
from .helpers.field import DenseVector as DenseVector
from .helpers.field import Double as Double
from .helpers.field import DoubleRange as DoubleRange
from .helpers.field import Field as Field
from .helpers.field import Float as Float
from .helpers.field import FloatRange as FloatRange
from .helpers.field import GeoPoint as GeoPoint
from .helpers.field import GeoShape as GeoShape
from .helpers.field import HalfFloat as HalfFloat
from .helpers.field import Integer as Integer
from .helpers.field import IntegerRange as IntegerRange
from .helpers.field import Ip as Ip
from .helpers.field import IpRange as IpRange
from .helpers.field import Join as Join
from .helpers.field import Keyword as Keyword
from .helpers.field import Long as Long
from .helpers.field import LongRange as LongRange
from .helpers.field import Murmur3 as Murmur3
from .helpers.field import Nested as Nested
from .helpers.field import Object as Object
from .helpers.field import Percolator as Percolator
from .helpers.field import RangeField as RangeField
from .helpers.field import RankFeature as RankFeature
from .helpers.field import RankFeatures as RankFeatures
from .helpers.field import ScaledFloat as ScaledFloat
from .helpers.field import SearchAsYouType as SearchAsYouType
from .helpers.field import Short as Short
from .helpers.field import SparseVector as SparseVector
from .helpers.field import Text as Text
from .helpers.field import TokenCount as TokenCount
from .helpers.field import construct_field as construct_field
from .helpers.function import SF as SF
from .helpers.index import Index as Index
from .helpers.index import IndexTemplate as IndexTemplate
from .helpers.mapping import Mapping as Mapping
from .helpers.query import Q as Q
from .helpers.search import MultiSearch as MultiSearch
from .helpers.search import Search as Search
from .helpers.update_by_query import UpdateByQuery as UpdateByQuery
from .helpers.utils import AttrDict as AttrDict
from .helpers.utils import AttrList as AttrList
from .helpers.utils import DslBase as DslBase
from .helpers.wrappers import Range as Range
from .serializer import JSONSerializer as JSONSerializer
from .transport import Transport as Transport
@@ -57,6 +126,7 @@ try:
from ._async.client import AsyncOpenSearch as AsyncOpenSearch
from ._async.http_aiohttp import AIOHttpConnection as AIOHttpConnection
from ._async.http_aiohttp import AsyncConnection as AsyncConnection
from ._async.transport import AsyncTransport as AsyncTransport
from .helpers import AWSV4SignerAsyncAuth as AWSV4SignerAsyncAuth
from .helpers import AWSV4SignerAuth as AWSV4SignerAuth
+1 -1
View File
@@ -24,4 +24,4 @@
# specific language governing permissions and limitations
# under the License.
__versionstr__ = "2.1.1"
__versionstr__ = "2.2.0"
+129
View File
@@ -0,0 +1,129 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from six import string_types
import opensearchpy
from opensearchpy.serializer import serializer
class Connections(object):
    """
    Class responsible for holding connections to different clusters. Used as a
    singleton in this module.

    Clients are either registered directly (``add_connection``,
    ``create_connection``) or described by keyword arguments (``configure``)
    and built the first time ``get_connection`` asks for them.
    """

    def __init__(self):
        # alias -> kwargs used to build a client for that alias on demand
        self._kwargs = {}
        # alias -> client object that was already built or explicitly added
        self._conns = {}

    def configure(self, **kwargs):
        """
        Configure multiple connections at once, useful for passing in config
        dictionaries obtained from other sources, like Django's settings or a
        configuration management tool.

        Example::

            connections.configure(
                default={'hosts': 'localhost'},
                dev={'hosts': ['opensearchdev1.example.com:9200'], 'sniff_on_start': True},
            )

        Connections will only be constructed lazily when requested through
        ``get_connection``.

        :arg kwargs: mapping of alias to the keyword arguments for that
            alias' client; it replaces any previously stored configuration.
        """
        for k in list(self._conns):
            # try and preserve existing client to keep the persistent connections alive
            if k in self._kwargs and kwargs.get(k, None) == self._kwargs[k]:
                continue
            # configuration changed (or the client had none): drop the client
            del self._conns[k]
        self._kwargs = kwargs

    def add_connection(self, alias, conn):
        """
        Add a connection object, it will be passed through as-is.

        :arg alias: name to register the object under
        :arg conn: the client object to store
        """
        self._conns[alias] = conn

    def remove_connection(self, alias):
        """
        Remove connection from the registry. Raises ``KeyError`` if connection
        wasn't found.

        Both the built client and the stored configuration for ``alias`` are
        removed; the error is raised only when neither existed.
        """
        errors = 0
        for d in (self._conns, self._kwargs):
            try:
                del d[alias]
            except KeyError:
                errors += 1

        if errors == 2:
            raise KeyError("There is no connection with alias %r." % alias)

    def create_connection(self, alias="default", **kwargs):
        """
        Construct an instance of ``opensearchpy.OpenSearch`` and register
        it under given alias.

        :arg alias: name to register the new client under
        :arg kwargs: passed to ``opensearchpy.OpenSearch``; ``serializer``
            defaults to the module's ``serializer`` when not supplied
        :returns: the newly created client
        """
        kwargs.setdefault("serializer", serializer)
        conn = self._conns[alias] = opensearchpy.OpenSearch(**kwargs)
        return conn

    def get_connection(self, alias="default"):
        """
        Retrieve a connection, construct it if necessary (only configuration
        was passed to us). If a non-string alias has been passed through we
        assume it's already a client instance and will just return it as-is.

        Raises ``KeyError`` if no client (or its definition) is registered
        under the alias.
        """
        # do not check isinstance(OpenSearch) so that people can wrap their
        # clients
        if not isinstance(alias, string_types):
            return alias

        # connection already established
        try:
            return self._conns[alias]
        except KeyError:
            pass

        # if not, look up its configuration; only *this* lookup may be
        # reported as a missing alias
        try:
            conn_kwargs = self._kwargs[alias]
        except KeyError:
            # no connection and no kwargs to set one up
            raise KeyError("There is no connection with alias %r." % alias)

        # built outside the ``try`` so that a KeyError raised while
        # constructing the client is not misreported as an unknown alias
        return self.create_connection(alias, **conn_kwargs)
# The single registry instance shared by everything that imports this module.
connections = Connections()
# Module-level aliases bound to the shared registry's methods, so callers can
# use e.g. ``get_connection(alias)`` without going through ``connections``.
configure = connections.configure
add_connection = connections.add_connection
remove_connection = connections.remove_connection
create_connection = connections.create_connection
get_connection = connections.get_connection
+29
View File
@@ -0,0 +1,29 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# THIS FILE IS AUTOMATICALLY GENERATED, DO NOT EDIT.
class Connections(object): ...
+1
View File
@@ -8,6 +8,7 @@
# GitHub history for details.
#
import asyncio
import os
import ssl
+22
View File
@@ -38,6 +38,12 @@ __all__ = [
"ConnectionTimeout",
"AuthenticationException",
"AuthorizationException",
"OpenSearchDslException",
"UnknownDslObject",
"ValidationException",
"IllegalOperation",
"OpenSearchWarning",
"OpenSearchDeprecationWarning",
]
@@ -163,6 +169,22 @@ class AuthorizationException(TransportError):
"""Exception representing a 403 status code."""
class OpenSearchDslException(Exception):
    """Common base class of the OpenSearch DSL exceptions defined here."""


class UnknownDslObject(OpenSearchDslException):
    """DSL error type for an unknown DSL object."""


class ValidationException(ValueError, OpenSearchDslException):
    """DSL validation error; it is also a ``ValueError``."""


class IllegalOperation(OpenSearchDslException):
    """DSL error type for an illegal operation."""
class OpenSearchWarning(Warning):
"""Warning that is raised when a deprecated option
or incorrect usage is flagged via the 'Warning' HTTP header.
+4
View File
@@ -52,6 +52,10 @@ class ConflictError(TransportError): ...
class RequestError(TransportError): ...
class AuthenticationException(TransportError): ...
class AuthorizationException(TransportError): ...
class OpenSearchDslException(Exception): ...
class UnknownDslObject(OpenSearchDslException): ...
class ValidationException(ValueError, OpenSearchDslException): ...
class IllegalOperation(OpenSearchDslException): ...
class OpenSearchWarning(Warning): ...
OpenSearchDeprecationWarning = OpenSearchWarning
+454
View File
@@ -0,0 +1,454 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
try:
import collections.abc as collections_abc # only works on python 3.3+
except ImportError:
import collections as collections_abc
from .response.aggs import AggResponse, BucketData, FieldBucketData, TopHitsData
from .utils import DslBase
def A(name_or_agg, filter=None, **params):
    """
    Build an aggregation object from one of three input forms.

    * a mapping such as ``{"terms": {"field": "tags"}, "aggs": {...}}`` -
      exactly one aggregation key plus optional ``"aggs"`` and ``"meta"``
      keys; no extra ``params`` may be given,
    * an existing ``Agg`` instance, returned unchanged; no extra ``params``
      may be given,
    * an aggregation name (``"terms"``) with its parameters as keyword
      arguments.

    :arg name_or_agg: mapping, ``Agg`` instance or aggregation name
    :arg filter: optional value stored as the ``filter`` parameter; only
        accepted when ``name_or_agg`` is ``"filter"``
    :arg params: parameters passed to the aggregation class
    :returns: the ``Agg`` instance (the input itself when it already is one)
    :raises ValueError: if ``filter`` is given for another aggregation, if
        ``params`` accompany a mapping or ``Agg`` instance, or if the mapping
        does not hold exactly one aggregation
    """
    if filter is not None:
        if name_or_agg != "filter":
            raise ValueError(
                "Aggregation %r doesn't accept positional argument 'filter'."
                % name_or_agg
            )
        params["filter"] = filter

    # {"terms": {"field": "tags"}, "aggs": {...}}
    if isinstance(name_or_agg, collections_abc.Mapping):
        if params:
            raise ValueError("A() cannot accept parameters when passing in a dict.")
        # copy into a plain dict: avoids modifying the caller's mapping
        # in-place and works for any Mapping, including read-only ones that
        # provide neither ``copy()`` nor ``pop()``
        agg = dict(name_or_agg)
        # pop out nested aggs
        aggs = agg.pop("aggs", None)
        # pop out meta data
        meta = agg.pop("meta", None)
        # should be {"terms": {"field": "tags"}}
        if len(agg) != 1:
            raise ValueError(
                'A() can only accept dict with an aggregation ({"terms": {...}}). '
                "Instead it got (%r)" % name_or_agg
            )
        agg_type, params = agg.popitem()
        if aggs or meta:
            # copy once so the caller's inner parameter dict stays untouched
            params = params.copy()
            if aggs:
                params["aggs"] = aggs
            if meta:
                params["meta"] = meta
        return Agg.get_dsl_class(agg_type)(_expand__to_dot=False, **params)

    # Terms(...) just return the nested agg
    elif isinstance(name_or_agg, Agg):
        if params:
            raise ValueError(
                "A() cannot accept parameters when passing in an Agg object."
            )
        return name_or_agg

    # "terms", field="tags"
    return Agg.get_dsl_class(name_or_agg)(**params)
class Agg(DslBase):
    """
    Base class of the aggregation objects in this module.

    Subclasses set ``name``; ``A`` is registered as the type shortcut.
    """

    _type_name = "agg"
    _type_shortcut = staticmethod(A)
    name = None

    def __contains__(self, key):
        # a plain aggregation never reports containing anything
        return False

    def to_dict(self):
        """Serialize, moving ``meta`` out of the aggregation body to the top level."""
        serialized = super(Agg, self).to_dict()
        body = serialized[self.name]
        if "meta" in body:
            serialized["meta"] = body.pop("meta")
        return serialized

    def result(self, search, data):
        """Wrap ``data`` in an ``AggResponse`` for this aggregation."""
        return AggResponse(self, search, data)
class AggBase(object):
    """
    Mixin providing dict-style access to the nested aggregations stored in
    ``self._params["aggs"]`` plus the chaining helpers ``metric``, ``bucket``
    and ``pipeline``.
    """

    _param_defs = {"aggs": {"type": "agg", "hash": True}}

    def __contains__(self, key):
        nested = self._params.get("aggs", {})
        return key in nested

    def __getitem__(self, agg_name):
        nested = self._params.setdefault("aggs", {})
        found = nested[agg_name]  # propagate KeyError
        if not isinstance(found, Bucket):
            return found

        # make sure we're not mutating a shared state - whenever accessing a
        # bucket, hand out a shallow copy of it to be safe
        clone = A(found.name, **found._params)
        # be sure to store the copy so any modifications to it will affect us
        self._params["aggs"][agg_name] = clone
        return clone

    def __setitem__(self, agg_name, agg):
        self.aggs[agg_name] = A(agg)

    def __iter__(self):
        return iter(self.aggs)

    def _agg(self, bucket, name, agg_type, *args, **params):
        """Create an aggregation, store it under ``name`` and pick the chaining target."""
        created = A(agg_type, *args, **params)
        self[name] = created
        # For chaining - new buckets are returned themselves, anything else
        # returns self._base so we can keep chaining
        return created if bucket else self._base

    def metric(self, name, agg_type, *args, **params):
        """Add an aggregation under ``name`` and return ``self._base``."""
        return self._agg(False, name, agg_type, *args, **params)

    def bucket(self, name, agg_type, *args, **params):
        """Add an aggregation under ``name`` and return the new aggregation."""
        return self._agg(True, name, agg_type, *args, **params)

    def pipeline(self, name, agg_type, *args, **params):
        """Add an aggregation under ``name`` and return ``self._base``."""
        return self._agg(False, name, agg_type, *args, **params)

    def result(self, search, data):
        """Wrap ``data`` in a ``BucketData`` for this aggregation."""
        return BucketData(self, search, data)
class Bucket(AggBase, Agg):
    """Aggregation that can hold nested aggregations (see ``AggBase``)."""

    def __init__(self, **params):
        super(Bucket, self).__init__(**params)
        # remember self for chaining
        self._base = self

    def to_dict(self):
        """Serialize, moving ``aggs`` out of the aggregation body to the top level."""
        # start the lookup after AggBase in the MRO, exactly as written originally
        serialized = super(AggBase, self).to_dict()
        body = serialized[self.name]
        if "aggs" in body:
            serialized["aggs"] = body.pop("aggs")
        return serialized
class Filter(Bucket):
    """``filter`` bucket aggregation; accepts its query as the ``filter`` argument."""

    name = "filter"
    _param_defs = {
        "filter": {"type": "query"},
        "aggs": {"type": "agg", "hash": True},
    }

    def __init__(self, filter=None, **params):
        # only forward ``filter`` when the caller actually supplied one
        supplied = {} if filter is None else {"filter": filter}
        params.update(supplied)
        super(Filter, self).__init__(**params)

    def to_dict(self):
        """Serialize, merging the ``filter`` entry directly into the aggregation body."""
        serialized = super(Filter, self).to_dict()
        body = serialized[self.name]
        inner = body.pop("filter", {})
        body.update(inner)
        return serialized
class Pipeline(Agg):
    """Base class of the pipeline aggregations; adds nothing to ``Agg``."""
# bucket aggregations
# Every class in this section derives from Bucket. Unless commented otherwise
# a class only sets ``name``, the string the aggregation is known by.
class Filters(Bucket):
name = "filters"
# declares "filters" as a hash of "query"-typed values, next to the usual "aggs"
_param_defs = {
"filters": {"type": "query", "hash": True},
"aggs": {"type": "agg", "hash": True},
}
class Children(Bucket):
name = "children"
class Parent(Bucket):
name = "parent"
class DateHistogram(Bucket):
name = "date_histogram"
# wraps the response in FieldBucketData rather than the BucketData used by AggBase.result
def result(self, search, data):
return FieldBucketData(self, search, data)
# derives from DateHistogram, so it shares that class' result()
class AutoDateHistogram(DateHistogram):
name = "auto_date_histogram"
class DateRange(Bucket):
name = "date_range"
class GeoDistance(Bucket):
name = "geo_distance"
class GeohashGrid(Bucket):
name = "geohash_grid"
class GeotileGrid(Bucket):
name = "geotile_grid"
class GeoCentroid(Bucket):
name = "geo_centroid"
class Global(Bucket):
name = "global"
class Histogram(Bucket):
name = "histogram"
# wraps the response in FieldBucketData rather than the BucketData used by AggBase.result
def result(self, search, data):
return FieldBucketData(self, search, data)
class IPRange(Bucket):
name = "ip_range"
class Missing(Bucket):
name = "missing"
class Nested(Bucket):
name = "nested"
class Range(Bucket):
name = "range"
class RareTerms(Bucket):
name = "rare_terms"
# wraps the response in FieldBucketData rather than the BucketData used by AggBase.result
def result(self, search, data):
return FieldBucketData(self, search, data)
class ReverseNested(Bucket):
name = "reverse_nested"
class SignificantTerms(Bucket):
name = "significant_terms"
class SignificantText(Bucket):
name = "significant_text"
class Terms(Bucket):
name = "terms"
# wraps the response in FieldBucketData rather than the BucketData used by AggBase.result
def result(self, search, data):
return FieldBucketData(self, search, data)
class Sampler(Bucket):
name = "sampler"
class DiversifiedSampler(Bucket):
name = "diversified_sampler"
class Composite(Bucket):
name = "composite"
# declares "sources" as a multi-valued hash of "agg"-typed values, next to the usual "aggs"
_param_defs = {
"sources": {"type": "agg", "hash": True, "multi": True},
"aggs": {"type": "agg", "hash": True},
}
class VariableWidthHistogram(Bucket):
name = "variable_width_histogram"
# wraps the response in FieldBucketData rather than the BucketData used by AggBase.result
def result(self, search, data):
return FieldBucketData(self, search, data)
# metric aggregations
# Every class in this section derives directly from Agg (not Bucket) and,
# apart from TopHits, only sets ``name``.
class TopHits(Agg):
name = "top_hits"
# wraps the response in TopHitsData instead of the AggResponse used by Agg.result
def result(self, search, data):
return TopHitsData(self, search, data)
class Avg(Agg):
name = "avg"
class WeightedAvg(Agg):
name = "weighted_avg"
class Cardinality(Agg):
name = "cardinality"
class ExtendedStats(Agg):
name = "extended_stats"
class Boxplot(Agg):
name = "boxplot"
class GeoBounds(Agg):
name = "geo_bounds"
class Max(Agg):
name = "max"
class MedianAbsoluteDeviation(Agg):
name = "median_absolute_deviation"
class Min(Agg):
name = "min"
class Percentiles(Agg):
name = "percentiles"
class PercentileRanks(Agg):
name = "percentile_ranks"
class ScriptedMetric(Agg):
name = "scripted_metric"
class Stats(Agg):
name = "stats"
class Sum(Agg):
name = "sum"
class TTest(Agg):
name = "t_test"
class ValueCount(Agg):
name = "value_count"
# pipeline aggregations
# Every class in this section derives from Pipeline and only sets ``name``.
class AvgBucket(Pipeline):
name = "avg_bucket"
class BucketScript(Pipeline):
name = "bucket_script"
class BucketSelector(Pipeline):
name = "bucket_selector"
class CumulativeSum(Pipeline):
name = "cumulative_sum"
class CumulativeCardinality(Pipeline):
name = "cumulative_cardinality"
class Derivative(Pipeline):
name = "derivative"
class ExtendedStatsBucket(Pipeline):
name = "extended_stats_bucket"
class Inference(Pipeline):
name = "inference"
class MaxBucket(Pipeline):
name = "max_bucket"
class MinBucket(Pipeline):
name = "min_bucket"
class MovingFn(Pipeline):
name = "moving_fn"
class MovingAvg(Pipeline):
name = "moving_avg"
class MovingPercentiles(Pipeline):
name = "moving_percentiles"
class Normalize(Pipeline):
name = "normalize"
class PercentilesBucket(Pipeline):
name = "percentiles_bucket"
class SerialDiff(Pipeline):
name = "serial_diff"
class StatsBucket(Pipeline):
name = "stats_bucket"
class SumBucket(Pipeline):
name = "sum_bucket"
class BucketSort(Pipeline):
name = "bucket_sort"
+104
View File
@@ -0,0 +1,104 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from typing import Any
from _typeshed import Incomplete
from ..response.aggs import AggResponse as AggResponse
from ..response.aggs import BucketData as BucketData
from ..response.aggs import FieldBucketData as FieldBucketData
from ..response.aggs import TopHitsData as TopHitsData
from .utils import DslBase
from .utils import DslBase as DslBase
def A(name_or_agg: Any, filter: Incomplete | None = ..., **params: Any) -> Any: ...
# Root classes of the aggregation hierarchy (bodies are elided in this stub).
class Agg(DslBase): ...
class AggBase(object): ...
class Bucket(AggBase, Agg): ...
class Filter(Bucket): ...
# Base class of the Pipeline-derived aggregations declared at the end of this stub.
class Pipeline(Agg): ...
# Aggregations deriving from Bucket (AutoDateHistogram does so via DateHistogram).
class Filters(Bucket): ...
class Children(Bucket): ...
class Parent(Bucket): ...
class DateHistogram(Bucket): ...
class AutoDateHistogram(DateHistogram): ...
class DateRange(Bucket): ...
class GeoDistance(Bucket): ...
class GeohashGrid(Bucket): ...
class GeotileGrid(Bucket): ...
class GeoCentroid(Bucket): ...
class Global(Bucket): ...
class Histogram(Bucket): ...
class IPRange(Bucket): ...
class Missing(Bucket): ...
class Nested(Bucket): ...
class Range(Bucket): ...
class RareTerms(Bucket): ...
class ReverseNested(Bucket): ...
class SignificantTerms(Bucket): ...
class SignificantText(Bucket): ...
class Terms(Bucket): ...
class Sampler(Bucket): ...
class DiversifiedSampler(Bucket): ...
class Composite(Bucket): ...
class VariableWidthHistogram(Bucket): ...
# Aggregations deriving directly from Agg.
class TopHits(Agg): ...
class Avg(Agg): ...
class WeightedAvg(Agg): ...
class Cardinality(Agg): ...
class ExtendedStats(Agg): ...
class Boxplot(Agg): ...
class GeoBounds(Agg): ...
class Max(Agg): ...
class MedianAbsoluteDeviation(Agg): ...
class Min(Agg): ...
class Percentiles(Agg): ...
class PercentileRanks(Agg): ...
class ScriptedMetric(Agg): ...
class Stats(Agg): ...
class Sum(Agg): ...
class TTest(Agg): ...
class ValueCount(Agg): ...
# Aggregations deriving from Pipeline.
class AvgBucket(Pipeline): ...
class BucketScript(Pipeline): ...
class BucketSelector(Pipeline): ...
class CumulativeSum(Pipeline): ...
class CumulativeCardinality(Pipeline): ...
class Derivative(Pipeline): ...
class ExtendedStatsBucket(Pipeline): ...
class Inference(Pipeline): ...
class MaxBucket(Pipeline): ...
class MinBucket(Pipeline): ...
class MovingFn(Pipeline): ...
class MovingAvg(Pipeline): ...
class MovingPercentiles(Pipeline): ...
class Normalize(Pipeline): ...
class PercentilesBucket(Pipeline): ...
class SerialDiff(Pipeline): ...
class StatsBucket(Pipeline): ...
class SumBucket(Pipeline): ...
class BucketSort(Pipeline): ...
+294
View File
@@ -0,0 +1,294 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import six
from opensearchpy.connection.connections import get_connection
from .utils import AttrDict, DslBase, merge
__all__ = ["tokenizer", "analyzer", "char_filter", "token_filter", "normalizer"]
class AnalysisBase(object):
    """
    Mixin providing the ``_type_shortcut`` factory that backs the module-level
    shortcuts (``analyzer``, ``tokenizer``, ...).
    """

    @classmethod
    def _type_shortcut(cls, name_or_instance, type=None, **kwargs):
        """
        Turn ``name_or_instance`` into an instance of this analysis type.

        :arg name_or_instance: an existing instance of ``cls`` (returned
            unchanged) or the name of the analysis component
        :arg type: optional type of a custom component
        :arg kwargs: parameters of a custom component
        :raises ValueError: if an instance is passed together with ``type``
            or any keyword parameters
        """
        has_params = bool(type or kwargs)

        if isinstance(name_or_instance, cls):
            if has_params:
                raise ValueError("%s() cannot accept parameters." % cls.__name__)
            return name_or_instance

        if has_params:
            # a type and/or parameters were supplied: build a custom component
            custom_cls = cls.get_dsl_class(type, "custom")
            return custom_cls(name_or_instance, type or "custom", **kwargs)

        # only a bare name was given: build the builtin flavour
        builtin_cls = cls.get_dsl_class("builtin")
        return builtin_cls(name_or_instance)
class CustomAnalysis(object):
    """
    Mixin for user-defined analysis components. Stores the component's name
    and the builtin type it is based on.
    """

    name = "custom"

    def __init__(self, filter_name, builtin_type="custom", **kwargs):
        self._name = filter_name
        self._builtin_type = builtin_type
        super(CustomAnalysis, self).__init__(**kwargs)

    def to_dict(self):
        """Serialize to the bare name, which is what appears in lists."""
        return self._name

    def get_definition(self):
        """
        Return the component's definition: the parent serialization stored
        under ``self.name``, with ``"type"`` set to the builtin type.
        """
        serialized = super(CustomAnalysis, self).to_dict()
        definition = serialized.pop(self.name)
        definition["type"] = self._builtin_type
        return definition
class CustomAnalysisDefinition(CustomAnalysis):
    """
    Custom component that can emit a full analysis definition: itself plus
    the custom tokenizer, filters and char filters it references.
    """

    def get_analysis_definition(self):
        """
        Build a dict keyed by section (``self._type_name``, ``"tokenizer"``,
        ``"filter"``, ``"char_filter"``) holding the definitions of this
        component and of every referenced component exposing
        ``get_definition``. Empty sections are omitted.
        """

        def definitions_of(components):
            # name -> definition for every component that can describe itself
            return {
                component._name: component.get_definition()
                for component in components
                if hasattr(component, "get_definition")
            }

        result = {self._type_name: {self._name: self.get_definition()}}

        tokenizer = getattr(self, "tokenizer", None)
        if "tokenizer" in self._param_defs and hasattr(tokenizer, "get_definition"):
            result["tokenizer"] = {tokenizer._name: tokenizer.get_definition()}

        token_filters = definitions_of(self.filter)
        if token_filters:
            result["filter"] = token_filters

        # any sub filter definitions like multiplexers etc?
        for component in self.filter:
            if not hasattr(component, "get_analysis_definition"):
                continue
            nested = component.get_analysis_definition()
            if nested:
                merge(result, nested, True)

        char_filters = definitions_of(self.char_filter)
        if char_filters:
            result["char_filter"] = char_filters

        return result
class BuiltinAnalysis(object):
    """Mixin for analysis components that are referenced purely by name."""

    name = "builtin"

    def __init__(self, name):
        # store the name before delegating up the MRO
        self._name = name
        super(BuiltinAnalysis, self).__init__()

    def to_dict(self):
        """Serialize to the bare name, which is what appears in lists."""
        serialized = self._name
        return serialized
# Common base of the analyzer classes. ``_type_name`` is the section key that
# CustomAnalysisDefinition.get_analysis_definition() files the definition under.
class Analyzer(AnalysisBase, DslBase):
    _type_name = "analyzer"
    # NOTE(review): presumably ``None`` marks this as the unnamed base of its
    # type family for DslBase lookups - confirm against DslBase.
    name = None
# Analyzer referenced by name only (see BuiltinAnalysis.to_dict).
class BuiltinAnalyzer(BuiltinAnalysis, Analyzer):
    def get_analysis_definition(self):
        # A name-only analyzer contributes no definition of its own.
        return {}
class CustomAnalyzer(CustomAnalysisDefinition, Analyzer):
    """User-defined analyzer built from a tokenizer, token filters and char filters."""

    _param_defs = {
        "filter": {"type": "token_filter", "multi": True},
        "char_filter": {"type": "char_filter", "multi": True},
        "tokenizer": {"type": "tokenizer"},
    }

    def simulate(self, text, using="default", explain=False, attributes=None):
        """
        Use the Analyze API of opensearch to test the outcome of this analyzer.

        :arg text: Text to be analyzed
        :arg using: connection alias to use, defaults to ``'default'``
        :arg explain: will output all token attributes for each token. You can
            filter token attributes you want to output by setting ``attributes``
            option.
        :arg attributes: if ``explain`` is specified, filter the token
            attributes to return.
        :return: the response of ``indices.analyze`` wrapped in an ``AttrDict``
        """
        client = get_connection(using)

        request = {"text": text, "explain": explain}
        if attributes:
            request["attributes"] = attributes

        full_definition = self.get_analysis_definition()
        own_definition = self.get_definition()

        for section in ("tokenizer", "char_filter", "filter"):
            if section in own_definition:
                known = full_definition.get(section, {})
                referenced = own_definition[section]
                # inline the definition of custom components, keep plain
                # names for everything else
                if isinstance(referenced, six.string_types):
                    request[section] = known.get(referenced, referenced)
                else:
                    request[section] = [
                        known.get(component, component) for component in referenced
                    ]

        if self._builtin_type != "custom":
            request["analyzer"] = self._builtin_type

        response = client.indices.analyze(body=request)
        return AttrDict(response)
# Common base of the normalizer classes. ``_type_name`` is the section key that
# CustomAnalysisDefinition.get_analysis_definition() files the definition under.
class Normalizer(AnalysisBase, DslBase):
    _type_name = "normalizer"
    # NOTE(review): presumably ``None`` marks this as the unnamed base of its
    # type family for DslBase lookups - confirm against DslBase.
    name = None
# Normalizer referenced by name only (see BuiltinAnalysis.to_dict).
class BuiltinNormalizer(BuiltinAnalysis, Normalizer):
    def get_analysis_definition(self):
        # A name-only normalizer contributes no definition of its own.
        return {}
# User-defined normalizer. It declares token filters and char filters but,
# unlike CustomAnalyzer, no tokenizer parameter.
class CustomNormalizer(CustomAnalysisDefinition, Normalizer):
    _param_defs = {
        "filter": {"type": "token_filter", "multi": True},
        "char_filter": {"type": "char_filter", "multi": True},
    }
# Common base of the tokenizer classes.
class Tokenizer(AnalysisBase, DslBase):
    # Matches the "tokenizer" type referenced by CustomAnalyzer._param_defs.
    _type_name = "tokenizer"
    # NOTE(review): presumably ``None`` marks this as the unnamed base of its
    # type family for DslBase lookups - confirm against DslBase.
    name = None
# Tokenizer referenced by name only; all behaviour comes from BuiltinAnalysis
# and Tokenizer.
class BuiltinTokenizer(BuiltinAnalysis, Tokenizer):
    pass
# User-defined tokenizer; all behaviour comes from CustomAnalysis and Tokenizer.
class CustomTokenizer(CustomAnalysis, Tokenizer):
    pass
# Common base of the token filter classes.
class TokenFilter(AnalysisBase, DslBase):
    # Matches the "token_filter" type referenced by the ``filter`` entries of
    # CustomAnalyzer._param_defs and CustomNormalizer._param_defs.
    _type_name = "token_filter"
    # NOTE(review): presumably ``None`` marks this as the unnamed base of its
    # type family for DslBase lookups - confirm against DslBase.
    name = None
# Token filter referenced by name only; all behaviour comes from
# BuiltinAnalysis and TokenFilter.
class BuiltinTokenFilter(BuiltinAnalysis, TokenFilter):
    pass
# User-defined token filter; also the base of MultiplexerTokenFilter and
# ConditionalTokenFilter.
class CustomTokenFilter(CustomAnalysis, TokenFilter):
    pass
class MultiplexerTokenFilter(CustomTokenFilter):
    """Token filter of type ``multiplexer``, whose ``filters`` are chains of filters."""

    name = "multiplexer"

    def get_definition(self):
        """
        Return the definition with every entry of ``filters`` rendered as a
        string: user-supplied strings are kept, lists are joined with ", ".
        """
        definition = super(CustomTokenFilter, self).get_definition()
        if "filters" not in definition:
            return definition

        rendered = []
        for chain in self.filters:
            if isinstance(chain, six.string_types):
                # comma delimited string given by user
                rendered.append(chain)
            else:
                # list of strings or TokenFilter objects
                names = [
                    item.to_dict() if hasattr(item, "to_dict") else item
                    for item in chain
                ]
                rendered.append(", ".join(names))
        definition["filters"] = rendered
        return definition

    def get_analysis_definition(self):
        """
        Return ``{"filter": {...}}`` with the definitions of all filters in
        non-string chains that expose ``get_definition``; ``{}`` when no
        ``filters`` attribute is available.
        """
        if not hasattr(self, "filters"):
            return {}

        collected = {}
        for chain in self.filters:
            if not isinstance(chain, six.string_types):
                for item in chain:
                    if hasattr(item, "get_definition"):
                        collected[item._name] = item.get_definition()
        return {"filter": collected}
class ConditionalTokenFilter(CustomTokenFilter):
    """Token filter of type ``condition``, wrapping a list of other filters."""

    name = "condition"

    def get_definition(self):
        """
        Return the definition with ``filter`` rendered as a list of names
        (objects exposing ``to_dict`` are serialized, others kept as-is).
        """
        definition = super(CustomTokenFilter, self).get_definition()
        if "filter" in definition:
            names = []
            for item in self.filter:
                names.append(item.to_dict() if hasattr(item, "to_dict") else item)
            definition["filter"] = names
        return definition

    def get_analysis_definition(self):
        """
        Return ``{"filter": {...}}`` with the definitions of the wrapped
        filters that expose ``get_definition``; ``{}`` when no ``filter``
        attribute is available.
        """
        if not hasattr(self, "filter"):
            return {}

        wrapped = {}
        for item in self.filter:
            if hasattr(item, "get_definition"):
                wrapped[item._name] = item.get_definition()
        return {"filter": wrapped}
# Common base of the char filter classes.
class CharFilter(AnalysisBase, DslBase):
    # Matches the "char_filter" type referenced by CustomAnalyzer._param_defs
    # and CustomNormalizer._param_defs.
    _type_name = "char_filter"
    # NOTE(review): presumably ``None`` marks this as the unnamed base of its
    # type family for DslBase lookups - confirm against DslBase.
    name = None
# Char filter referenced by name only; all behaviour comes from
# BuiltinAnalysis and CharFilter.
class BuiltinCharFilter(BuiltinAnalysis, CharFilter):
    pass
# User-defined char filter; all behaviour comes from CustomAnalysis and CharFilter.
class CustomCharFilter(CustomAnalysis, CharFilter):
    pass
# Shortcuts for direct use: each name is the ``_type_shortcut`` classmethod of
# the matching base class, so e.g. ``analyzer(name_or_instance, type=None,
# **kwargs)`` returns the instance unchanged, a builtin component for a bare
# name, or a custom component when a type or parameters are given.
analyzer = Analyzer._type_shortcut
tokenizer = Tokenizer._type_shortcut
token_filter = TokenFilter._type_shortcut
char_filter = CharFilter._type_shortcut
normalizer = Normalizer._type_shortcut
+49
View File
@@ -0,0 +1,49 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from .utils import DslBase
# Type stubs for the analysis classes; only the inheritance structure is
# declared, bodies are elided.
# Mixins shared by all analysis component families.
class AnalysisBase(object): ...
class CustomAnalysis(object): ...
class CustomAnalysisDefinition(CustomAnalysis): ...
class BuiltinAnalysis(object): ...
# Analyzers.
class Analyzer(AnalysisBase, DslBase): ...
class BuiltinAnalyzer(BuiltinAnalysis, Analyzer): ...
class CustomAnalyzer(CustomAnalysisDefinition, Analyzer): ...
# Normalizers.
class Normalizer(AnalysisBase, DslBase): ...
class BuiltinNormalizer(BuiltinAnalysis, Normalizer): ...
class CustomNormalizer(CustomAnalysisDefinition, Normalizer): ...
# Tokenizers.
class Tokenizer(AnalysisBase, DslBase): ...
class BuiltinTokenizer(BuiltinAnalysis, Tokenizer): ...
class CustomTokenizer(CustomAnalysis, Tokenizer): ...
# Token filters.
class TokenFilter(AnalysisBase, DslBase): ...
class BuiltinTokenFilter(BuiltinAnalysis, TokenFilter): ...
class CustomTokenFilter(CustomAnalysis, TokenFilter): ...
class MultiplexerTokenFilter(CustomTokenFilter): ...
class ConditionalTokenFilter(CustomTokenFilter): ...
# Char filters.
class CharFilter(AnalysisBase, DslBase): ...
class BuiltinCharFilter(BuiltinAnalysis, CharFilter): ...
class CustomCharFilter(CustomAnalysis, CharFilter): ...
+505
View File
@@ -0,0 +1,505 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
try:
import collections.abc as collections_abc # only works on python 3.3+
except ImportError:
import collections as collections_abc
from fnmatch import fnmatch
from six import add_metaclass, iteritems
from opensearchpy.connection.connections import get_connection
from opensearchpy.exceptions import NotFoundError, RequestError
from ..exceptions import IllegalOperation, ValidationException
from .field import Field
from .index import Index
from .mapping import Mapping
from .search import Search
from .utils import DOC_META_FIELDS, META_FIELDS, ObjectBase, merge
class MetaField(object):
    """
    Records the positional and keyword arguments it was created with.
    ``DocumentOptions`` later forwards them to ``Mapping.meta``.
    """

    def __init__(self, *args, **kwargs):
        self.args = args
        self.kwargs = kwargs
class DocumentMeta(type):
    """Metaclass attaching a ``DocumentOptions`` instance as ``_doc_type``."""

    def __new__(cls, name, bases, attrs):
        # DocumentOptions pops ``Meta`` and the declared fields out of
        # ``attrs`` in place, so it has to run before the class is created.
        options = DocumentOptions(name, bases, attrs)
        attrs["_doc_type"] = options
        return super(DocumentMeta, cls).__new__(cls, name, bases, attrs)
class IndexMeta(DocumentMeta):
    """Metaclass that additionally binds an ``Index`` to user-defined documents."""

    # global flag to guard us from associating an Index with the base Document
    # class, only user defined subclasses should have an _index attr
    _document_initialized = False

    def __new__(cls, name, bases, attrs):
        document_cls = super(IndexMeta, cls).__new__(cls, name, bases, attrs)
        if cls._document_initialized:
            bound_index = cls.construct_index(attrs.pop("Index", None), bases)
            document_cls._index = bound_index
            bound_index.document(document_cls)
        # the first class created (the base Document) only flips the flag
        cls._document_initialized = True
        return document_cls

    @classmethod
    def construct_index(cls, opts, bases):
        """
        Return the ``Index`` for a new document class.

        With an ``Index`` options class, build a fresh index from its
        ``name``, ``using``, ``settings``, ``aliases`` and ``analyzers``.
        Without one, reuse the first ``_index`` found on the bases, or fall
        back to an index whose name is ``None``.
        """
        if opts is not None:
            index = Index(
                getattr(opts, "name", "*"), using=getattr(opts, "using", "default")
            )
            index.settings(**getattr(opts, "settings", {}))
            index.aliases(**getattr(opts, "aliases", {}))
            for analyzer in getattr(opts, "analyzers", ()):
                index.analyzer(analyzer)
            return index

        for base in bases:
            if hasattr(base, "_index"):
                return base._index

        # Set None as Index name so it will set _all while making the query
        return Index(name=None)
class DocumentOptions(object):
    """
    Per-class document options: builds the mapping from the class body's
    ``Meta`` class, its declared fields and the mappings of its bases.
    """

    def __init__(self, name, bases, attrs):
        meta = attrs.pop("Meta", None)

        # create the mapping instance
        self.mapping = getattr(meta, "mapping", Mapping())

        # register all declared fields into the mapping and take them off
        # the class body
        declared = [
            (attr_name, attr_value)
            for attr_name, attr_value in iteritems(attrs)
            if isinstance(attr_value, Field)
        ]
        for field_name, field in declared:
            self.mapping.field(field_name, field)
            del attrs[field_name]

        # add all the mappings for meta fields
        for meta_name in dir(meta):
            candidate = getattr(meta, meta_name, None)
            if isinstance(candidate, MetaField):
                self.mapping.meta(meta_name, *candidate.args, **candidate.kwargs)

        # document inheritance - include the fields from parents' mappings
        for base in bases:
            if hasattr(base, "_doc_type") and hasattr(base._doc_type, "mapping"):
                self.mapping.update(base._doc_type.mapping, update_only=True)

    @property
    def name(self):
        """Name of the mapping's properties object."""
        return self.mapping.properties.name
@add_metaclass(DocumentMeta)
class InnerDoc(ObjectBase):
    """
    Common class for inner documents like Object or Nested
    """

    @classmethod
    def from_opensearch(cls, data, data_only=False):
        """
        Build an instance from ``data``. With ``data_only`` the data is first
        wrapped as ``{"_source": data}``.
        """
        payload = {"_source": data} if data_only else data
        return super(InnerDoc, cls).from_opensearch(payload)
@add_metaclass(IndexMeta)
class Document(ObjectBase):
"""
Model-like class for persisting documents in opensearch.
"""
@classmethod
def _matches(cls, hit):
    """
    Tell whether ``hit`` belongs to this document class: always ``True``
    when the index has no name, otherwise an ``fnmatch`` of the hit's
    ``_index`` against the index name.
    """
    pattern = cls._index._name
    if pattern is None:
        return True
    hit_index = hit.get("_index", "")
    return fnmatch(hit_index, pattern)
@classmethod
def _get_using(cls, using=None):
    """Return ``using`` if truthy, else the connection alias of the index."""
    if using:
        return using
    return cls._index._using
@classmethod
def _get_connection(cls, using=None):
    """Return the connection for ``using`` (or the index's alias)."""
    alias = cls._get_using(using)
    return get_connection(alias)
@classmethod
def _default_index(cls, index=None):
    """Return ``index`` if truthy, else the name of the class's index."""
    if index:
        return index
    return cls._index._name
@classmethod
def init(cls, index=None, using=None):
    """
    Create the index and populate the mappings in opensearch.

    :arg index: if given, a clone of the class's index under this name is
        saved instead of the index itself
    :arg using: connection alias passed to ``Index.save``
    """
    target = cls._index.clone(name=index) if index else cls._index
    target.save(using=using)
def _get_index(self, index=None, required=True):
    """
    Resolve the index to write to: the explicit ``index``, then
    ``self.meta.index``, then the name of the class's index.

    :raises ValidationException: if nothing was found and ``required`` is
        set, or if the resolved name contains a ``*`` wildcard
    """
    resolved = index
    if resolved is None:
        resolved = getattr(self.meta, "index", None)
        if resolved is None:
            resolved = getattr(self._index, "_name", None)

    if required and resolved is None:
        raise ValidationException("No index")
    if resolved and "*" in resolved:
        raise ValidationException("You cannot write to a wildcard index.")
    return resolved
def __repr__(self):
    """Render as ``ClassName(index=..., id=...)``, using only the keys present in meta."""
    shown = []
    for key in ("index", "id"):
        if key in self.meta:
            shown.append("{}={!r}".format(key, getattr(self.meta, key)))
    return "{}({})".format(self.__class__.__name__, ", ".join(shown))
@classmethod
def search(cls, using=None, index=None):
    """
    Create an :class:`~opensearchpy.Search` instance that will search
    over this ``Document``.

    :arg using: connection alias, defaults to the alias of the index
    :arg index: index to search, defaults to the name of the index
    """
    connection_alias = cls._get_using(using)
    index_name = cls._default_index(index)
    return Search(using=connection_alias, index=index_name, doc_type=[cls])
@classmethod
def get(cls, id, using=None, index=None, **kwargs):
    """
    Retrieve a single document from opensearch using its ``id``.

    :arg id: ``id`` of the document to be retrieved
    :arg index: opensearch index to use, if the ``Document`` is
        associated with an index this can be omitted.
    :arg using: connection alias to use, defaults to ``'default'``

    Any additional keyword arguments will be passed to
    ``OpenSearch.get`` unchanged.

    :return: the document instance, or ``None`` when the response is not
        flagged as ``found``
    """
    client = cls._get_connection(using)
    raw = client.get(index=cls._default_index(index), id=id, **kwargs)
    if raw.get("found", False):
        return cls.from_opensearch(raw)
    return None
@classmethod
def exists(cls, id, using=None, index=None, **kwargs):
    """
    Check whether a single document exists in opensearch, using its ``id``.

    :arg id: ``id`` of the document to check for
    :arg index: opensearch index to use, if the ``Document`` is
        associated with an index this can be omitted.
    :arg using: connection alias to use, defaults to ``'default'``

    Any additional keyword arguments will be passed to
    ``OpenSearch.exists`` unchanged.
    """
    client = cls._get_connection(using)
    target_index = cls._default_index(index)
    return client.exists(index=target_index, id=id, **kwargs)
@classmethod
def mget(
    cls, docs, using=None, index=None, raise_on_error=True, missing="none", **kwargs
):
    r"""
    Retrieve multiple documents by their ``id``\s. Returns a list of instances
    in the same order as requested.

    :arg docs: list of ``id``\s of the documents to be retrieved or a list
        of document specifications as per
        https://opensearch.org/docs/latest/opensearch/rest-api/document-apis/multi-get/
    :arg index: opensearch index to use, if the ``Document`` is
        associated with an index this can be omitted.
    :arg using: connection alias to use, defaults to ``'default'``
    :arg raise_on_error: if ``True`` (default), raise ``RequestError`` when
        any returned document carries an ``error`` entry; if ``False`` such
        documents are treated according to ``missing == 'none'`` only.
    :arg missing: what to do when one of the documents requested is not
        found. Valid options are ``'none'`` (use ``None``), ``'raise'`` (raise
        ``NotFoundError``) or ``'skip'`` (ignore the missing document).

    Any additional keyword arguments will be passed to
    ``OpenSearch.mget`` unchanged.

    :raises ValueError: if ``missing`` is not one of the valid options
    :raises RequestError: if errors were collected (see ``raise_on_error``)
    :raises NotFoundError: if ``missing='raise'`` and documents were not found
    """
    if missing not in ("raise", "skip", "none"):
        raise ValueError("'missing' must be 'raise', 'skip', or 'none'.")
    opensearch = cls._get_connection(using)
    # Mappings are sent as given (full document specifications); anything
    # else is treated as a bare id.
    body = {
        "docs": [
            doc if isinstance(doc, collections_abc.Mapping) else {"_id": doc}
            for doc in docs
        ]
    }
    results = opensearch.mget(body, index=cls._default_index(index), **kwargs)
    objs, error_docs, missing_docs = [], [], []
    for doc in results["docs"]:
        if doc.get("found"):
            if error_docs or missing_docs:
                # We're going to raise an exception anyway, so avoid an
                # expensive call to cls.from_opensearch().
                continue
            objs.append(cls.from_opensearch(doc))
        elif doc.get("error"):
            if raise_on_error:
                error_docs.append(doc)
            # keep the result list aligned with the request when asked to
            if missing == "none":
                objs.append(None)
        # The doc didn't cause an error, but the doc also wasn't found.
        elif missing == "raise":
            missing_docs.append(doc)
        elif missing == "none":
            objs.append(None)
        # with missing == "skip" the document is simply left out
    # errors take precedence over missing documents
    if error_docs:
        error_ids = [doc["_id"] for doc in error_docs]
        message = "Required routing not provided for documents %s."
        message %= ", ".join(error_ids)
        raise RequestError(400, message, error_docs)
    if missing_docs:
        missing_ids = [doc["_id"] for doc in missing_docs]
        message = "Documents %s not found." % ", ".join(missing_ids)
        raise NotFoundError(404, message, {"docs": missing_docs})
    return objs
def delete(self, using=None, index=None, **kwargs):
    """
    Delete the instance in opensearch.

    :arg index: opensearch index to use, if the ``Document`` is
        associated with an index this can be omitted.
    :arg using: connection alias to use, defaults to ``'default'``

    Any additional keyword arguments will be passed to
    ``OpenSearch.delete`` unchanged.
    """
    client = self._get_connection(using)
    meta = self.meta

    # extract routing etc from meta
    request_params = {}
    for field in DOC_META_FIELDS:
        if field in meta:
            request_params[field] = meta[field]

    # Optimistic concurrency control
    has_concurrency_info = "seq_no" in meta and "primary_term" in meta
    if has_concurrency_info:
        request_params["if_seq_no"] = meta["seq_no"]
        request_params["if_primary_term"] = meta["primary_term"]

    # explicit keyword arguments win over values derived from meta
    request_params.update(kwargs)
    client.delete(index=self._get_index(index), **request_params)
def to_dict(self, include_meta=False, skip_empty=True):
    """
    Serialize the instance into a dictionary so that it can be saved in opensearch.

    :arg include_meta: if set to ``True`` will include all the metadata
        (``_index``, ``_id`` etc). Otherwise just the document's
        data is serialized. This is useful when passing multiple instances into
        ``opensearchpy.helpers.bulk``.
    :arg skip_empty: if set to ``False`` will cause empty values (``None``,
        ``[]``, ``{}``) to be left on the document. Those values will be
        stripped out otherwise as they make no difference in opensearch.
    """
    document = super(Document, self).to_dict(skip_empty=skip_empty)
    if include_meta:
        envelope = {}
        for field in DOC_META_FIELDS:
            if field in self.meta:
                envelope["_" + field] = self.meta[field]

        # in case of to_dict include the index unlike save/update/delete
        index_name = self._get_index(required=False)
        if index_name is not None:
            envelope["_index"] = index_name

        envelope["_source"] = document
        return envelope
    return document
def update(
    self,
    using=None,
    index=None,
    detect_noop=True,
    doc_as_upsert=False,
    refresh=False,
    retry_on_conflict=None,
    script=None,
    script_id=None,
    scripted_upsert=False,
    upsert=None,
    return_doc_meta=False,
    **fields
):
    """
    Partial update of the document, specify fields you wish to update and
    both the instance and the document in opensearch will be updated::

        doc = MyDocument(title='Document Title!')
        doc.save()
        doc.update(title='New Document Title!')

    :arg index: opensearch index to use, if the ``Document`` is
        associated with an index this can be omitted.
    :arg using: connection alias to use, defaults to ``'default'``
    :arg detect_noop: Set to ``False`` to disable noop detection.
    :arg refresh: Control when the changes made by this request are visible
        to search. Set to ``True`` for immediate effect.
    :arg retry_on_conflict: In between the get and indexing phases of the
        update, it is possible that another process might have already
        updated the same document. By default, the update will fail with a
        version conflict exception. The retry_on_conflict parameter
        controls how many times to retry the update before finally throwing
        an exception.
    :arg doc_as_upsert: Instead of sending a partial doc plus an upsert
        doc, setting doc_as_upsert to true will use the contents of doc as
        the upsert value
    :arg script: source of a script to run; when given (or ``script_id``),
        ``fields`` are sent as the script's ``params`` instead of as a
        partial document
    :arg script_id: id of a stored script, used when ``script`` is not given
    :arg scripted_upsert: sent as ``scripted_upsert`` for scripted updates
    :arg upsert: sent as ``upsert`` for scripted updates when not ``None``
    :arg return_doc_meta: set to ``True`` to return all metadata from the
        update API call instead of only the operation result
    :raises IllegalOperation: if neither fields nor a script are given
    :return: operation result (noop/updated), or the full response when
        ``return_doc_meta`` is set
    """
    body = {
        "doc_as_upsert": doc_as_upsert,
        "detect_noop": detect_noop,
    }
    # scripted update
    if script or script_id:
        if upsert is not None:
            body["upsert"] = upsert
        if script:
            script = {"source": script}
        else:
            script = {"id": script_id}
        script["params"] = fields
        body["script"] = script
        body["scripted_upsert"] = scripted_upsert
    # partial document update
    else:
        if not fields:
            raise IllegalOperation(
                "You cannot call update() without updating individual fields or a script. "
                "If you wish to update the entire object use save()."
            )
        # update given fields locally
        merge(self, fields)
        # prepare data for OpenSearch
        values = self.to_dict()
        # if fields were given: partial update
        body["doc"] = {k: values.get(k) for k in fields.keys()}
    # extract routing etc from meta
    doc_meta = {k: self.meta[k] for k in DOC_META_FIELDS if k in self.meta}
    if retry_on_conflict is not None:
        doc_meta["retry_on_conflict"] = retry_on_conflict
    # Optimistic concurrency control; only applied when no retries were
    # requested (retry_on_conflict is None or 0)
    if (
        retry_on_conflict in (None, 0)
        and "seq_no" in self.meta
        and "primary_term" in self.meta
    ):
        doc_meta["if_seq_no"] = self.meta["seq_no"]
        doc_meta["if_primary_term"] = self.meta["primary_term"]
    meta = self._get_connection(using).update(
        index=self._get_index(index), body=body, refresh=refresh, **doc_meta
    )
    # update meta information from OpenSearch
    for k in META_FIELDS:
        if "_" + k in meta:
            setattr(self.meta, k, meta["_" + k])
    return meta if return_doc_meta else meta["result"]
def save(
    self,
    using=None,
    index=None,
    validate=True,
    skip_empty=True,
    return_doc_meta=False,
    **kwargs
):
    """
    Save the document into opensearch. If the document doesn't exist it
    is created, it is overwritten otherwise.

    :arg index: opensearch index to use, if the ``Document`` is
        associated with an index this can be omitted.
    :arg using: connection alias to use, defaults to ``'default'``
    :arg validate: set to ``False`` to skip validating the document
    :arg skip_empty: if set to ``False`` will cause empty values (``None``,
        ``[]``, ``{}``) to be left on the document. Those values will be
        stripped out otherwise as they make no difference in opensearch.
    :arg return_doc_meta: set to ``True`` to return all metadata from the
        index API call instead of only the operation result

    Any additional keyword arguments will be passed to
    ``OpenSearch.index`` unchanged.

    :return: operation result (created/updated), or the full response when
        ``return_doc_meta`` is set
    """
    if validate:
        self.full_clean()
    opensearch = self._get_connection(using)
    # extract routing etc from meta
    doc_meta = {k: self.meta[k] for k in DOC_META_FIELDS if k in self.meta}
    # Optimistic concurrency control
    if "seq_no" in self.meta and "primary_term" in self.meta:
        doc_meta["if_seq_no"] = self.meta["seq_no"]
        doc_meta["if_primary_term"] = self.meta["primary_term"]
    # explicit keyword arguments win over values derived from meta
    doc_meta.update(kwargs)
    meta = opensearch.index(
        index=self._get_index(index),
        body=self.to_dict(skip_empty=skip_empty),
        **doc_meta
    )
    # update meta information from OpenSearch
    for k in META_FIELDS:
        if "_" + k in meta:
            setattr(self.meta, k, meta["_" + k])
    return meta if return_doc_meta else meta["result"]
+36
View File
@@ -0,0 +1,36 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# THIS FILE IS AUTOMATICALLY GENERATED, DO NOT EDIT.
from .utils import ObjectBase
# Stubs for the document classes; only the inheritance structure is declared,
# bodies are elided.
class MetaField(object): ...
class DocumentMeta(type): ...
class IndexMeta(DocumentMeta): ...
class DocumentOptions(object): ...
class InnerDoc(ObjectBase): ...
class Document(ObjectBase): ...
+472
View File
@@ -0,0 +1,472 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from datetime import datetime, timedelta
from six import iteritems, itervalues
from opensearchpy.helpers.aggs import A
from .query import MatchAll, Nested, Range, Terms
from .response import Response
from .search import Search
from .utils import AttrDict
__all__ = [
"FacetedSearch",
"HistogramFacet",
"TermsFacet",
"DateHistogramFacet",
"RangeFacet",
"NestedFacet",
]
class Facet(object):
    """
    A facet on faceted search. Wraps an aggregation and provides functionality
    to create a filter for selected values and return a list of facet values
    from the result of the aggregation.
    """

    agg_type = None

    def __init__(self, metric=None, metric_sort="desc", **kwargs):
        self._metric = metric
        self._params = kwargs
        self.filter_values = ()
        # order the buckets by the metric when one is configured
        if metric and metric_sort:
            self._params["order"] = {"metric": metric_sort}

    def get_aggregation(self):
        """
        Return the aggregation object.
        """
        aggregation = A(self.agg_type, **self._params)
        if self._metric:
            aggregation.metric("metric", self._metric)
        return aggregation

    def add_filter(self, filter_values):
        """
        Construct a filter: the per-value filters combined with ``|``.
        Returns ``None`` when there are no values.
        """
        if not filter_values:
            return None
        first, remaining = filter_values[0], filter_values[1:]
        combined = self.get_value_filter(first)
        for value in remaining:
            combined |= self.get_value_filter(value)
        return combined

    def get_value_filter(self, filter_value):
        """
        Construct a filter for an individual value
        """
        return None

    def is_filtered(self, key, filter_values):
        """
        Is a filter active on the given key.
        """
        active = key in filter_values
        return active

    def get_value(self, bucket):
        """
        return a value representing a bucket. Its key as default.
        """
        return bucket["key"]

    def get_metric(self, bucket):
        """
        Return a metric, by default doc_count for a bucket.
        """
        if not self._metric:
            return bucket["doc_count"]
        return bucket["metric"]["value"]

    def get_values(self, data, filter_values):
        """
        Turn the raw bucket data into a list of tuples containing the key,
        number of documents and a flag indicating whether this value has been
        selected or not.
        """

        def describe(bucket):
            key = self.get_value(bucket)
            return (
                key,
                self.get_metric(bucket),
                self.is_filtered(key, filter_values),
            )

        return [describe(bucket) for bucket in data.buckets]
class TermsFacet(Facet):
    """Facet backed by a ``terms`` aggregation."""

    agg_type = "terms"

    def add_filter(self, filter_values):
        """Create a terms filter instead of bool containing term filters."""
        if not filter_values:
            return None
        field_name = self._params["field"]
        return Terms(_expand__to_dot=False, **{field_name: filter_values})
class RangeFacet(Facet):
    """Facet backed by a ``range`` aggregation over named ``(from, to)`` ranges."""

    agg_type = "range"

    def _range_to_dict(self, range):
        """Convert a ``(key, (from, to))`` pair into a range definition; ``None`` bounds are omitted."""
        key, bounds = range
        lower, upper = bounds[0], bounds[1]
        definition = {"key": key}
        if lower is not None:
            definition["from"] = lower
        if upper is not None:
            definition["to"] = upper
        return definition

    def __init__(self, ranges, **kwargs):
        super(RangeFacet, self).__init__(**kwargs)
        self._params["ranges"] = [self._range_to_dict(item) for item in ranges]
        self._params["keyed"] = False
        # key -> (from, to), used to build the filter for a selected key
        self._ranges = dict(ranges)

    def get_value_filter(self, filter_value):
        """Build a ``Range`` query (``gte``/``lt``) for the range named ``filter_value``."""
        lower, upper = self._ranges[filter_value]
        limits = {}
        if lower is not None:
            limits["gte"] = lower
        if upper is not None:
            limits["lt"] = upper
        field_name = self._params["field"]
        return Range(_expand__to_dot=False, **{field_name: limits})
class HistogramFacet(Facet):
    """Facet backed by a ``histogram`` aggregation."""

    agg_type = "histogram"

    def get_value_filter(self, filter_value):
        """Build a ``Range`` query covering one interval starting at ``filter_value``."""
        field_name = self._params["field"]
        limits = {
            "gte": filter_value,
            "lt": filter_value + self._params["interval"],
        }
        return Range(_expand__to_dot=False, **{field_name: limits})
def _date_interval_year(d):
    """Return ``d`` moved one year ahead; Feb 29 maps to Feb 28."""
    is_leap_day = d.month == 2 and d.day == 29
    target_day = 28 if is_leap_day else d.day
    return d.replace(year=d.year + 1, day=target_day)
def _date_interval_month(d):
    """
    Return the first day of the month following ``d``; the time of day
    (if any) is kept unchanged.
    """
    # Anchor on day 1 before adding 32 days: starting from a late day of
    # the month (e.g. Jan 31) the sum would otherwise land two months
    # ahead and skip the following month entirely.
    return (d.replace(day=1) + timedelta(days=32)).replace(day=1)
def _date_interval_week(d):
    """Return ``d`` moved seven days ahead."""
    one_week = timedelta(days=7)
    return d + one_week
def _date_interval_day(d):
    """Return ``d`` moved one day ahead."""
    one_day = timedelta(days=1)
    return d + one_day
def _date_interval_hour(d):
    """Return ``d`` moved one hour ahead."""
    one_hour = timedelta(hours=1)
    return d + one_hour
class DateHistogramFacet(Facet):
    """Facet backed by a ``date_histogram`` aggregation."""

    agg_type = "date_histogram"

    # Maps an interval name to the function computing the exclusive upper
    # bound of a bucket from its start.
    DATE_INTERVALS = {
        "year": _date_interval_year,
        "1Y": _date_interval_year,
        "month": _date_interval_month,
        "1M": _date_interval_month,
        "week": _date_interval_week,
        "1w": _date_interval_week,
        "day": _date_interval_day,
        "1d": _date_interval_day,
        "hour": _date_interval_hour,
        "1h": _date_interval_hour,
    }

    def __init__(self, **kwargs):
        kwargs.setdefault("min_doc_count", 0)
        super(DateHistogramFacet, self).__init__(**kwargs)

    def get_value(self, bucket):
        """
        Return the bucket key as a naive UTC ``datetime``.

        Keys that already are ``datetime`` objects are returned unchanged;
        anything else is interpreted as milliseconds since the Unix epoch.
        """
        if isinstance(bucket["key"], datetime):
            return bucket["key"]
        # OpenSearch returns key=None instead of 0 for date 1970-01-01,
        # so we need to set key to 0 to avoid TypeError exception
        if bucket["key"] is None:
            bucket["key"] = 0
        # Epoch + timedelta instead of datetime.utcfromtimestamp(): the
        # latter is deprecated since Python 3.12 and may fail for negative
        # (pre-1970) timestamps on some platforms. Integer milliseconds
        # are preserved exactly.
        return datetime(1970, 1, 1) + timedelta(milliseconds=int(bucket["key"]))

    def get_value_filter(self, filter_value):
        """
        Build a ``Range`` query covering one interval starting at
        ``filter_value``. The interval is read from ``calendar_interval``,
        ``fixed_interval`` or ``interval`` (in that order of preference).

        :raises KeyError: if the configured interval is not one of
            ``DATE_INTERVALS``
        """
        interval_type = next(
            (
                candidate
                for candidate in ("calendar_interval", "fixed_interval")
                if candidate in self._params
            ),
            "interval",
        )
        return Range(
            _expand__to_dot=False,
            **{
                self._params["field"]: {
                    "gte": filter_value,
                    "lt": self.DATE_INTERVALS[self._params[interval_type]](
                        filter_value
                    ),
                }
            }
        )
class NestedFacet(Facet):
    """Facet with aggregation type ``nested`` that wraps another facet."""

    agg_type = "nested"

    def __init__(self, path, nested_facet):
        """
        :arg path: nested path handed to the aggregation and to the filter
        :arg nested_facet: the facet whose aggregation is placed under ``inner``
        """
        self._path = path
        self._inner = nested_facet
        inner_aggs = {"inner": nested_facet.get_aggregation()}
        super(NestedFacet, self).__init__(path=path, aggs=inner_aggs)

    def get_values(self, data, filter_values):
        """Delegate to the wrapped facet, using the ``inner`` sub-aggregation."""
        return self._inner.get_values(data.inner, filter_values)

    def add_filter(self, filter_values):
        """Wrap the inner facet's filter in a ``Nested`` query, if it has one."""
        inner_q = self._inner.add_filter(filter_values)
        if not inner_q:
            return None
        return Nested(path=self._path, query=inner_q)
class FacetedResponse(Response):
    """Response that exposes the facets of the ``FacetedSearch`` behind it."""

    @property
    def query_string(self):
        """The query text held by the originating faceted search."""
        return self._faceted_search._query

    @property
    def facets(self):
        """Facet values per facet name, computed on first access and cached."""
        if hasattr(self, "_facets"):
            return self._facets

        # Store the cache through the parent of AttrDict in the MRO, skipping
        # AttrDict's own __setattr__.
        super(AttrDict, self).__setattr__("_facets", AttrDict({}))
        for name, facet in iteritems(self._faceted_search.facets):
            # each facet's buckets sit under the "_filter_<name>" wrapper
            wrapper = getattr(self.aggregations, "_filter_" + name)
            selected = self._faceted_search.filter_values.get(name, ())
            self._facets[name] = facet.get_values(getattr(wrapper, name), selected)
        return self._facets
class FacetedSearch(object):
    """
    Abstraction for creating faceted navigation searches that takes care of
    composing the queries, aggregations and filters as needed as well as
    presenting the results in an easy-to-consume fashion::

        class BlogSearch(FacetedSearch):
            index = 'blogs'
            doc_types = [Blog, Post]
            fields = ['title^5', 'category', 'description', 'body']

            facets = {
                'type': TermsFacet(field='_type'),
                'category': TermsFacet(field='category'),
                'weekly_posts': DateHistogramFacet(field='published_from', interval='week')
            }

            def search(self):
                ' Override search to add your own filters '
                s = super(BlogSearch, self).search()
                return s.filter('term', published=True)

        # when using:
        blog_search = BlogSearch("web framework", filters={"category": "python"})

        # supports pagination
        blog_search[10:20]

        response = blog_search.execute()

        # easy access to aggregation results:
        for category, hit_count, is_selected in response.facets.category:
            print(
                "Category %s has %d hits%s." % (
                    category,
                    hit_count,
                    ' and is chosen' if is_selected else ''
                )
            )

    """

    index = None
    doc_types = None
    fields = None
    facets = {}
    using = "default"

    def __init__(self, query=None, filters=None, sort=()):
        """
        :arg query: the text to search for
        :arg filters: facet values to filter (mapping of facet name to a
            value or list of values); ``None`` means no filters
        :arg sort: sort information to be passed to :class:`~opensearchpy.Search`
        """
        self._query = query
        self._filters = {}
        self._sort = sort
        self.filter_values = {}
        # ``filters`` defaults to None rather than a shared mutable dict.
        for name, value in iteritems(filters or {}):
            self.add_filter(name, value)

        self._s = self.build_search()

    def count(self):
        """Return the count reported by the underlying search object."""
        return self._s.count()

    def __getitem__(self, k):
        """Apply ``k`` to the underlying search and return ``self``."""
        self._s = self._s[k]
        return self

    def __iter__(self):
        """Iterate over the underlying search object."""
        return iter(self._s)

    def add_filter(self, name, filter_values):
        """
        Add a filter for a facet.

        A ``filter_values`` of ``None`` is ignored. A single value is wrapped
        in a list. Raises ``KeyError`` if ``name`` is not a declared facet.
        """
        # normalize the value into a list
        if not isinstance(filter_values, (tuple, list)):
            if filter_values is None:
                return
            filter_values = [filter_values]

        # remember the filter values for use in FacetedResponse
        self.filter_values[name] = filter_values

        # get the filter from the facet
        f = self.facets[name].add_filter(filter_values)
        if f is None:
            # The facet produced no filter for these values: drop any query
            # left from an earlier call so that ``_filters`` cannot disagree
            # with ``filter_values``.
            self._filters.pop(name, None)
            return

        self._filters[name] = f

    def search(self):
        """
        Returns the base Search object to which the facets are added.

        You can customize the query by overriding this method and returning a
        modified search object.
        """
        s = Search(doc_type=self.doc_types, index=self.index, using=self.using)
        return s.response_class(FacetedResponse)

    def query(self, search, query):
        """
        Add query part to ``search``.

        Override this if you wish to customize the query used.
        """
        if not query:
            return search
        if self.fields:
            return search.query("multi_match", fields=self.fields, query=query)
        return search.query("multi_match", query=query)

    def aggregate(self, search):
        """
        Add aggregations representing the facets selected, including potential
        filters.
        """
        for f, facet in iteritems(self.facets):
            agg = facet.get_aggregation()
            agg_filter = MatchAll()
            # a facet's own filter is skipped when building its aggregation
            # filter; every other active filter is AND-ed in
            for field, other_filter in iteritems(self._filters):
                if f == field:
                    continue
                agg_filter &= other_filter
            search.aggs.bucket("_filter_" + f, "filter", filter=agg_filter).bucket(
                f, agg
            )

    def filter(self, search):
        """
        Add a ``post_filter`` to the search request narrowing the results based
        on the facet filters.
        """
        if not self._filters:
            return search

        post_filter = MatchAll()
        for f in itervalues(self._filters):
            post_filter &= f
        return search.post_filter(post_filter)

    def highlight(self, search):
        """
        Add highlighting for all the fields
        """
        # strip any "^boost" suffix: highlighting takes bare field names
        return search.highlight(*(f.split("^", 1)[0] for f in self.fields))

    def sort(self, search):
        """
        Add sorting information to the request.
        """
        if self._sort:
            search = search.sort(*self._sort)
        return search

    def build_search(self):
        """
        Construct the ``Search`` object.
        """
        s = self.search()
        s = self.query(s, self._query)
        s = self.filter(s)
        if self.fields:
            s = self.highlight(s)
        s = self.sort(s)
        # aggregate() attaches the aggregations to ``s`` in place
        self.aggregate(s)
        return s

    def execute(self):
        """
        Execute the search and return the response.
        """
        r = self._s.execute()
        # lets the response reach the facet definitions and filter values
        r._faceted_search = self
        return r
+36
View File
@@ -0,0 +1,36 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from opensearchpy.helpers.response import Response
# Stub declarations: every class body is elided with ``...``, so only the
# class names and their base classes are declared here.
class Facet(object): ...
class TermsFacet(Facet): ...
class RangeFacet(Facet): ...
class HistogramFacet(Facet): ...
class DateHistogramFacet(Facet): ...
class NestedFacet(Facet): ...
# FacetedResponse derives from the imported Response class.
class FacetedResponse(Response): ...
class FacetedSearch(object): ...
+530
View File
@@ -0,0 +1,530 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import base64
import copy
import ipaddress
try:
import collections.abc as collections_abc # only works on python 3.3+
except ImportError:
import collections as collections_abc
from datetime import date, datetime
from dateutil import parser, tz
from six import integer_types, iteritems, string_types
from six.moves import map
from ..exceptions import ValidationException
from .query import Q
from .utils import AttrDict, AttrList, DslBase
from .wrappers import Range
unicode = type("")
def construct_field(name_or_field, **params):
    """Build a ``Field`` from a mapping, a ``Field`` instance or a type name.

    Accepted forms:

    * a mapping such as ``{"type": "text", "analyzer": "snowball"}``; a
      mapping without ``"type"`` but with ``"properties"`` is treated as
      ``"object"``
    * an existing ``Field`` instance, returned unchanged
    * a type name plus keyword parameters

    Raises ``ValueError`` when ``params`` accompany a mapping or a ``Field``
    instance, or when a mapping has neither ``"type"`` nor ``"properties"``.
    """
    # {"type": "text", "analyzer": "snowball"}
    if isinstance(name_or_field, collections_abc.Mapping):
        if params:
            raise ValueError(
                "construct_field() cannot accept parameters when passing in a dict."
            )
        options = name_or_field.copy()
        if "type" in options:
            type_name = options.pop("type")
        elif "properties" in options:
            # inner object can be implicitly defined
            type_name = "object"
        else:
            raise ValueError('construct_field() needs to have a "type" key.')
        return Field.get_dsl_class(type_name)(**options)

    # Text()
    if isinstance(name_or_field, Field):
        if not params:
            return name_or_field
        raise ValueError(
            "construct_field() cannot accept parameters "
            "when passing in a construct_field object."
        )

    # "text", analyzer="snowball"
    field_class = Field.get_dsl_class(name_or_field)
    return field_class(**params)
class Field(DslBase):
    """Base class of the field types declared in this module."""

    _type_name = "field"
    _type_shortcut = staticmethod(construct_field)
    # all fields can be multifields
    _param_defs = {"fields": {"type": "field", "hash": True}}
    name = None
    _coerce = False

    def __init__(self, multi=False, required=False, *args, **kwargs):
        """
        :arg bool multi: specifies whether field can contain array of values
        :arg bool required: specifies whether field is required
        """
        self._multi, self._required = multi, required
        super(Field, self).__init__(*args, **kwargs)

    def __getitem__(self, subfield):
        """Look up a sub-field declared under the ``fields`` parameter."""
        subfields = self._params.get("fields", {})
        return subfields[subfield]

    def _serialize(self, data):
        """Serialize a single value; the base implementation is the identity."""
        return data

    def _deserialize(self, data):
        """Deserialize a single value; the base implementation is the identity."""
        return data

    def _empty(self):
        """Empty value for a non-multi field."""
        return None

    def empty(self):
        """Empty value for the field: an ``AttrList`` when ``multi`` is set."""
        return AttrList([]) if self._multi else self._empty()

    def serialize(self, data):
        """Serialize ``data``, element by element for lists and tuples."""
        if not isinstance(data, (list, AttrList, tuple)):
            return self._serialize(data)
        return [self._serialize(item) for item in data]

    def deserialize(self, data):
        """Deserialize ``data``; ``None`` (also inside sequences) is kept."""
        if data is None:
            return None
        if isinstance(data, (list, AttrList, tuple)):
            return [
                None if item is None else self._deserialize(item) for item in data
            ]
        return self._deserialize(data)

    def clean(self, data):
        """Deserialize ``data`` and enforce ``required``.

        Raises ``ValidationException`` when the field is required and the
        value is ``None``, ``[]`` or ``{}``.
        """
        cleaned = data if data is None else self.deserialize(data)
        if cleaned in (None, [], {}) and self._required:
            raise ValidationException("Value required for this field.")
        return cleaned

    def to_dict(self):
        """Return the field's parameters with its name stored under ``type``."""
        serialized = super(Field, self).to_dict()
        type_name, body = serialized.popitem()
        body["type"] = type_name
        return body
class CustomField(Field):
    """Field that serializes as the type given by ``self.builtin_type``."""

    name = "custom"
    _coerce = True

    def to_dict(self):
        """Serialize using ``builtin_type``.

        When ``builtin_type`` is a ``Field`` instance its own ``to_dict()`` is
        returned; otherwise it is written as the ``type`` of this field's dict.
        """
        builtin = self.builtin_type
        if isinstance(builtin, Field):
            return builtin.to_dict()

        serialized = super(CustomField, self).to_dict()
        serialized["type"] = self.builtin_type
        return serialized
class Object(Field):
    """Field named ``object`` whose values are wrapped in a document class."""

    name = "object"
    _coerce = True

    def __init__(self, doc_class=None, dynamic=None, properties=None, **kwargs):
        """
        :arg document.InnerDoc doc_class: base doc class that handles mapping.
            If no `doc_class` is provided, new instance of `InnerDoc` will be created,
            populated with `properties` and used. Can not be provided together with `properties`
        :arg dynamic: whether new properties may be created dynamically.
            Valid values are `True`, `False`, `'strict'`.
            Can not be provided together with `doc_class`.
        :arg dict properties: used to construct underlying mapping if no `doc_class` is provided.
            Can not be provided together with `doc_class`
        """
        if doc_class and (properties or dynamic is not None):
            raise ValidationException(
                "doc_class and properties/dynamic should not be provided together"
            )

        if not doc_class:
            # FIXME import
            from opensearchpy.helpers.document import InnerDoc

            # no InnerDoc subclass supplied, so build an anonymous one and
            # populate its mapping from ``properties`` / ``dynamic``
            doc_class = type("InnerDoc", (InnerDoc,), {})
            self._doc_class = doc_class
            for field_name, field in iteritems(properties or {}):
                doc_class._doc_type.mapping.field(field_name, field)
            if dynamic is not None:
                doc_class._doc_type.mapping.meta("dynamic", dynamic)
        else:
            self._doc_class = doc_class

        # keep a private copy of the document class's mapping
        self._mapping = copy.deepcopy(self._doc_class._doc_type.mapping)
        super(Object, self).__init__(**kwargs)

    def __getitem__(self, name):
        """Look ``name`` up in the copied mapping."""
        return self._mapping[name]

    def __contains__(self, name):
        """Report whether ``name`` is present in the copied mapping."""
        return name in self._mapping

    def _empty(self):
        """Empty value: an empty dict wrapped in the document class."""
        return self._wrap({})

    def _wrap(self, data):
        """Wrap raw ``data`` via the document class's ``from_opensearch``."""
        return self._doc_class.from_opensearch(data, data_only=True)

    def empty(self):
        """Empty value; for multi fields an ``AttrList`` that wraps its items."""
        return AttrList([], self._wrap) if self._multi else self._empty()

    def to_dict(self):
        """Merge the mapping's dict with the base field dict (field dict wins)."""
        merged = self._mapping.to_dict()
        merged.update(super(Object, self).to_dict())
        return merged

    def _collect_fields(self):
        """Delegate to the mapping's properties."""
        return self._mapping.properties._collect_fields()

    def _deserialize(self, data):
        """Wrap ``data`` in the document class unless it already is one."""
        # don't wrap already wrapped data
        if isinstance(data, self._doc_class):
            return data

        raw = data._d_ if isinstance(data, AttrDict) else data
        return self._wrap(raw)

    def _serialize(self, data):
        """Return ``None``, a raw mapping as-is, or ``data.to_dict()``."""
        if data is None:
            return None
        # somebody assigned raw dict to the field, we should tolerate that
        if isinstance(data, collections_abc.Mapping):
            return data
        return data.to_dict()

    def clean(self, data):
        """Run the base cleaning, then ``full_clean()`` on every document."""
        cleaned = super(Object, self).clean(data)
        if cleaned is None:
            return None

        documents = cleaned if isinstance(cleaned, (list, AttrList)) else [cleaned]
        for document in documents:
            document.full_clean()
        return cleaned

    def update(self, other, update_only=False):
        """Merge ``other``'s mapping into this one; no-op for non-``Object``."""
        if isinstance(other, Object):
            self._mapping.update(other._mapping, update_only)
        # otherwise: not an inner/nested object, no merge possible
class Nested(Object):
    """``Object`` variant named ``nested``; ``multi`` defaults to ``True``."""

    name = "nested"

    def __init__(self, *args, **kwargs):
        """Forward to ``Object`` with ``multi=True`` unless the caller set it."""
        if "multi" not in kwargs:
            kwargs["multi"] = True
        super(Nested, self).__init__(*args, **kwargs)
class Date(Field):
    """Field named ``date`` that deserializes into date/datetime objects."""

    name = "date"
    _coerce = True

    def __init__(self, default_timezone=None, *args, **kwargs):
        """
        :arg default_timezone: timezone that will be automatically used for tz-naive values
            May be instance of `datetime.tzinfo` or string containing TZ offset
        """
        if isinstance(default_timezone, string_types):
            # resolve a timezone given as text through dateutil
            default_timezone = tz.gettz(default_timezone)
        self._default_timezone = default_timezone
        super(Date, self).__init__(*args, **kwargs)

    def _deserialize(self, data):
        """Convert a string, datetime, date or integer into a date value.

        Strings are parsed with ``dateutil``; integers are read as epoch
        milliseconds. Naive datetimes receive the default timezone when one
        is configured. Raises ``ValidationException`` for anything else or for
        an unparsable string.
        """
        if isinstance(data, string_types):
            try:
                data = parser.parse(data)
            except Exception as e:
                raise ValidationException(
                    "Could not parse date from the value (%r)" % data, e
                )

        if isinstance(data, datetime):
            needs_tz = self._default_timezone and data.tzinfo is None
            return data.replace(tzinfo=self._default_timezone) if needs_tz else data
        if isinstance(data, date):
            return data
        if isinstance(data, integer_types):
            # Divide by a float to preserve milliseconds on the datetime.
            return datetime.utcfromtimestamp(data / 1000.0)

        raise ValidationException("Could not parse date from the value (%r)" % data)
class Text(Field):
    """Field named ``text``; declares sub-fields and three analyzer params."""

    name = "text"
    _param_defs = {
        "fields": {"type": "field", "hash": True},
        "analyzer": {"type": "analyzer"},
        "search_analyzer": {"type": "analyzer"},
        "search_quote_analyzer": {"type": "analyzer"},
    }
class SearchAsYouType(Field):
    """Field named ``search_as_you_type``; declares three analyzer params."""

    name = "search_as_you_type"
    _param_defs = {
        "analyzer": {"type": "analyzer"},
        "search_analyzer": {"type": "analyzer"},
        "search_quote_analyzer": {"type": "analyzer"},
    }
class Keyword(Field):
    """Field named ``keyword``; declares sub-fields, analyzer and normalizer."""

    name = "keyword"
    _param_defs = {
        "fields": {"type": "field", "hash": True},
        "search_analyzer": {"type": "analyzer"},
        "normalizer": {"type": "normalizer"},
    }
class ConstantKeyword(Keyword):
    """``Keyword`` subclass declared under the name ``constant_keyword``."""

    name = "constant_keyword"
class Boolean(Field):
    """Field named ``boolean`` that coerces values to ``bool``."""

    name = "boolean"
    _coerce = True

    def _deserialize(self, data):
        """Return ``False`` for the string ``"false"``, else ``bool(data)``."""
        return False if data == "false" else bool(data)

    def clean(self, data):
        """Deserialize ``data``; only ``None`` counts as missing when required.

        Raises ``ValidationException`` if the field is required and the value
        is ``None`` (``False`` is a legitimate value).
        """
        cleaned = data if data is None else self.deserialize(data)
        if cleaned is None and self._required:
            raise ValidationException("Value required for this field.")
        return cleaned
class Float(Field):
    """Field named ``float`` that coerces values with ``float()``."""

    name = "float"
    _coerce = True

    def _deserialize(self, data):
        """Convert a single value to ``float``."""
        as_float = float(data)
        return as_float
class DenseVector(Float):
    """``Float`` subclass named ``dense_vector``; always a multi field."""

    name = "dense_vector"

    def __init__(self, dims, **kwargs):
        """
        :arg dims: forwarded to the parent constructor as the ``dims`` parameter
        """
        # ``multi`` is forced on, overriding whatever the caller passed
        kwargs.update(multi=True)
        super(DenseVector, self).__init__(dims=dims, **kwargs)
class SparseVector(Field):
    """Field declared under the name ``sparse_vector``."""

    name = "sparse_vector"


class HalfFloat(Float):
    """``Float`` subclass declared under the name ``half_float``."""

    name = "half_float"
class ScaledFloat(Float):
    """``Float`` subclass named ``scaled_float`` with a mandatory factor."""

    name = "scaled_float"

    def __init__(self, scaling_factor, *args, **kwargs):
        """
        :arg scaling_factor: forwarded to the parent constructor as the
            ``scaling_factor`` parameter
        """
        super(ScaledFloat, self).__init__(
            *args, scaling_factor=scaling_factor, **kwargs
        )
class Double(Float):
    """``Float`` subclass declared under the name ``double``."""

    name = "double"


class RankFeature(Float):
    """``Float`` subclass declared under the name ``rank_feature``."""

    name = "rank_feature"


class RankFeatures(Field):
    """Field declared under the name ``rank_features``."""

    name = "rank_features"
class Integer(Field):
    """Field named ``integer`` that coerces values with ``int()``."""

    name = "integer"
    _coerce = True

    def _deserialize(self, data):
        """Convert a single value to ``int``."""
        as_int = int(data)
        return as_int
class Byte(Integer):
    """``Integer`` subclass declared under the name ``byte``."""

    name = "byte"


class Short(Integer):
    """``Integer`` subclass declared under the name ``short``."""

    name = "short"


class Long(Integer):
    """``Integer`` subclass declared under the name ``long``."""

    name = "long"
class Ip(Field):
    """Field named ``ip`` that deserializes into ``ipaddress`` objects."""

    name = "ip"
    _coerce = True

    def _deserialize(self, data):
        """Parse ``data`` with ``ipaddress.ip_address``."""
        # the ipaddress library for pypy only accepts unicode.
        text = unicode(data)
        return ipaddress.ip_address(text)

    def _serialize(self, data):
        """Return ``str(data)``, passing ``None`` through."""
        return None if data is None else str(data)
class Binary(Field):
    """Field named ``binary`` that base64-encodes its values."""

    name = "binary"
    _coerce = True

    def clean(self, data):
        """Return ``data`` untouched."""
        # Binary fields are opaque, so there's not much cleaning
        # that can be done.
        return data

    def _deserialize(self, data):
        """Decode base64 ``data`` into bytes."""
        decoded = base64.b64decode(data)
        return decoded

    def _serialize(self, data):
        """Encode bytes as base64 text, passing ``None`` through."""
        if data is None:
            return None
        encoded = base64.b64encode(data)
        return encoded.decode()
class GeoPoint(Field):
    """Field declared under the name ``geo_point``."""

    name = "geo_point"


class GeoShape(Field):
    """Field declared under the name ``geo_shape``."""

    name = "geo_shape"
class Completion(Field):
    """Field named ``completion``; declares two analyzer params."""

    name = "completion"
    _param_defs = {
        "analyzer": {"type": "analyzer"},
        "search_analyzer": {"type": "analyzer"},
    }
class Percolator(Field):
    """Field named ``percolator`` whose values are query objects."""

    name = "percolator"
    _coerce = True

    def _deserialize(self, data):
        """Build a query object from ``data`` via ``Q``."""
        query = Q(data)
        return query

    def _serialize(self, data):
        """Return ``data.to_dict()``, passing ``None`` through."""
        return None if data is None else data.to_dict()
class RangeField(Field):
    """Base for range fields; bounds are handled by ``_core_field``."""

    _coerce = True
    # subclasses set this to the Field instance used for each bound
    _core_field = None

    def _deserialize(self, data):
        """Return a ``Range`` with every bound deserialized by the core field."""
        if isinstance(data, Range):
            return data
        bounds = {
            key: self._core_field.deserialize(value)
            for key, value in iteritems(data)
        }
        return Range(bounds)

    def _serialize(self, data):
        """Return a plain dict with every bound serialized by the core field."""
        if data is None:
            return None
        # anything that is not a mapping is converted through to_dict()
        raw = data if isinstance(data, collections_abc.Mapping) else data.to_dict()
        return {
            key: self._core_field.serialize(value) for key, value in iteritems(raw)
        }
class IntegerRange(RangeField):
    """Range field named ``integer_range`` with ``Integer`` bounds."""

    name = "integer_range"
    _core_field = Integer()


class FloatRange(RangeField):
    """Range field named ``float_range`` with ``Float`` bounds."""

    name = "float_range"
    _core_field = Float()


class LongRange(RangeField):
    """Range field named ``long_range`` with ``Long`` bounds."""

    name = "long_range"
    _core_field = Long()


class DoubleRange(RangeField):
    """Range field named ``double_range`` with ``Double`` bounds."""

    name = "double_range"
    _core_field = Double()


class DateRange(RangeField):
    """Range field named ``date_range`` with ``Date`` bounds."""

    name = "date_range"
    _core_field = Date()
class IpRange(Field):
    """Field declared under the name ``ip_range``."""

    # not a RangeField since ip_range supports CIDR ranges
    name = "ip_range"


class Join(Field):
    """Field declared under the name ``join``."""

    name = "join"


class TokenCount(Field):
    """Field declared under the name ``token_count``."""

    name = "token_count"


class Murmur3(Field):
    """Field declared under the name ``murmur3``."""

    name = "murmur3"
+70
View File
@@ -0,0 +1,70 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from typing import Any
from .utils import DslBase
# Stub declarations: every class body is elided with ``...``, so only the
# class names and their base classes are declared here.
class Field(DslBase): ...
class CustomField(Field): ...
class Object(Field): ...
class Nested(Object): ...
class Date(Field): ...
class Text(Field): ...
class SearchAsYouType(Field): ...
class Keyword(Field): ...
class ConstantKeyword(Keyword): ...
class Boolean(Field): ...
class Float(Field): ...
class DenseVector(Float): ...
class SparseVector(Field): ...
class HalfFloat(Float): ...
class ScaledFloat(Float): ...
class Double(Float): ...
class RankFeature(Float): ...
class RankFeatures(Field): ...
class Integer(Field): ...
class Byte(Integer): ...
class Short(Integer): ...
class Long(Integer): ...
class Ip(Field): ...
class Binary(Field): ...
class GeoPoint(Field): ...
class GeoShape(Field): ...
class Completion(Field): ...
class Percolator(Field): ...
class RangeField(Field): ...
class IntegerRange(RangeField): ...
class FloatRange(RangeField): ...
class LongRange(RangeField): ...
class DoubleRange(RangeField): ...
class DateRange(RangeField): ...
class IpRange(Field): ...
class Join(Field): ...
class TokenCount(Field): ...
class Murmur3(Field): ...
# Signature is typed loosely: any argument, any keyword params, any result.
def construct_field(name_or_field: Any, **params: Any) -> Any: ...
+127
View File
@@ -0,0 +1,127 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
try:
import collections.abc as collections_abc # only works on python 3.3+
except ImportError:
import collections as collections_abc
from .utils import DslBase
def SF(name_or_sf, **params):
    """Build a ``ScoreFunction`` from a mapping, an instance or a name.

    Accepted forms:

    * a mapping such as ``{"script_score": {"script": "_score"}, "filter": {}}``
    * an existing ``ScoreFunction`` instance, returned unchanged
    * a function name plus keyword parameters

    Raises ``ValueError`` when ``params`` accompany a mapping or an instance,
    or when a mapping names more than one function.
    """
    # {"script_score": {"script": "_score"}, "filter": {}}
    if isinstance(name_or_sf, collections_abc.Mapping):
        if params:
            raise ValueError("SF() cannot accept parameters when passing in a dict.")

        remaining = name_or_sf.copy()
        # pull out the params every score function knows (see _param_defs)
        kwargs = {
            known: remaining.pop(known)
            for known in ScoreFunction._param_defs
            if known in name_or_sf
        }

        if not remaining:
            # nothing left, so just filter+weight, which used to be boost factor
            name = "boost_factor"
        elif len(remaining) == 1:
            # {'FUNCTION': {...}}
            name, params = remaining.popitem()
        else:
            raise ValueError(
                "SF() got an unexpected fields in the dictionary: %r" % remaining
            )

        # boost factor special case, see https://github.com/elastic/elasticsearch/issues/6343
        if not isinstance(params, collections_abc.Mapping):
            params = {"value": params}

        # mix known params (from _param_defs) and from inside the function
        kwargs.update(params)
        function_class = ScoreFunction.get_dsl_class(name)
        return function_class(**kwargs)

    # ScriptScore(script="_score", filter=Q())
    if isinstance(name_or_sf, ScoreFunction):
        if not params:
            return name_or_sf
        raise ValueError(
            "SF() cannot accept parameters when passing in a ScoreFunction object."
        )

    # "script_score", script="_score", filter=Q()
    return ScoreFunction.get_dsl_class(name_or_sf)(**params)
class ScoreFunction(DslBase):
    """Base class of the score functions declared in this module."""

    _type_name = "score_function"
    _type_shortcut = staticmethod(SF)
    _param_defs = {
        "query": {"type": "query"},
        "filter": {"type": "query"},
        "weight": {},
    }
    name = None

    def to_dict(self):
        """Serialize, hoisting the shared params next to the function's key."""
        serialized = super(ScoreFunction, self).to_dict()
        # filter and query dicts should be at the same level as us
        for shared in self._param_defs:
            if shared in serialized[self.name]:
                serialized[shared] = serialized[self.name].pop(shared)
        return serialized
class ScriptScore(ScoreFunction):
    """Score function declared under the name ``script_score``."""

    name = "script_score"
class BoostFactor(ScoreFunction):
    """Score function named ``boost_factor``, serialized as a bare value."""

    name = "boost_factor"

    def to_dict(self):
        """Serialize with ``value`` unwrapped, or the key dropped if absent."""
        serialized = super(BoostFactor, self).to_dict()
        if "value" not in serialized[self.name]:
            del serialized[self.name]
            return serialized
        # replace the params dict by the bare boost value
        serialized[self.name] = serialized[self.name].pop("value")
        return serialized
class RandomScore(ScoreFunction):
    """Score function declared under the name ``random_score``."""

    name = "random_score"


class FieldValueFactor(ScoreFunction):
    """Score function declared under the name ``field_value_factor``."""

    name = "field_value_factor"


class Linear(ScoreFunction):
    """Score function declared under the name ``linear``."""

    name = "linear"


class Gauss(ScoreFunction):
    """Score function declared under the name ``gauss``."""

    name = "gauss"


class Exp(ScoreFunction):
    """Score function declared under the name ``exp``."""

    name = "exp"
+40
View File
@@ -0,0 +1,40 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from typing import Any
from .utils import DslBase
# Stub declarations: every class body is elided with ``...``, so only the
# class names and their base classes are declared here.
class ScoreFunction(DslBase): ...
class ScriptScore(ScoreFunction): ...
class BoostFactor(ScoreFunction): ...
class RandomScore(ScoreFunction): ...
class FieldValueFactor(ScoreFunction): ...
class Linear(ScoreFunction): ...
class Gauss(ScoreFunction): ...
class Exp(ScoreFunction): ...
# Signature is typed loosely: any argument, any keyword params, any result.
def SF(name_or_sf: Any, **params: Any) -> Any: ...
+640
View File
@@ -0,0 +1,640 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from opensearchpy.connection.connections import get_connection
from opensearchpy.helpers import analysis
from ..exceptions import IllegalOperation
from .mapping import Mapping
from .search import Search
from .update_by_query import UpdateByQuery
from .utils import merge
class IndexTemplate(object):
    """An index template: a named index pattern plus an index definition.

    Attributes not found on the template itself are looked up on the wrapped
    index object.
    """

    def __init__(self, name, template, index=None, order=None, **kwargs):
        """
        :arg name: name the template is saved under
        :arg template: index pattern; becomes the wrapped index's name
        :arg index: optional index object to clone the definition from; when
            omitted a new ``Index`` is created from ``template`` and ``kwargs``
        :arg order: optional ``order`` value written into the template body

        Raises ``ValueError`` if ``kwargs`` are given together with ``index``.
        """
        if index is None:
            self._index = Index(template, **kwargs)
        else:
            if kwargs:
                raise ValueError(
                    "You cannot specify options for Index when"
                    " passing an Index instance."
                )
            # work on a copy so the caller's index keeps its own name
            self._index = index.clone()
            self._index._name = template
        self._template_name = name
        self.order = order

    def __getattr__(self, attr_name):
        """Delegate unknown attributes to the wrapped index."""
        # ``__getattr__`` only runs when normal lookup fails. If ``_index``
        # itself is missing (bare instance created by copy/pickle, or a
        # ``hasattr`` probe before ``__init__`` ran), ``self._index`` below
        # would re-enter this method without end, so fail cleanly instead.
        if attr_name == "_index":
            raise AttributeError(attr_name)
        return getattr(self._index, attr_name)

    def to_dict(self):
        """Return the index definition plus ``index_patterns`` and ``order``."""
        d = self._index.to_dict()
        d["index_patterns"] = [self._index._name]
        if self.order is not None:
            d["order"] = self.order
        return d

    def save(self, using=None):
        """Send the template through ``indices.put_template``.

        :arg using: connection alias; defaults to the wrapped index's alias
        """
        opensearch = get_connection(using or self._index._using)
        return opensearch.indices.put_template(
            name=self._template_name, body=self.to_dict()
        )
class Index(object):
def __init__(self, name, using="default"):
"""
:arg name: name of the index
:arg using: connection alias to use, defaults to ``'default'``
"""
self._name = name
self._doc_types = []
self._using = using
self._settings = {}
self._aliases = {}
self._analysis = {}
self._mapping = None
def get_or_create_mapping(self):
if self._mapping is None:
self._mapping = Mapping()
return self._mapping
def as_template(self, template_name, pattern=None, order=None):
    """Build an ``IndexTemplate`` from this index.

    :arg template_name: name of the template
    :arg pattern: index pattern; falls back to this index's name
    :arg order: forwarded to ``IndexTemplate`` unchanged
    """
    # TODO: should we allow pattern to be a top-level arg?
    # or maybe have an IndexPattern that allows for it and have
    # Document._index be that?
    index_pattern = pattern or self._name
    return IndexTemplate(template_name, index_pattern, index=self, order=order)
def resolve_nested(self, field_path):
for doc in self._doc_types:
nested, field = doc._doc_type.mapping.resolve_nested(field_path)
if field is not None:
return nested, field
if self._mapping:
return self._mapping.resolve_nested(field_path)
return (), None
def resolve_field(self, field_path):
for doc in self._doc_types:
field = doc._doc_type.mapping.resolve_field(field_path)
if field is not None:
return field
if self._mapping:
return self._mapping.resolve_field(field_path)
return None
def load_mappings(self, using=None):
    """Update the index's mapping via ``update_from_opensearch``.

    :arg using: connection alias; defaults to the index's own alias
    """
    mapping = self.get_or_create_mapping()
    mapping.update_from_opensearch(self._name, using=using or self._using)
def clone(self, name=None, using=None):
"""
Create a copy of the instance with another name or connection alias.
Useful for creating multiple indices with shared configuration::
i = Index('base-index')
i.settings(number_of_shards=1)
i.create()
i2 = i.clone('other-index')
i2.create()
:arg name: name of the index
:arg using: connection alias to use, defaults to ``'default'``
"""
i = Index(name or self._name, using=using or self._using)
i._settings = self._settings.copy()
i._aliases = self._aliases.copy()
i._analysis = self._analysis.copy()
i._doc_types = self._doc_types[:]
if self._mapping is not None:
i._mapping = self._mapping._clone()
return i
def _get_connection(self, using=None):
if self._name is None:
raise ValueError("You cannot perform API calls on the default index.")
return get_connection(using or self._using)
connection = property(_get_connection)
def mapping(self, mapping):
    """
    Associate a mapping (an instance of
    :class:`~opensearchpy.Mapping`) with this index.
    This means that, when this index is created, it will contain the
    mappings for the document type defined by those mappings.
    """
    own_mapping = self.get_or_create_mapping()
    own_mapping.update(mapping)
def document(self, document):
"""
Associate a :class:`~opensearchpy.Document` subclass with an index.
This means that, when this index is created, it will contain the
mappings for the ``Document``. If the ``Document`` class doesn't have a
default index yet (by defining ``class Index``), this instance will be
used. Can be used as a decorator::
i = Index('blog')
@i.document
class Post(Document):
title = Text()
# create the index, including Post mappings
i.create()
# .search() will now return a Search object that will return
# properly deserialized Post instances
s = i.search()
"""
self._doc_types.append(document)
# If the document index does not have any name, that means the user
# did not set any index already to the document.
# So set this index as document index
if document._index._name is None:
document._index = self
return document
def settings(self, **kwargs):
"""
Add settings to the index::
i = Index('i')
i.settings(number_of_shards=1, number_of_replicas=0)
Multiple calls to ``settings`` will merge the keys, later overriding
the earlier.
"""
self._settings.update(kwargs)
return self
def aliases(self, **kwargs):
"""
Add aliases to the index definition::
i = Index('blog-v2')
i.aliases(blog={}, published={'filter': Q('term', published=True)})
"""
self._aliases.update(kwargs)
return self
def analyzer(self, *args, **kwargs):
    """
    Explicitly add an analyzer to an index. Note that all custom analyzers
    defined in mappings will also be created. This is useful for search analyzers.

    Example::

        from opensearchpy import analyzer, tokenizer

        my_analyzer = analyzer('my_analyzer',
            tokenizer=tokenizer('trigram', 'nGram', min_gram=3, max_gram=3),
            filter=['lowercase']
        )

        i = Index('blog')
        i.analyzer(my_analyzer)

    """
    definition = analysis.analyzer(*args, **kwargs).get_analysis_definition()

    # an empty definition means there is nothing to merge (probably an
    # analyzer already defined out of our control)
    if definition:
        merge(self._analysis, definition, True)
def to_dict(self):
    """Serialize the index definition into a request body dict.

    The result may contain the keys ``settings``, ``aliases`` and
    ``mappings``; each is only present when there is something to put in it.
    Mappings and analysis definitions are collected from the index's own
    mapping and from every registered document type, then merged. Explicitly
    registered analysis (``self._analysis``) ends up under
    ``settings["analysis"]``.

    The ``settings`` entry is a shallow copy of ``self._settings``: the
    ``analysis`` key added here must not leak back into the index's own
    settings, which it did when the same dict object was handed out.
    """
    out = {}
    if self._settings:
        # Copy: "analysis" is written into out["settings"] below.
        out["settings"] = dict(self._settings)
    if self._aliases:
        out["aliases"] = self._aliases

    mappings = self._mapping.to_dict() if self._mapping else {}
    analysis = self._mapping._collect_analysis() if self._mapping else {}
    for d in self._doc_types:
        mapping = d._doc_type.mapping
        # third argument presumably makes merge raise on conflicts - see merge()
        merge(mappings, mapping.to_dict(), True)
        merge(analysis, mapping._collect_analysis(), True)

    if mappings:
        out["mappings"] = mappings

    if analysis or self._analysis:
        merge(analysis, self._analysis)
        out.setdefault("settings", {})["analysis"] = analysis

    return out
def search(self, using=None):
    """
    Build a :class:`~opensearchpy.Search` over this index (or over all the
    indices belonging to this template), bound to its registered
    ``Document`` classes.

    :arg using: connection alias; when falsy, ``self._using`` is used.
    """
    connection_alias = using or self._using
    return Search(using=connection_alias, index=self._name, doc_type=self._doc_types)
def updateByQuery(self, using=None):
    """
    Build an :class:`~opensearchpy.UpdateByQuery` over this index (or over
    all the indices belonging to this template), for updating the documents
    that match its search criteria.

    :arg using: connection alias; when falsy, ``self._using`` is used.

    For more information, see here:
    https://opensearch.org/docs/latest/opensearch/rest-api/document-apis/update-by-query/
    """
    connection_alias = using or self._using
    return UpdateByQuery(using=connection_alias, index=self._name)
def create(self, using=None, **kwargs):
    """
    Create the index in opensearch, sending ``self.to_dict()`` as the body.

    Extra keyword arguments are forwarded unchanged to
    ``OpenSearch.indices.create``.
    """
    client = self._get_connection(using)
    return client.indices.create(index=self._name, body=self.to_dict(), **kwargs)
def is_closed(self, using=None):
    """Return ``True`` when the cluster state metadata reports this index as ``"close"``."""
    cluster = self._get_connection(using).cluster
    state = cluster.state(index=self._name, metric="metadata")
    index_metadata = state["metadata"]["indices"][self._name]
    return index_metadata["state"] == "close"
def save(self, using=None):
    """
    Sync the index definition with opensearch, creating the index if it
    doesn't exist and updating its settings and mappings if it does.

    Note some settings and mapping changes cannot be done on an open
    index (or at all on an existing index) and for those this method will
    fail with the underlying exception.

    :arg using: connection alias forwarded to every API call made here.
    :returns: the result of ``create()`` when the index did not exist,
        otherwise ``None``.
    :raises IllegalOperation: if the analysis configuration differs from the
        one on the server and the index is open.
    """
    # Missing index: a plain create covers settings, mappings and aliases.
    if not self.exists(using=using):
        return self.create(using=using)

    body = self.to_dict()
    settings = body.pop("settings", {})
    # Analysis is handled separately from the other settings below.
    analysis = settings.pop("analysis", None)
    current_settings = self.get_settings(using=using)[self._name]["settings"][
        "index"
    ]
    if analysis:
        if self.is_closed(using=using):
            # closed index, update away
            settings["analysis"] = analysis
        else:
            # compare analysis definition, if all analysis objects are
            # already defined as requested, skip analysis update and
            # proceed, otherwise raise IllegalOperation
            existing_analysis = current_settings.get("analysis", {})
            if any(
                existing_analysis.get(section, {}).get(k, None)
                != analysis[section][k]
                for section in analysis
                for k in analysis[section]
            ):
                raise IllegalOperation(
                    "You cannot update analysis configuration on an open index, "
                    "you need to close index %s first." % self._name
                )

    # try and update the settings
    if settings:
        # Work on a copy so entries can be dropped while filtering.
        settings = settings.copy()
        for k, v in list(settings.items()):
            # Skip values the server already has; the comparison is against
            # str(v), i.e. the server-side value is treated as a string.
            if k in current_settings and current_settings[k] == str(v):
                del settings[k]

        # Only call the API if something is left to change.
        if settings:
            self.put_settings(using=using, body=settings)

    # update the mappings, any conflict in the mappings will result in an
    # exception
    mappings = body.pop("mappings", {})
    if mappings:
        self.put_mapping(using=using, body=mappings)
def analyze(self, using=None, **kwargs):
    """
    Run the analysis process on a text and return the token breakdown.

    Extra keyword arguments are forwarded unchanged to
    ``OpenSearch.indices.analyze``.
    """
    indices = self._get_connection(using).indices
    return indices.analyze(index=self._name, **kwargs)
def refresh(self, using=None, **kwargs):
    """
    Refresh the index.

    Extra keyword arguments are forwarded unchanged to
    ``OpenSearch.indices.refresh``.
    """
    client = self._get_connection(using)
    return client.indices.refresh(index=self._name, **kwargs)
def flush(self, using=None, **kwargs):
    """
    Flush the index.

    Extra keyword arguments are forwarded unchanged to
    ``OpenSearch.indices.flush``.
    """
    indices = self._get_connection(using).indices
    return indices.flush(index=self._name, **kwargs)
def get(self, using=None, **kwargs):
    """
    Retrieve information about the index through the get index API.

    Extra keyword arguments are forwarded unchanged to
    ``OpenSearch.indices.get``.
    """
    client = self._get_connection(using)
    return client.indices.get(index=self._name, **kwargs)
def open(self, using=None, **kwargs):
    """
    Open the index in opensearch.

    Extra keyword arguments are forwarded unchanged to
    ``OpenSearch.indices.open``.
    """
    indices = self._get_connection(using).indices
    return indices.open(index=self._name, **kwargs)
def close(self, using=None, **kwargs):
    """
    Close the index in opensearch.

    Extra keyword arguments are forwarded unchanged to
    ``OpenSearch.indices.close``.
    """
    client = self._get_connection(using)
    return client.indices.close(index=self._name, **kwargs)
def delete(self, using=None, **kwargs):
    """
    Delete the index in opensearch.

    Extra keyword arguments are forwarded unchanged to
    ``OpenSearch.indices.delete``.
    """
    indices = self._get_connection(using).indices
    return indices.delete(index=self._name, **kwargs)
def exists(self, using=None, **kwargs):
    """
    Report whether the index already exists in opensearch.

    Returns whatever ``OpenSearch.indices.exists`` returns; extra keyword
    arguments are forwarded to it unchanged.
    """
    client = self._get_connection(using)
    return client.indices.exists(index=self._name, **kwargs)
def put_mapping(self, using=None, **kwargs):
    """
    Register a mapping definition for the index.

    Extra keyword arguments are forwarded unchanged to
    ``OpenSearch.indices.put_mapping``.
    """
    indices = self._get_connection(using).indices
    return indices.put_mapping(index=self._name, **kwargs)
def get_mapping(self, using=None, **kwargs):
    """
    Retrieve the mapping definition of the index.

    Extra keyword arguments are forwarded unchanged to
    ``OpenSearch.indices.get_mapping``.
    """
    client = self._get_connection(using)
    return client.indices.get_mapping(index=self._name, **kwargs)
def get_field_mapping(self, using=None, **kwargs):
    """
    Retrieve the mapping definition of a specific field.

    Extra keyword arguments are forwarded unchanged to
    ``OpenSearch.indices.get_field_mapping``.
    """
    indices = self._get_connection(using).indices
    return indices.get_field_mapping(index=self._name, **kwargs)
def put_alias(self, using=None, **kwargs):
    """
    Create an alias for the index.

    Extra keyword arguments are forwarded unchanged to
    ``OpenSearch.indices.put_alias``.
    """
    client = self._get_connection(using)
    return client.indices.put_alias(index=self._name, **kwargs)
def exists_alias(self, using=None, **kwargs):
    """
    Report whether the given alias exists for this index.

    Returns whatever ``OpenSearch.indices.exists_alias`` returns; extra
    keyword arguments are forwarded to it unchanged.
    """
    indices = self._get_connection(using).indices
    return indices.exists_alias(index=self._name, **kwargs)
def get_alias(self, using=None, **kwargs):
    """
    Retrieve a specified alias.

    Extra keyword arguments are forwarded unchanged to
    ``OpenSearch.indices.get_alias``.
    """
    client = self._get_connection(using)
    return client.indices.get_alias(index=self._name, **kwargs)
def delete_alias(self, using=None, **kwargs):
    """
    Delete a specific alias.

    Extra keyword arguments are forwarded unchanged to
    ``OpenSearch.indices.delete_alias``.
    """
    indices = self._get_connection(using).indices
    return indices.delete_alias(index=self._name, **kwargs)
def get_settings(self, using=None, **kwargs):
    """
    Retrieve the settings of the index.

    Extra keyword arguments are forwarded unchanged to
    ``OpenSearch.indices.get_settings``.
    """
    client = self._get_connection(using)
    return client.indices.get_settings(index=self._name, **kwargs)
def put_settings(self, using=None, **kwargs):
    """
    Change specific index-level settings in real time.

    Extra keyword arguments are forwarded unchanged to
    ``OpenSearch.indices.put_settings``.
    """
    indices = self._get_connection(using).indices
    return indices.put_settings(index=self._name, **kwargs)
def stats(self, using=None, **kwargs):
    """
    Retrieve statistics on the operations happening on the index.

    Extra keyword arguments are forwarded unchanged to
    ``OpenSearch.indices.stats``.
    """
    client = self._get_connection(using)
    return client.indices.stats(index=self._name, **kwargs)
def segments(self, using=None, **kwargs):
    """
    Provide low-level information about the segments that a Lucene index
    (shard level) is built with.

    Extra keyword arguments are forwarded unchanged to
    ``OpenSearch.indices.segments``.
    """
    indices = self._get_connection(using).indices
    return indices.segments(index=self._name, **kwargs)
def validate_query(self, using=None, **kwargs):
    """
    Validate a potentially expensive query without executing it.

    Extra keyword arguments are forwarded unchanged to
    ``OpenSearch.indices.validate_query``.
    """
    client = self._get_connection(using)
    return client.indices.validate_query(index=self._name, **kwargs)
def clear_cache(self, using=None, **kwargs):
    """
    Clear all caches, or specific caches, associated with the index.

    Extra keyword arguments are forwarded unchanged to
    ``OpenSearch.indices.clear_cache``.
    """
    indices = self._get_connection(using).indices
    return indices.clear_cache(index=self._name, **kwargs)
def recovery(self, using=None, **kwargs):
    """
    Query the indices recovery API, which gives insight into on-going shard
    recoveries for the index.

    Extra keyword arguments are forwarded unchanged to
    ``OpenSearch.indices.recovery``.
    """
    client = self._get_connection(using)
    return client.indices.recovery(index=self._name, **kwargs)
def upgrade(self, using=None, **kwargs):
    """
    Upgrade the index to the latest format.

    Extra keyword arguments are forwarded unchanged to
    ``OpenSearch.indices.upgrade``.
    """
    indices = self._get_connection(using).indices
    return indices.upgrade(index=self._name, **kwargs)
def get_upgrade(self, using=None, **kwargs):
    """
    Monitor how much of the index has been upgraded.

    Extra keyword arguments are forwarded unchanged to
    ``OpenSearch.indices.get_upgrade``.
    """
    client = self._get_connection(using)
    return client.indices.get_upgrade(index=self._name, **kwargs)
def shard_stores(self, using=None, **kwargs):
    """
    Provide store information for the shard copies of the index: on which
    nodes shard copies exist, the shard copy version (indicating how recent
    they are), and any exceptions encountered while opening the shard index
    or from an earlier engine failure.

    Extra keyword arguments are forwarded unchanged to
    ``OpenSearch.indices.shard_stores``.
    """
    indices = self._get_connection(using).indices
    return indices.shard_stores(index=self._name, **kwargs)
def forcemerge(self, using=None, **kwargs):
    """
    Force a merge of the index through the force merge API. Merging relates
    to the number of segments a Lucene index holds within each shard; the
    operation reduces that number by merging segments.

    This call will block until the merge is complete. If the http
    connection is lost, the request will continue in the background, and
    any new requests will block until the previous force merge is complete.

    Extra keyword arguments are forwarded unchanged to
    ``OpenSearch.indices.forcemerge``.
    """
    client = self._get_connection(using)
    return client.indices.forcemerge(index=self._name, **kwargs)
def shrink(self, using=None, **kwargs):
    """
    Shrink an existing index into a new index with fewer primary shards.

    The number of primary shards in the target index must be a factor of
    the shards in the source index. For example an index with 8 primary
    shards can be shrunk into 4, 2 or 1 primary shards, and one with 15
    primary shards into 5, 3 or 1. If the number of shards in the index is
    a prime number it can only be shrunk into a single primary shard.
    Before shrinking, a (primary or replica) copy of every shard in the
    index must be present on the same node.

    Extra keyword arguments are forwarded unchanged to
    ``OpenSearch.indices.shrink``.
    """
    indices = self._get_connection(using).indices
    return indices.shrink(index=self._name, **kwargs)
+28
View File
@@ -0,0 +1,28 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Type stubs for the index helpers: only the class names are declared here,
# no attributes or methods are typed.
class IndexTemplate(object): ...
class Index(object): ...
+249
View File
@@ -0,0 +1,249 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
try:
import collections.abc as collections_abc # only works on python 3.3+
except ImportError:
import collections as collections_abc
from itertools import chain
from six import iteritems, itervalues
from opensearchpy.connection.connections import get_connection
from opensearchpy.helpers.field import Nested, Text, construct_field
from .utils import DslBase
# Meta option names that ``Mapping.meta`` stores as given; any other name
# without a leading underscore gets one prepended there.
META_FIELDS = frozenset(
    [
        "dynamic",
        "transform",
        "dynamic_date_formats",
        "date_detection",
        "numeric_detection",
        "dynamic_templates",
        "enabled",
    ]
)
class Properties(DslBase):
    """Named collection of field definitions, stored under the ``properties`` param."""

    name = "properties"
    _param_defs = {"properties": {"type": "field", "hash": True}}

    def __init__(self):
        super(Properties, self).__init__()

    def __repr__(self):
        return "Properties()"

    def __getitem__(self, name):
        """Return the field registered under ``name``."""
        return self.properties[name]

    def __contains__(self, name):
        """Tell whether a field called ``name`` is registered."""
        return name in self.properties

    def to_dict(self):
        """Serialize via the base class and return only the ``"properties"`` entry."""
        serialized = super(Properties, self).to_dict()
        return serialized["properties"]

    def field(self, name, *args, **kwargs):
        """Build a field with ``construct_field`` and register it as ``name``.

        Returns ``self`` so calls can be chained.
        """
        built = construct_field(*args, **kwargs)
        self.properties[name] = built
        return self

    def _collect_fields(self):
        """Yield every Field object held here, multi fields and nested ones included."""
        for current in itervalues(self.properties.to_dict()):
            yield current
            # multi fields
            if hasattr(current, "fields"):
                for sub_field in itervalues(current.fields.to_dict()):
                    yield sub_field
            # nested and inner objects
            if hasattr(current, "_collect_fields"):
                for sub_field in current._collect_fields():
                    yield sub_field

    def update(self, other_object):
        """Merge the fields of ``other_object`` into this collection.

        Fields we do not have yet are added. Fields we already have are
        updated through their own ``update`` method when they provide one,
        and left untouched otherwise. Objects without a ``properties``
        attribute are ignored.
        """
        if not hasattr(other_object, "properties"):
            # not an inner/nested object, no merge possible
            return

        mine = self.properties
        theirs = other_object.properties
        for field_name in theirs:
            if field_name not in mine:
                mine[field_name] = theirs[field_name]
            elif hasattr(mine[field_name], "update"):
                mine[field_name].update(theirs[field_name])
class Mapping(object):
    """Index mapping: a ``Properties`` tree of fields plus meta entries.

    Field definitions live in ``self.properties``; everything else set
    through :meth:`meta` is kept in the ``self._meta`` dict.
    """

    def __init__(self):
        self.properties = Properties()
        self._meta = {}

    def __repr__(self):
        return "Mapping()"

    def _clone(self):
        """Return a new ``Mapping`` holding a shallow copy of our field params.

        NOTE(review): ``_meta`` is not carried over to the clone - confirm
        that this is intended before relying on it.
        """
        m = Mapping()
        m.properties._params = self.properties._params.copy()
        return m

    @classmethod
    def from_opensearch(cls, index, using="default"):
        """Build a mapping from the one currently stored for ``index``."""
        m = cls()
        m.update_from_opensearch(index, using)
        return m

    def resolve_nested(self, field_path):
        """Walk a dotted ``field_path`` and collect the ``Nested`` fields on the way.

        :returns: ``(nested_paths, field)``, where ``nested_paths`` lists the
            dotted path of every ``Nested`` field encountered, or
            ``((), None)`` when a step of the path does not exist.
        """
        field = self
        nested = []
        parts = field_path.split(".")
        for i, step in enumerate(parts):
            try:
                field = field[step]
            except KeyError:
                return (), None
            if isinstance(field, Nested):
                nested.append(".".join(parts[: i + 1]))
        return nested, field

    def resolve_field(self, field_path):
        """Return the field at the dotted ``field_path``, or ``None`` if a step is missing."""
        field = self
        for step in field_path.split("."):
            try:
                field = field[step]
            except KeyError:
                return
        return field

    def _collect_analysis(self):
        """Gather the analysis definitions of every analyzer used by the fields.

        :returns: a dict keyed by analysis section, merged from each
            analyzer's ``get_analysis_definition()``.
        """
        analysis = {}
        fields = []
        # "_all" meta settings may carry analyzers too; wrap them in a Text
        # field so they go through the same loop as regular fields.
        if "_all" in self._meta:
            fields.append(Text(**self._meta["_all"]))

        for f in chain(fields, self.properties._collect_fields()):
            for analyzer_name in (
                "analyzer",
                "normalizer",
                "search_analyzer",
                "search_quote_analyzer",
            ):
                if not hasattr(f, analyzer_name):
                    continue
                analyzer = getattr(f, analyzer_name)
                d = analyzer.get_analysis_definition()
                # empty custom analyzer, probably already defined out of our control
                if not d:
                    continue

                # merge the definition
                # TODO: conflict detection/resolution
                for key in d:
                    analysis.setdefault(key, {}).update(d[key])

        return analysis

    def save(self, index, using="default"):
        """Attach this mapping to ``Index(index, using=using)`` and save that index."""
        # Imported here rather than at module level (presumably to avoid a
        # circular import with the index module).
        from opensearchpy.helpers.index import Index

        index = Index(index, using=using)
        index.mapping(self)
        return index.save()

    def update_from_opensearch(self, index, using="default"):
        """Load the mapping stored for ``index`` and merge it into this one."""
        opensearch = get_connection(using)
        raw = opensearch.indices.get_mapping(index=index)
        # The response is keyed by index name; take one entry from it.
        _, raw = raw.popitem()
        self._update_from_dict(raw["mappings"])

    def _update_from_dict(self, raw):
        """Apply a raw mapping dict: ``properties`` become fields, the rest meta."""
        for name, definition in iteritems(raw.get("properties", {})):
            self.field(name, definition)

        # metadata like _all etc
        for name, value in iteritems(raw):
            if name != "properties":
                if isinstance(value, collections_abc.Mapping):
                    self.meta(name, **value)
                else:
                    self.meta(name, value)

    def update(self, mapping, update_only=False):
        """Merge the fields and meta entries of ``mapping`` into this one.

        With ``update_only`` set, fields we already have are kept (merged
        through their own ``update`` when they have one) and existing meta
        keys are not replaced. Without it, incoming fields and meta entries
        replace ours.
        """
        for name in mapping:
            if update_only and name in self:
                # nested and inner objects, merge recursively
                if hasattr(self[name], "update"):
                    # FIXME only merge subfields, not the settings
                    self[name].update(mapping[name], update_only)
                continue
            self.field(name, mapping[name])

        if update_only:
            for name in mapping._meta:
                if name not in self._meta:
                    self._meta[name] = mapping._meta[name]
        else:
            self._meta.update(mapping._meta)

    def __contains__(self, name):
        return name in self.properties.properties

    def __getitem__(self, name):
        return self.properties.properties[name]

    def __iter__(self):
        return iter(self.properties.properties)

    def field(self, *args, **kwargs):
        """Register a field (see ``Properties.field``); returns ``self`` for chaining."""
        self.properties.field(*args, **kwargs)
        return self

    def meta(self, name, params=None, **kwargs):
        """Set a meta entry, given either as ``params`` or as keyword arguments.

        Names not listed in ``META_FIELDS`` get a leading underscore when
        they lack one.

        :raises ValueError: if both ``params`` and keyword arguments are given.
        """
        if not name.startswith("_") and name not in META_FIELDS:
            name = "_" + name

        if params and kwargs:
            raise ValueError("Meta configs cannot have both value and a dictionary.")

        self._meta[name] = kwargs if params is None else params
        return self

    def to_dict(self):
        """Serialize the mapping: meta entries plus the serialized properties.

        The result is always a new dict. Previously ``self._meta`` itself was
        updated and returned whenever no ``_all`` entry was present, so the
        serialized properties leaked into ``_meta`` and callers changing the
        result changed the mapping.
        """
        meta = self._meta.copy()

        # hard coded serialization of analyzers in _all
        if "_all" in meta:
            # Copy the nested dict as well before replacing analyzers in it.
            _all = meta["_all"] = meta["_all"].copy()
            for f in ("analyzer", "search_analyzer", "search_quote_analyzer"):
                if hasattr(_all.get(f, None), "to_dict"):
                    _all[f] = _all[f].to_dict()
        meta.update(self.properties.to_dict())
        return meta
+30
View File
@@ -0,0 +1,30 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from .utils import DslBase
# Type stubs for the mapping helpers: only the class names and bases are
# declared here, no attributes or methods are typed.
class Properties(DslBase): ...
class Mapping(object): ...
+525
View File
@@ -0,0 +1,525 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
try:
import collections.abc as collections_abc # only works on python 3.3+
except ImportError:
import collections as collections_abc
from itertools import chain
# 'SF' looks unused but the test suite assumes it's available
# from this module so others are liable to do so as well.
from ..helpers.function import SF # noqa: F401
from ..helpers.function import ScoreFunction
from .utils import DslBase
def Q(name_or_query="match_all", **params):
    """Build a query object from one of several shorthand forms.

    Accepted forms for ``name_or_query``:

    * a single-key mapping such as ``{"match": {"title": "python"}}``;
    * an existing ``Query`` instance, which is returned as is;
    * an object exposing ``_proxied``, whose ``_proxied`` value is returned;
    * a query name, combined with ``params`` as the query parameters.

    :raises ValueError: if ``params`` are given together with a mapping or a
        ``Query`` instance, or if the mapping does not hold exactly one key.
    """
    # {"match": {"title": "python"}}
    if isinstance(name_or_query, collections_abc.Mapping):
        if params:
            raise ValueError("Q() cannot accept parameters when passing in a dict.")
        if len(name_or_query) != 1:
            raise ValueError(
                'Q() can only accept dict with a single query ({"match": {...}}). '
                "Instead it got (%r)" % name_or_query
            )
        # Read the single item without copy()/popitem(): the Mapping ABC
        # guarantees neither, and the input must not be modified.
        name, params = next(iter(name_or_query.items()))
        return Query.get_dsl_class(name)(_expand__to_dot=False, **params)

    # MatchAll()
    if isinstance(name_or_query, Query):
        if params:
            raise ValueError(
                "Q() cannot accept parameters when passing in a Query object."
            )
        return name_or_query

    # s.query = Q('filtered', query=s.query)
    if hasattr(name_or_query, "_proxied"):
        return name_or_query._proxied

    # "match", title="python"
    return Query.get_dsl_class(name_or_query)(**params)
class Query(DslBase):
    """Base class of all query types; defines how queries combine via operators."""

    _type_name = "query"
    _type_shortcut = staticmethod(Q)
    name = None

    def __add__(self, other):
        """``self + other``: both must match, unless ``other`` has its own ``__radd__``."""
        # Queries that know how to combine themselves get preference.
        if not hasattr(other, "__radd__"):
            return Bool(must=[self, other])
        return other.__radd__(self)

    def __invert__(self):
        """``~self``: a ``Bool`` with this query as its only ``must_not`` clause."""
        return Bool(must_not=[self])

    def __or__(self, other):
        """``self | other``: either may match, unless ``other`` has its own ``__ror__``."""
        # Queries that know how to combine themselves get preference.
        if not hasattr(other, "__ror__"):
            return Bool(should=[self, other])
        return other.__ror__(self)

    def __and__(self, other):
        """``self & other``: both must match, unless ``other`` has its own ``__rand__``."""
        # Queries that know how to combine themselves get preference.
        if not hasattr(other, "__rand__"):
            return Bool(must=[self, other])
        return other.__rand__(self)
class MatchAll(Query):
    """``match_all`` query; behaves as the neutral element when combined with ``+``/``&``."""

    name = "match_all"

    def __add__(self, other):
        """Combining with ``+``/``&`` yields a clone of the other query."""
        return other._clone()

    __and__ = __add__
    __rand__ = __add__
    __radd__ = __add__

    def __or__(self, other):
        """``|`` with anything yields this query itself."""
        return self

    __ror__ = __or__

    def __invert__(self):
        """The negation is ``MatchNone``."""
        return MatchNone()


# Module-level ``MatchAll`` instance.
EMPTY_QUERY = MatchAll()
class MatchNone(Query):
    """``match_none`` query; absorbs the other operand when combined with ``+``/``&``."""

    name = "match_none"

    def __add__(self, other):
        """Combining with ``+``/``&`` yields this query itself."""
        return self

    __and__ = __add__
    __rand__ = __add__
    __radd__ = __add__

    def __or__(self, other):
        """``|`` with another query yields a clone of that query."""
        return other._clone()

    __ror__ = __or__

    def __invert__(self):
        """The negation is ``MatchAll``."""
        return MatchAll()
class Bool(Query):
    """``bool`` compound query with ``must``, ``should``, ``must_not`` and ``filter`` clause lists.

    The operator overloads combine queries while trying to keep the result
    flat (merging clause lists instead of nesting ``Bool`` inside ``Bool``).
    """

    name = "bool"
    _param_defs = {
        "must": {"type": "query", "multi": True},
        "should": {"type": "query", "multi": True},
        "must_not": {"type": "query", "multi": True},
        "filter": {"type": "query", "multi": True},
    }

    def __add__(self, other):
        """Return a clone with ``other`` added: clause lists are concatenated
        for another ``Bool``, anything else is appended to ``must``."""
        q = self._clone()
        if isinstance(other, Bool):
            q.must += other.must
            q.should += other.should
            q.must_not += other.must_not
            q.filter += other.filter
        else:
            q.must.append(other)
        return q

    __radd__ = __add__

    def __or__(self, other):
        """Return the disjunction of ``self`` and ``other``.

        If either operand is a ``Bool`` consisting only of ``should`` clauses
        (and no explicit ``minimum_should_match``), the other operand is
        folded into a clone of it; otherwise both are wrapped in a new
        ``Bool(should=[...])``.
        """
        for q in (self, other):
            if isinstance(q, Bool) and not any(
                (q.must, q.must_not, q.filter, getattr(q, "minimum_should_match", None))
            ):
                # From here on ``q`` is the should-only Bool being extended and
                # ``other`` is whichever operand is not ``q``.
                other = self if q is other else other
                q = q._clone()
                if isinstance(other, Bool) and not any(
                    (
                        other.must,
                        other.must_not,
                        other.filter,
                        getattr(other, "minimum_should_match", None),
                    )
                ):
                    # both are should-only: merge the clause lists
                    q.should.extend(other.should)
                else:
                    q.should.append(other)
                return q

        return Bool(should=[self, other])

    __ror__ = __or__

    @property
    def _min_should_match(self):
        """Explicit ``minimum_should_match`` if set; otherwise 1 when there are
        ``should`` clauses and no ``must``/``filter`` clauses, else 0."""
        return getattr(
            self,
            "minimum_should_match",
            0 if not self.should or (self.must or self.filter) else 1,
        )

    def __invert__(self):
        """Return the negation of this query."""
        # Because an empty Bool query is treated like
        # MatchAll the inverse should be MatchNone
        if not any(chain(self.must, self.filter, self.should, self.must_not)):
            return MatchNone()

        # Collect one negated alternative per clause; matching any of them
        # negates the whole query.
        negations = []
        for q in chain(self.must, self.filter):
            negations.append(~q)

        # a must_not clause is already a negation, so it is used as is
        for q in self.must_not:
            negations.append(q)

        if self.should and self._min_should_match:
            negations.append(Bool(must_not=self.should[:]))

        if len(negations) == 1:
            return negations[0]
        return Bool(should=negations)

    def __and__(self, other):
        """Return the conjunction of ``self`` and ``other``.

        For another ``Bool`` the ``must``, ``must_not`` and ``filter`` lists
        are concatenated and the ``should`` clauses of both sides are
        redistributed according to each side's ``_min_should_match``. Any
        other query is appended to ``must``.
        """
        q = self._clone()
        if isinstance(other, Bool):
            q.must += other.must
            q.must_not += other.must_not
            q.filter += other.filter
            q.should = []

            # reset minimum_should_match as it will get calculated below
            if "minimum_should_match" in q._params:
                del q._params["minimum_should_match"]

            for qx in (self, other):
                # TODO: percentages will fail here
                min_should_match = qx._min_should_match
                # all subqueries are required
                if len(qx.should) <= min_should_match:
                    q.must.extend(qx.should)
                # not all of them are required, use it and remember min_should_match
                elif not q.should:
                    q.minimum_should_match = min_should_match
                    q.should = qx.should
                # all queries are optional, just extend should
                elif q._min_should_match == 0 and min_should_match == 0:
                    q.should.extend(qx.should)
                # not all are required, add a should list to the must with proper min_should_match
                else:
                    q.must.append(
                        Bool(should=qx.should, minimum_should_match=min_should_match)
                    )
        else:
            # Adding a must clause changes the computed _min_should_match from
            # 1 to 0, so pin it to 1 first unless a value is already set.
            if not (q.must or q.filter) and q.should:
                q._params.setdefault("minimum_should_match", 1)
            q.must.append(other)
        return q

    __rand__ = __and__
class FunctionScore(Query):
    """``function_score`` query.

    When no ``functions`` argument is given, keyword arguments named after a
    registered score function (a key of ``ScoreFunction._classes``) are
    removed from the arguments and collected into the ``functions`` list as
    ``{name: value}`` entries.
    """

    name = "function_score"
    _param_defs = {
        "query": {"type": "query"},
        "filter": {"type": "query"},
        "functions": {"type": "score_function", "multi": True},
    }

    def __init__(self, **kwargs):
        if "functions" not in kwargs:
            collected = []
            kwargs["functions"] = collected
            for fn_name in ScoreFunction._classes:
                if fn_name in kwargs:
                    collected.append({fn_name: kwargs.pop(fn_name)})
        super(FunctionScore, self).__init__(**kwargs)
# compound queries
class Boosting(Query):
    """``boosting`` query; ``positive`` and ``negative`` are declared with type ``query``."""

    name = "boosting"
    _param_defs = {"positive": {"type": "query"}, "negative": {"type": "query"}}


class ConstantScore(Query):
    """``constant_score`` query; ``query`` and ``filter`` are declared with type ``query``."""

    name = "constant_score"
    _param_defs = {"query": {"type": "query"}, "filter": {"type": "query"}}


class DisMax(Query):
    """``dis_max`` query; ``queries`` is declared as a multi-valued ``query`` param."""

    name = "dis_max"
    _param_defs = {"queries": {"type": "query", "multi": True}}


class Filtered(Query):
    """``filtered`` query; ``query`` and ``filter`` are declared with type ``query``."""

    name = "filtered"
    _param_defs = {"query": {"type": "query"}, "filter": {"type": "query"}}


class Indices(Query):
    """``indices`` query; ``query`` and ``no_match_query`` are declared with type ``query``."""

    name = "indices"
    _param_defs = {"query": {"type": "query"}, "no_match_query": {"type": "query"}}


class Percolate(Query):
    """``percolate`` query; declares no typed params of its own."""

    name = "percolate"
# relationship queries
class Nested(Query):
    """``nested`` query; ``query`` is declared with type ``query``."""

    name = "nested"
    _param_defs = {"query": {"type": "query"}}


class HasChild(Query):
    """``has_child`` query; ``query`` is declared with type ``query``."""

    name = "has_child"
    _param_defs = {"query": {"type": "query"}}


class HasParent(Query):
    """``has_parent`` query; ``query`` is declared with type ``query``."""

    name = "has_parent"
    _param_defs = {"query": {"type": "query"}}


class TopChildren(Query):
    """``top_children`` query; ``query`` is declared with type ``query``."""

    name = "top_children"
    _param_defs = {"query": {"type": "query"}}
# compound span queries
class SpanFirst(Query):
    """``span_first`` query; ``match`` is declared with type ``query``."""

    name = "span_first"
    _param_defs = {"match": {"type": "query"}}


class SpanMulti(Query):
    """``span_multi`` query; ``match`` is declared with type ``query``."""

    name = "span_multi"
    _param_defs = {"match": {"type": "query"}}


class SpanNear(Query):
    """``span_near`` query; ``clauses`` is declared as a multi-valued ``query`` param."""

    name = "span_near"
    _param_defs = {"clauses": {"type": "query", "multi": True}}


class SpanNot(Query):
    """``span_not`` query; ``exclude`` and ``include`` are declared with type ``query``."""

    name = "span_not"
    _param_defs = {"exclude": {"type": "query"}, "include": {"type": "query"}}


class SpanOr(Query):
    """``span_or`` query; ``clauses`` is declared as a multi-valued ``query`` param."""

    name = "span_or"
    _param_defs = {"clauses": {"type": "query", "multi": True}}


class FieldMaskingSpan(Query):
    """``field_masking_span`` query; ``query`` is declared with type ``query``."""

    name = "field_masking_span"
    _param_defs = {"query": {"type": "query"}}


class SpanContaining(Query):
    """``span_containing`` query; ``little`` and ``big`` are declared with type ``query``."""

    name = "span_containing"
    _param_defs = {"little": {"type": "query"}, "big": {"type": "query"}}


# The original implementation misspelled the class name; this alias keeps
# the misspelled name importable. Remove in v8.0.
SpanContainining = SpanContaining


class SpanWithin(Query):
    """``span_within`` query; ``little`` and ``big`` are declared with type ``query``."""

    name = "span_within"
    _param_defs = {"little": {"type": "query"}, "big": {"type": "query"}}
# core queries
# Every class in this section only sets ``name``, the DSL name of the query
# it represents. Apart from ``ScriptScore`` none of them declares
# ``_param_defs``, so no parameter is typed as a nested query here.
class Common(Query):
    name = "common"


class Fuzzy(Query):
    name = "fuzzy"


class FuzzyLikeThis(Query):
    name = "fuzzy_like_this"


class FuzzyLikeThisField(Query):
    name = "fuzzy_like_this_field"


class RankFeature(Query):
    name = "rank_feature"


class DistanceFeature(Query):
    name = "distance_feature"


class GeoBoundingBox(Query):
    name = "geo_bounding_box"


class GeoDistance(Query):
    name = "geo_distance"


class GeoDistanceRange(Query):
    name = "geo_distance_range"


class GeoPolygon(Query):
    name = "geo_polygon"


class GeoShape(Query):
    name = "geo_shape"


class GeohashCell(Query):
    name = "geohash_cell"


class Ids(Query):
    name = "ids"


class Intervals(Query):
    name = "intervals"


class Limit(Query):
    name = "limit"


class Match(Query):
    name = "match"


class MatchPhrase(Query):
    name = "match_phrase"


class MatchPhrasePrefix(Query):
    name = "match_phrase_prefix"


class MatchBoolPrefix(Query):
    name = "match_bool_prefix"


class Exists(Query):
    name = "exists"


class MoreLikeThis(Query):
    name = "more_like_this"


class MoreLikeThisField(Query):
    name = "more_like_this_field"


class MultiMatch(Query):
    name = "multi_match"


class Prefix(Query):
    name = "prefix"


class QueryString(Query):
    name = "query_string"


class Range(Query):
    name = "range"


class Regexp(Query):
    name = "regexp"


class Shape(Query):
    name = "shape"


class SimpleQueryString(Query):
    name = "simple_query_string"


class SpanTerm(Query):
    name = "span_term"


class Template(Query):
    name = "template"


class Term(Query):
    name = "term"


class Terms(Query):
    name = "terms"


class TermsSet(Query):
    name = "terms_set"


class Wildcard(Query):
    name = "wildcard"


class Script(Query):
    name = "script"


class ScriptScore(Query):
    """``script_score`` query; ``query`` is declared with type ``query``."""

    name = "script_score"
    _param_defs = {"query": {"type": "query"}}


class Type(Query):
    name = "type"


class ParentId(Query):
    name = "parent_id"


class Wrapper(Query):
    name = "wrapper"
+95
View File
@@ -0,0 +1,95 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from typing import Any
from .utils import DslBase
# Type stubs for the query classes. ``Query`` derives from ``DslBase`` and
# every other class derives directly from ``Query``; all bodies are elided
# (``...``), so no attributes or methods are declared at the stub level.
class Query(DslBase): ...
class MatchAll(Query): ...
class MatchNone(Query): ...
class Bool(Query): ...
class FunctionScore(Query): ...
class Boosting(Query): ...
class ConstantScore(Query): ...
class DisMax(Query): ...
class Filtered(Query): ...
class Indices(Query): ...
class Percolate(Query): ...
class Nested(Query): ...
class HasChild(Query): ...
class HasParent(Query): ...
class TopChildren(Query): ...
class SpanFirst(Query): ...
class SpanMulti(Query): ...
class SpanNear(Query): ...
class SpanNot(Query): ...
class SpanOr(Query): ...
class FieldMaskingSpan(Query): ...
class SpanContaining(Query): ...
class SpanWithin(Query): ...
class Common(Query): ...
class Fuzzy(Query): ...
class FuzzyLikeThis(Query): ...
class FuzzyLikeThisField(Query): ...
class RankFeature(Query): ...
class DistanceFeature(Query): ...
class GeoBoundingBox(Query): ...
class GeoDistance(Query): ...
class GeoDistanceRange(Query): ...
class GeoPolygon(Query): ...
class GeoShape(Query): ...
class GeohashCell(Query): ...
class Ids(Query): ...
class Intervals(Query): ...
class Limit(Query): ...
class Match(Query): ...
class MatchPhrase(Query): ...
class MatchPhrasePrefix(Query): ...
class MatchBoolPrefix(Query): ...
class Exists(Query): ...
class MoreLikeThis(Query): ...
class MoreLikeThisField(Query): ...
class MultiMatch(Query): ...
class Prefix(Query): ...
class QueryString(Query): ...
class Range(Query): ...
class Regexp(Query): ...
class Shape(Query): ...
class SimpleQueryString(Query): ...
class SpanTerm(Query): ...
class Template(Query): ...
class Term(Query): ...
class Terms(Query): ...
class TermsSet(Query): ...
class Wildcard(Query): ...
class Script(Query): ...
class ScriptScore(Query): ...
class Type(Query): ...
class ParentId(Query): ...
class Wrapper(Query): ...
# Stub for the ``Q`` factory: one positional ``name_or_query`` plus arbitrary
# keyword params. Argument and return types are all left as ``Any``.
def Q(name_or_query: Any, **params: Any) -> Any: ...
+127
View File
@@ -0,0 +1,127 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from ..utils import AttrDict, AttrList, _wrap
from .hit import Hit, HitMeta
__all__ = ["Response", "AggResponse", "UpdateByQueryResponse", "Hit", "HitMeta"]
class Response(AttrDict):
    """
    Wrapper around the body returned for a search request.

    Iteration, ``len()``, truthiness and integer/slice indexing all operate
    on ``hits``; any other key is looked up through ``AttrDict``.

    :arg search: the search object that produced the response; its
        ``_get_result`` and ``aggs`` are used to wrap hits and aggregations
    :arg response: the response body to wrap
    :arg doc_class: optional class, kept as ``_doc_class``
    """

    def __init__(self, search, response, doc_class=None):
        # go around AttrDict.__setattr__ so these stay plain attributes and
        # do not end up as keys of the wrapped dict
        super(AttrDict, self).__setattr__("_search", search)
        super(AttrDict, self).__setattr__("_doc_class", doc_class)
        super(Response, self).__init__(response)

    def __iter__(self):
        """Iterate over the wrapped hits."""
        return iter(self.hits)

    def __getitem__(self, key):
        """Index into ``hits`` for ints/slices, into the body otherwise."""
        if not isinstance(key, (slice, int)):
            return super(Response, self).__getitem__(key)
        # positional access and slicing are answered by the hit list
        return self.hits[key]

    def __nonzero__(self):
        return bool(self.hits)

    __bool__ = __nonzero__

    def __repr__(self):
        return "<Response: %r>" % (self.hits or self.aggregations)

    def __len__(self):
        return len(self.hits)

    def __getstate__(self):
        """Return ``(data, search, doc_class)`` for pickling."""
        return self._d_, self._search, self._doc_class

    def __setstate__(self, state):
        """Restore the three values produced by ``__getstate__``."""
        for position, attr in enumerate(("_d_", "_search", "_doc_class")):
            super(AttrDict, self).__setattr__(attr, state[position])

    def success(self):
        """
        Return ``True`` when every shard reported success and the request
        did not time out.
        """
        shards = self._shards
        if shards.total != shards.successful:
            return False
        return not self.timed_out

    @property
    def hits(self):
        """
        Hits wrapped through ``search._get_result``, built on first access
        and cached in ``_hits``. The remaining keys of the raw ``hits``
        section are attached to the list as attributes.
        """
        if hasattr(self, "_hits"):
            return self._hits

        raw = self._d_["hits"]
        try:
            wrapped = AttrList(map(self._search._get_result, raw["hits"]))
        except AttributeError as e:
            # avoid raising AttributeError since it will be hidden by the property
            raise TypeError("Could not parse hits.", e)

        # avoid assigning _hits into self._d_
        super(AttrDict, self).__setattr__("_hits", wrapped)
        for key in raw:
            setattr(self._hits, key, _wrap(raw[key]))
        return self._hits

    @property
    def aggregations(self):
        """Alias of ``aggs``."""
        return self.aggs

    @property
    def aggs(self):
        """
        ``AggResponse`` over the ``aggregations`` section (empty dict when
        absent), built on first access and cached in ``_aggs``.
        """
        if hasattr(self, "_aggs"):
            return self._aggs

        search = self._search
        wrapped = AggResponse(search.aggs, search, self._d_.get("aggregations", {}))
        # avoid assigning _aggs into self._d_
        super(AttrDict, self).__setattr__("_aggs", wrapped)
        return self._aggs
class AggResponse(AttrDict):
    """
    Wrapper around aggregation results.

    :arg aggs: the aggregation definitions the data belongs to
    :arg search: the search that produced the data
    :arg data: the aggregation results to wrap
    """

    def __init__(self, aggs, search, data):
        meta = {"search": search, "aggs": aggs}
        # stored as a plain attribute, bypassing AttrDict.__setattr__
        super(AttrDict, self).__setattr__("_meta", meta)
        super(AggResponse, self).__init__(data)

    def __getitem__(self, attr_name):
        """
        Return the result for ``attr_name``. Names defined in the
        aggregation definitions are wrapped through that aggregation's
        ``result``; anything else is a plain ``AttrDict`` lookup.
        """
        definitions = self._meta["aggs"]
        if attr_name not in definitions:
            return super(AggResponse, self).__getitem__(attr_name)
        # don't do definitions[attr_name] to avoid copying
        definition = definitions.aggs[attr_name]
        return definition.result(self._meta["search"], self._d_[attr_name])

    def __iter__(self):
        """Yield the wrapped result of every defined aggregation."""
        for agg_name in self._meta["aggs"]:
            yield self[agg_name]
class UpdateByQueryResponse(AttrDict):
    """
    Wrapper around the body returned for an update-by-query request.

    :arg search: the request object that produced the response
    :arg response: the response body to wrap
    :arg doc_class: optional class, kept as ``_doc_class``
    """

    def __init__(self, search, response, doc_class=None):
        # plain attributes, bypassing AttrDict.__setattr__
        for attr, value in (("_search", search), ("_doc_class", doc_class)):
            super(AttrDict, self).__setattr__(attr, value)
        super(UpdateByQueryResponse, self).__init__(response)

    def success(self):
        """Return ``True`` when there was neither a timeout nor failures."""
        return not (self.timed_out or self.failures)
@@ -0,0 +1,31 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from ..utils import AttrDict
# Type stubs for the response wrappers; all three derive from AttrDict and
# declare no members at the stub level.
class Response(AttrDict): ...
class AggResponse(AttrDict): ...
class UpdateByQueryResponse(AttrDict): ...
+89
View File
@@ -0,0 +1,89 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from ..utils import AttrDict, AttrList
from . import AggResponse, Response
class Bucket(AggResponse):
    """
    A single bucket of a bucket aggregation.

    ``field`` is accepted so all bucket classes share one signature; this
    class does not use it.
    """

    def __init__(self, aggs, search, data, field=None):
        super(Bucket, self).__init__(aggs=aggs, search=search, data=data)
class FieldBucket(Bucket):
    """
    Bucket whose ``key`` is passed through ``field.deserialize`` when a
    field is supplied. ``data`` is updated in place.
    """

    def __init__(self, aggs, search, data, field=None):
        if field:
            raw_key = data["key"]
            data["key"] = field.deserialize(raw_key)
        super(FieldBucket, self).__init__(aggs, search, data, field)
class BucketData(AggResponse):
    """
    Result of a bucket aggregation. Iteration, ``len()`` and integer/slice
    indexing operate on ``buckets``.
    """

    # class used to wrap each individual bucket
    _bucket_class = Bucket

    def _wrap_bucket(self, data):
        """Wrap one raw bucket in ``_bucket_class``."""
        bucket_class = self._bucket_class
        meta = self._meta
        return bucket_class(
            meta["aggs"], meta["search"], data, field=meta.get("field")
        )

    def __iter__(self):
        return iter(self.buckets)

    def __len__(self):
        return len(self.buckets)

    def __getitem__(self, key):
        if not isinstance(key, (int, slice)):
            return super(BucketData, self).__getitem__(key)
        return self.buckets[key]

    @property
    def buckets(self):
        """
        Wrapped buckets, built on first access and cached in ``_buckets``.

        A list of raw buckets becomes an ``AttrList`` that wraps items via
        ``_wrap_bucket``; any other container is treated as a mapping and
        becomes an ``AttrDict`` of wrapped buckets.
        """
        if hasattr(self, "_buckets"):
            return self._buckets

        meta = self._meta
        field_name = getattr(meta["aggs"], "field", None)
        if field_name:
            # remember the resolved field so _wrap_bucket can hand it on
            meta["field"] = meta["search"]._resolve_field(field_name)

        raw = self._d_["buckets"]
        if isinstance(raw, list):
            wrapped = AttrList(raw, obj_wrapper=self._wrap_bucket)
        else:
            wrapped = AttrDict({key: self._wrap_bucket(raw[key]) for key in raw})
        # plain attribute, bypassing AttrDict.__setattr__
        super(AttrDict, self).__setattr__("_buckets", wrapped)
        return self._buckets
class FieldBucketData(BucketData):
    """``BucketData`` variant that wraps its buckets in ``FieldBucket``."""

    _bucket_class = FieldBucket
class TopHitsData(Response):
    """
    ``Response`` built from aggregation data, with the aggregation and the
    search available as ``meta.agg`` and ``meta.search``.
    """

    def __init__(self, agg, search, data):
        meta = AttrDict({"agg": agg, "search": search})
        # plain attribute, bypassing AttrDict.__setattr__
        super(AttrDict, self).__setattr__("meta", meta)
        super(TopHitsData, self).__init__(search, data)
+34
View File
@@ -0,0 +1,34 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from . import AggResponse as AggResponse
from . import Response as Response
# Type stubs for the aggregation result wrappers; only the inheritance
# chain is declared, all bodies are elided.
class Bucket(AggResponse): ...
class FieldBucket(Bucket): ...
class BucketData(AggResponse): ...
class FieldBucketData(BucketData): ...
class TopHitsData(Response): ...
+60
View File
@@ -0,0 +1,60 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from ..utils import AttrDict, HitMeta
class Hit(AttrDict):
    """
    A single search hit.

    The wrapped data is the hit's ``_source`` (when present) updated with
    its ``fields`` (when present); the whole raw document is additionally
    exposed as ``meta`` via ``HitMeta``.
    """

    def __init__(self, document):
        data = document["_source"] if "_source" in document else {}
        if "fields" in document:
            data.update(document["fields"])

        super(Hit, self).__init__(data)
        # assign meta as attribute and not as key in self._d_
        super(AttrDict, self).__setattr__("meta", HitMeta(document))

    def __getstate__(self):
        # add self.meta since it is not in self.__dict__
        base_state = super(Hit, self).__getstate__()
        return base_state + (self.meta,)

    def __setstate__(self, state):
        # meta is the trailing element added by __getstate__
        super(AttrDict, self).__setattr__("meta", state[-1])
        super(Hit, self).__setstate__(state[:-1])

    def __dir__(self):
        # be sure to expose meta in dir(self)
        return super(Hit, self).__dir__() + ["meta"]

    def __repr__(self):
        location = "/".join(
            [getattr(self.meta, key) for key in ("index", "id") if key in self.meta]
        )
        return "<Hit({}): {}>".format(location, super(Hit, self).__repr__())
+29
View File
@@ -0,0 +1,29 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from ..utils import AttrDict
# Type stub for Hit; derives from AttrDict and declares no members.
class Hit(AttrDict): ...
+827
View File
@@ -0,0 +1,827 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import copy
try:
import collections.abc as collections_abc # only works on python 3.3+
except ImportError:
import collections as collections_abc
from six import iteritems, string_types
from opensearchpy.connection.connections import get_connection
from opensearchpy.exceptions import TransportError
from opensearchpy.helpers import scan
from ..exceptions import IllegalOperation
from ..helpers.aggs import A, AggBase
from ..helpers.query import Bool, Q
from .response import Hit, Response
from .utils import AttrDict, DslBase, recursive_to_dict
class QueryProxy(object):
    """
    Thin proxy around a DSL query object.

    Calling the proxy adds a query (or post_filter) to a clone of the owning
    search and returns that clone; attribute access is forwarded to the
    wrapped query.

    :arg search: the search object that owns this proxy
    :arg attr_name: name of the search attribute this proxy is reachable
        under (e.g. ``"query"``)
    """

    def __init__(self, search, attr_name):
        self._search = search
        self._proxied = None
        self._attr_name = attr_name

    def __nonzero__(self):
        """The proxy is truthy once it wraps a query."""
        return self._proxied is not None

    __bool__ = __nonzero__

    def __call__(self, *args, **kwargs):
        """
        Build a query via ``Q(*args, **kwargs)`` and combine it (``&=``) with
        whatever the clone's proxy already holds.

        :returns: the cloned search, so calls can be chained
        """
        clone = self._search._clone()
        # self._proxied belongs to the original search; work on the proxy
        # that lives on the freshly created clone instead
        target = getattr(clone, self._attr_name)
        addition = Q(*args, **kwargs)
        if target._proxied is None:
            target._proxied = addition
        else:
            target._proxied &= addition
        return clone

    def __getattr__(self, attr_name):
        # only reached when normal lookup fails: forward to the query
        return getattr(self._proxied, attr_name)

    def __setattr__(self, attr_name, value):
        is_public = not attr_name.startswith("_")
        if is_public:
            # rebuild the wrapped query from its dict form before changing it
            self._proxied = Q(self._proxied.to_dict())
            setattr(self._proxied, attr_name, value)
        super(QueryProxy, self).__setattr__(attr_name, value)

    def __getstate__(self):
        """Return ``(search, proxied, attr_name)`` for pickling."""
        return self._search, self._proxied, self._attr_name

    def __setstate__(self, state):
        search, proxied, attr_name = state
        self._search = search
        self._proxied = proxied
        self._attr_name = attr_name
class ProxyDescriptor(object):
    """
    Simple descriptor to enable setting of queries and filters as::

        s = Search()
        s.query = Q(...)

    Reading the attribute on an instance returns the proxy stored on that
    instance under ``_<name>_proxy``; assigning replaces the query wrapped by
    that proxy with ``Q(value)``.

    :arg name: public attribute name (e.g. ``"query"``)
    """

    def __init__(self, name):
        self._attr_name = "_%s_proxy" % name

    def __get__(self, instance, owner):
        # Accessed on the class (e.g. by introspection tools): there is no
        # per-instance proxy to return, so hand back the descriptor itself
        # instead of failing with an AttributeError on ``None``.
        if instance is None:
            return self
        return getattr(instance, self._attr_name)

    def __set__(self, instance, value):
        proxy = getattr(instance, self._attr_name)
        proxy._proxied = Q(value)
class AggsProxy(AggBase, DslBase):
    """
    Container for the top-level aggregations of a search.

    :arg search: the search object the aggregations belong to
    """

    name = "aggs"

    def __init__(self, search):
        self._base = self
        self._search = search
        self._params = {"aggs": {}}

    def to_dict(self):
        """Return only the ``aggs`` part of the serialized form (or ``{}``)."""
        serialized = super(AggsProxy, self).to_dict()
        return serialized.get("aggs", {})
class Request(object):
    """
    State shared by the request builders: the client or connection alias
    (``_using``), target indices, doc types, query-string ``_params`` and
    ``_extra`` body keys.

    Every public modifier (``params``, ``index``, ``doc_type``, ``using``,
    ``extra``) works on a clone and leaves ``self`` untouched.

    :arg using: client instance or connection alias
    :arg index: a single index or a list/tuple of indices
    :arg doc_type: a single doc type, a list/tuple of them, or a mapping of
        doc type name to the callback used to wrap matching hits
    :arg extra: initial extra body keys
    """

    def __init__(self, using="default", index=None, doc_type=None, extra=None):
        self._using = using

        if isinstance(index, (tuple, list)):
            self._index = list(index)
        elif index:
            self._index = [index]
        else:
            self._index = None

        self._doc_type = []
        self._doc_type_map = {}
        if isinstance(doc_type, (tuple, list)):
            self._doc_type.extend(doc_type)
        elif isinstance(doc_type, collections_abc.Mapping):
            # mapping: keys are the types, values the wrapping callbacks
            self._doc_type.extend(doc_type.keys())
            self._doc_type_map.update(doc_type)
        elif doc_type:
            self._doc_type.append(doc_type)

        self._params = {}
        self._extra = extra or {}

    def __eq__(self, other):
        if not isinstance(other, Request):
            return False
        same_state = (
            other._params == self._params
            and other._index == self._index
            and other._doc_type == self._doc_type
        )
        # only serialize both sides when the cheap comparisons agree
        return same_state and other.to_dict() == self.to_dict()

    def __copy__(self):
        return self._clone()

    def params(self, **kwargs):
        """
        Specify query params to be used when executing the search. All the
        keyword arguments will override the current values.

        Example::

            s = Search()
            s = s.params(routing='user-1', preference='local')
        """
        clone = self._clone()
        clone._params.update(kwargs)
        return clone

    def index(self, *index):
        """
        Add indices to the search. Called without arguments it removes all
        index information instead. Strings, lists and tuples are accepted;
        arguments of any other type are ignored.

        Example::

            s = Search()
            s = s.index('twitter-2015.01.01', 'twitter-2015.01.02')
            s = s.index(['twitter-2015.01.01', 'twitter-2015.01.02'])
        """
        clone = self._clone()
        if not index:
            # .index() resets
            clone._index = None
            return clone

        added = []
        for item in index:
            if isinstance(item, string_types):
                added.append(item)
            elif isinstance(item, (list, tuple)):
                added.extend(item)
        clone._index = (self._index or []) + added
        return clone

    def _resolve_field(self, path):
        """
        Return the first non-``None`` result of ``_index.resolve_field(path)``
        among the doc types that have an ``_index``; ``None`` if there is
        no such result.
        """
        for doc_type in self._doc_type:
            if hasattr(doc_type, "_index"):
                field = doc_type._index.resolve_field(path)
                if field is not None:
                    return field
        return None

    def _resolve_nested(self, hit, parent_class=None):
        """
        Return the class to use for a nested hit: the ``_doc_class`` of the
        field found at the dotted nested path, or ``Hit`` when the path
        cannot be resolved.
        """
        segments = []
        level = hit["_nested"]
        while level and "field" in level:
            segments.append(level["field"])
            level = level.get("_nested")
        dotted_path = ".".join(segments)

        if hasattr(parent_class, "_index"):
            field = parent_class._index.resolve_field(dotted_path)
        else:
            field = self._resolve_field(dotted_path)

        return Hit if field is None else field._doc_class

    def _get_result(self, hit, parent_class=None):
        """
        Turn a raw hit into a result object.

        The class is picked in this order: the nested resolution for hits
        carrying ``_nested``, the callback registered for the hit's
        ``_type``, the first doc type whose ``_matches(hit)`` is true, and
        finally ``Hit``. Entries of ``inner_hits`` are replaced in place by
        ``Response`` objects. The class' ``from_opensearch`` is used when
        it has one, otherwise the class itself is called.
        """
        type_name = hit.get("_type")
        if "_nested" in hit:
            doc_class = self._resolve_nested(hit, parent_class)
        elif type_name in self._doc_type_map:
            doc_class = self._doc_type_map[type_name]
        else:
            doc_class = Hit
            for candidate in self._doc_type:
                if hasattr(candidate, "_matches") and candidate._matches(hit):
                    doc_class = candidate
                    break

        for inner_name in hit.get("inner_hits", ()):
            raw_inner = hit["inner_hits"][inner_name]
            hit["inner_hits"][inner_name] = Response(
                self, raw_inner, doc_class=doc_class
            )

        factory = getattr(doc_class, "from_opensearch", doc_class)
        return factory(hit)

    def doc_type(self, *doc_type, **kwargs):
        """
        Add types to search through. You can supply a single value or
        multiple. Values can be strings or subclasses of ``Document``.

        Keyword arguments map a doc_type name to a callback that should be
        used instead of the Hit class.

        If neither positional nor keyword arguments are supplied, all type
        information stored on the instance is erased.

        Example::

            s = Search().doc_type('product', 'store', User, custom=my_callback)
        """
        clone = self._clone()
        if doc_type or kwargs:
            clone._doc_type.extend(doc_type)
            clone._doc_type.extend(kwargs.keys())
            clone._doc_type_map.update(kwargs)
        else:
            # .doc_type() resets
            clone._doc_type = []
            clone._doc_type_map = {}
        return clone

    def using(self, client):
        """
        Associate the search request with an opensearch client. A fresh copy
        will be returned with current instance remaining unchanged.

        :arg client: an instance of ``opensearchpy.OpenSearch`` to use or
            an alias to look up in ``opensearchpy.connections``
        """
        clone = self._clone()
        clone._using = client
        return clone

    def extra(self, **kwargs):
        """
        Add extra keys to the request body. Mostly here for backwards
        compatibility. ``from_`` is accepted as a spelling of ``from``.
        """
        clone = self._clone()
        if "from_" in kwargs:
            kwargs["from"] = kwargs.pop("from_")
        clone._extra.update(kwargs)
        return clone

    def _clone(self):
        """Return a new instance with shallow copies of the request state."""
        clone = self.__class__(
            using=self._using, index=self._index, doc_type=self._doc_type
        )
        clone._doc_type_map = self._doc_type_map.copy()
        clone._extra = self._extra.copy()
        clone._params = self._params.copy()
        return clone
# Search request builder. ``query`` and ``post_filter`` are ProxyDescriptor
# attributes: reading them on an instance returns the per-instance
# QueryProxy created in __init__, assigning wraps the value with Q().
class Search(Request):
query = ProxyDescriptor("query")
post_filter = ProxyDescriptor("post_filter")
def __init__(self, **kwargs):
    """
    Search request to opensearch.

    :arg using: `OpenSearch` instance to use
    :arg index: limit the search to index
    :arg doc_type: only query this type.

    All the parameters supplied (or omitted) at creation time can be later
    overridden by methods (`using`, `index` and `doc_type` respectively).
    """
    super(Search, self).__init__(**kwargs)

    # proxies behind the ``query`` / ``post_filter`` descriptors and aggs
    self._query_proxy = QueryProxy(self, "query")
    self._post_filter_proxy = QueryProxy(self, "post_filter")
    self.aggs = AggsProxy(self)

    # remaining body parts start out empty
    self._sort = []
    self._source = None
    self._highlight, self._highlight_opts = {}, {}
    self._suggest = {}
    self._script_fields = {}

    self._response_class = Response
def filter(self, *args, **kwargs):
    """
    Add ``Q(*args, **kwargs)`` to the query as a ``Bool`` filter clause and
    return the resulting search.
    """
    clause = Q(*args, **kwargs)
    return self.query(Bool(filter=[clause]))
def exclude(self, *args, **kwargs):
    """
    Add the inverse (``~``) of ``Q(*args, **kwargs)`` to the query as a
    ``Bool`` filter clause and return the resulting search.
    """
    negated = ~Q(*args, **kwargs)
    return self.query(Bool(filter=[negated]))
def __iter__(self):
    """
    Execute the search (see ``execute``) and iterate over the response.
    """
    response = self.execute()
    return iter(response)
def __getitem__(self, n):
    """
    Support slicing the `Search` instance for pagination.

    Slicing equates to the from/size parameters. E.g.::

        s = Search().query(...)[0:25]

    is equivalent to::

        s = Search().query(...).extra(from_=0, size=25)

    A plain index ``n`` is equivalent to slicing by ``[n:n+1]``.

    :raises ValueError: for negative indices or slice bounds
    """
    clone = self._clone()

    if not isinstance(n, slice):
        if n < 0:
            raise ValueError("Search does not support negative indexing.")
        offset, size = n, 1
    else:
        start, stop = n.start, n.stop
        if (start and start < 0) or (stop and stop < 0):
            raise ValueError("Search does not support negative slicing.")
        offset = start or 0
        # OpenSearch won't get all results so we default to size: 10 if
        # stop not given.
        wanted = 10 if stop is None else stop - offset
        size = max(0, wanted)

    clone._extra["from"] = offset
    clone._extra["size"] = size
    return clone
@classmethod
def from_dict(cls, d):
    """
    Construct a new `Search` instance from a raw dict containing the search
    body. Useful when migrating from raw dictionaries.

    Example::

        s = Search.from_dict({
            "query": {
                "bool": {
                    "must": [...]
                }
            },
            "aggs": {...}
        })
        s = s.filter('term', published=True)
    """
    search = cls()
    search.update_from_dict(d)
    return search
def _clone(self):
    """
    Return a clone of the current search request. Performs a shallow copy
    of all the underlying objects. Used internally by most state modifying
    APIs.
    """
    clone = super(Search, self)._clone()

    clone._response_class = self._response_class
    clone._sort = self._sort[:]
    if self._source is None:
        clone._source = None
    else:
        clone._source = copy.copy(self._source)

    for dict_attr in ("_highlight", "_highlight_opts", "_suggest", "_script_fields"):
        setattr(clone, dict_attr, getattr(self, dict_attr).copy())

    # the proxies are per-instance; point the clone's at the same queries
    for proxy_name in ("query", "post_filter"):
        getattr(clone, proxy_name)._proxied = getattr(self, proxy_name)._proxied

    # copy top-level bucket definitions
    top_level = self.aggs._params.get("aggs")
    if top_level:
        clone.aggs._params = {"aggs": self.aggs._params["aggs"].copy()}
    return clone
def response_class(self, cls):
    """
    Return a clone that wraps its response in ``cls`` instead of the
    default wrapper.
    """
    clone = self._clone()
    clone._response_class = cls
    return clone
def update_from_dict(self, d):
    """
    Apply options from a serialized body to the current instance. Modifies
    the object in-place. Used mostly by ``from_dict``.

    Recognized keys (``query``, ``post_filter``, ``aggs``/``aggregations``,
    ``sort``, ``_source``, ``highlight``, ``suggest``, ``script_fields``)
    are moved into the matching parts of the search; everything else ends
    up in the extra body keys. A ``highlight`` section must contain
    ``fields``.

    :arg d: the serialized search body; it is not modified
    :returns: ``self``
    """
    # shallow copy: keys are popped below as they are consumed
    d = d.copy()
    if "query" in d:
        self.query._proxied = Q(d.pop("query"))
    if "post_filter" in d:
        self.post_filter._proxied = Q(d.pop("post_filter"))

    aggs = d.pop("aggs", d.pop("aggregations", {}))
    if aggs:
        self.aggs._params = {
            "aggs": {name: A(value) for (name, value) in iteritems(aggs)}
        }
    if "sort" in d:
        self._sort = d.pop("sort")
    if "_source" in d:
        self._source = d.pop("_source")
    if "highlight" in d:
        high = d.pop("highlight").copy()
        self._highlight = high.pop("fields")
        self._highlight_opts = high
    if "suggest" in d:
        # Work on copies: the global "text" is removed and pushed down into
        # each suggester, which must not alter the caller's nested dicts.
        suggest = d.pop("suggest").copy()
        if "text" in suggest:
            text = suggest.pop("text")
            for name in list(suggest):
                suggester = suggest[name].copy()
                suggester.setdefault("text", text)
                suggest[name] = suggester
        self._suggest = suggest
    if "script_fields" in d:
        self._script_fields = d.pop("script_fields")
    self._extra.update(d)
    return self
def script_fields(self, **kwargs):
    """
    Define script fields to be calculated on hits. A plain string value is
    shorthand for ``{"script": value}``.

    Example::

        s = Search()
        s = s.script_fields(times_two="doc['field'].value * 2")
        s = s.script_fields(
            times_three={
                'script': {
                    'lang': 'painless',
                    'source': "doc['field'].value * params.n",
                    'params': {'n': 3}
                }
            }
        )
    """
    clone = self._clone()
    definitions = {}
    for field_name, definition in kwargs.items():
        if isinstance(definition, string_types):
            definition = {"script": definition}
        definitions[field_name] = definition
    clone._script_fields.update(definitions)
    return clone
def source(self, fields=None, **kwargs):
    """
    Selectively control how the _source field is returned.

    :arg fields: wildcard string, array of wildcards, or dictionary of
        includes and excludes; when given it replaces the current setting

    Keyword arguments (typically ``includes`` / ``excludes``) are merged
    into a dictionary setting instead: a value of ``None`` removes the key,
    any other value overrides it. If the current setting is not a
    dictionary it is replaced by an empty one first.

    Example::

        s = Search()
        s = s.source(includes=['obj1.*'], excludes=["*.description"])

        s = Search()
        s = s.source(includes=['obj1.*']).source(excludes=["*.description"])

    :raises ValueError: when both ``fields`` and keyword arguments are given
    """
    clone = self._clone()

    if fields and kwargs:
        raise ValueError("You cannot specify fields and kwargs at the same time.")

    if fields is not None:
        clone._source = fields
        return clone

    if kwargs and not isinstance(clone._source, dict):
        clone._source = {}

    for option, value in kwargs.items():
        if value is None:
            # dropping a key that is not set is not an error
            clone._source.pop(option, None)
        else:
            clone._source[option] = value
    return clone
def sort(self, *keys):
    """
    Add sorting information to the search request. If called without
    arguments it will remove all sort requirements. Otherwise it will
    replace them. Acceptable arguments are::

        'some.field'
        '-some.other.field'
        {'different.field': {'any': 'dict'}}

    so for example::

        s = Search().sort(
            'category',
            '-title',
            {"price" : {"order" : "asc", "mode" : "avg"}}
        )

    will sort by ``category``, ``title`` (in descending order) and
    ``price`` in ascending order using the ``avg`` mode.

    The API returns a copy of the Search object and can thus be chained.

    :raises IllegalOperation: for ``'-_score'``
    """
    clone = self._clone()
    ordering = []
    for key in keys:
        descending = isinstance(key, string_types) and key.startswith("-")
        if descending:
            field = key[1:]
            if field == "_score":
                raise IllegalOperation("Sorting by `-_score` is not allowed.")
            key = {field: {"order": "desc"}}
        ordering.append(key)
    clone._sort = ordering
    return clone
def highlight_options(self, **kwargs):
    """
    Update the global highlighting options used for this request. For
    example::

        s = Search()
        s = s.highlight_options(order='score')
    """
    clone = self._clone()
    for option, value in kwargs.items():
        clone._highlight_opts[option] = value
    return clone
def highlight(self, *fields, **kwargs):
    """
    Request highlighting of some fields. All keyword arguments passed in will be
    used as parameters for all the fields in the ``fields`` parameter. Example::

        Search().highlight('title', 'body', fragment_size=50)

    will produce the equivalent of::

        {
            "highlight": {
                "fields": {
                    "body": {"fragment_size": 50},
                    "title": {"fragment_size": 50}
                }
            }
        }

    If you want to have different options for different fields
    you can call ``highlight`` twice::

        Search().highlight('title', fragment_size=50).highlight('body', fragment_size=100)

    which will produce::

        {
            "highlight": {
                "fields": {
                    "body": {"fragment_size": 100},
                    "title": {"fragment_size": 50}
                }
            }
        }
    """
    clone = self._clone()
    # every listed field refers to the very same options dict
    clone._highlight.update(dict.fromkeys(fields, kwargs))
    return clone
def suggest(self, name, text, **kwargs):
    """
    Add a suggestions request to the search.

    :arg name: name of the suggestion
    :arg text: text to suggest on

    All keyword arguments will be added to the suggestions body. For example::

        s = Search()
        s = s.suggest('suggestion-1', 'OpenSearch', term={'field': 'body'})
    """
    clone = self._clone()
    suggestion = {"text": text}
    suggestion.update(kwargs)
    clone._suggest[name] = suggestion
    return clone
def to_dict(self, count=False, **kwargs):
    """
    Serialize the search into the dictionary that will be sent over as the
    request's body.

    :arg count: a flag to specify if we are interested in a body for count -
        no aggregations, no pagination bounds etc.

    All additional keyword arguments will be included into the dictionary.
    """
    body = {}

    if self.query:
        body["query"] = self.query.to_dict()

    # count request doesn't care for sorting and other things
    if not count:
        if self.post_filter:
            body["post_filter"] = self.post_filter.to_dict()

        if self.aggs.aggs:
            body.update(self.aggs.to_dict())

        if self._sort:
            body["sort"] = self._sort

        body.update(recursive_to_dict(self._extra))

        if self._source not in (None, {}):
            body["_source"] = self._source

        if self._highlight:
            highlight = {"fields": self._highlight}
            highlight.update(self._highlight_opts)
            body["highlight"] = highlight

        if self._suggest:
            body["suggest"] = self._suggest

        if self._script_fields:
            body["script_fields"] = self._script_fields

    # explicit keyword arguments are applied last and win over everything
    body.update(recursive_to_dict(kwargs))
    return body
def count(self):
    """
    Return the number of hits matching the query and filters. Note that
    only the actual number is returned.

    When a response is already cached and its total is exact (relation
    ``eq``) that value is returned without issuing a count request.
    """
    if hasattr(self, "_response"):
        total = self._response.hits.total
        if total.relation == "eq":
            return total.value

    client = get_connection(self._using)
    body = self.to_dict(count=True)
    # TODO: failed shards detection
    result = client.count(index=self._index, body=body, **self._params)
    return result["count"]
def execute(self, ignore_cache=False):
    """
    Execute the search and return an instance of ``Response`` wrapping all
    the data.

    :arg ignore_cache: if set to ``True``, consecutive calls will hit
        OpenSearch, while cached result will be ignored. Defaults to `False`
    """
    if not ignore_cache and hasattr(self, "_response"):
        return self._response

    client = get_connection(self._using)
    wrapper = self._response_class
    raw = client.search(index=self._index, body=self.to_dict(), **self._params)
    self._response = wrapper(self, raw)
    return self._response
def scan(self):
    """
    Turn the search into a scan search and return a generator that will
    iterate over all the documents matching the query.

    Use ``params`` method to specify any additional arguments you wish to
    pass to the underlying ``scan`` helper from ``opensearchpy``
    """
    client = get_connection(self._using)
    raw_hits = scan(
        client, query=self.to_dict(), index=self._index, **self._params
    )
    for raw_hit in raw_hits:
        yield self._get_result(raw_hit)
def delete(self):
    """
    delete() executes the query by delegating to delete_by_query()

    :returns: the ``delete_by_query`` result wrapped in an ``AttrDict``
    """
    client = get_connection(self._using)
    result = client.delete_by_query(
        index=self._index, body=self.to_dict(), **self._params
    )
    return AttrDict(result)
class MultiSearch(Request):
    """
    Combine multiple :class:`~opensearchpy.Search` objects into a single
    request.
    """

    def __init__(self, **kwargs):
        super(MultiSearch, self).__init__(**kwargs)
        self._searches = []

    def __getitem__(self, key):
        """Return the search(es) stored at ``key``."""
        return self._searches[key]

    def __iter__(self):
        """Iterate over the stored searches."""
        return iter(self._searches)

    def _clone(self):
        """Clone the request, including a copy of the list of searches."""
        clone = super(MultiSearch, self)._clone()
        clone._searches = self._searches[:]
        return clone

    def add(self, search):
        """
        Adds a new :class:`~opensearchpy.Search` object to the request::

            ms = MultiSearch(index='my-index')
            ms = ms.add(Search(doc_type=Category).filter('term', category='python'))
            ms = ms.add(Search(doc_type=Blog))
        """
        clone = self._clone()
        clone._searches.append(search)
        return clone

    def to_dict(self):
        """
        Return the request body as a flat list: for every search a header
        dict (its index, if any, plus its params) followed by the search
        body.
        """
        lines = []
        for search in self._searches:
            header = {}
            if search._index:
                header["index"] = search._index
            header.update(search._params)
            lines.extend((header, search.to_dict()))
        return lines

    def execute(self, ignore_cache=False, raise_on_error=True):
        """
        Execute the multi search request and return a list of search results.

        :arg ignore_cache: when ``True`` the request is sent even if a
            result is already cached
        :arg raise_on_error: when ``True`` the first failed search raises
            ``TransportError``; otherwise its slot in the result is ``None``
        """
        if not ignore_cache and hasattr(self, "_response"):
            return self._response

        client = get_connection(self._using)
        raw = client.msearch(
            index=self._index, body=self.to_dict(), **self._params
        )

        results = []
        for search, item in zip(self._searches, raw["responses"]):
            if not item.get("error", False):
                results.append(Response(search, item))
                continue
            if raise_on_error:
                error = item["error"]
                raise TransportError("N/A", error["type"], error)
            results.append(None)

        self._response = results
        return self._response
+35
View File
@@ -0,0 +1,35 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from .aggs import AggBase
from .utils import DslBase
# Stub declarations only: each class body is elided with ``...``.
class QueryProxy(object): ...
class ProxyDescriptor(object): ...
class AggsProxy(AggBase, DslBase): ...
class Request(object): ...
class Search(Request): ...
class MultiSearch(Request): ...
+169
View File
@@ -0,0 +1,169 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from opensearchpy.connection.connections import get_connection
from ..helpers.query import Bool, Q
from ..helpers.search import ProxyDescriptor, QueryProxy, Request
from .response import UpdateByQueryResponse
from .utils import recursive_to_dict
class UpdateByQuery(Request):
    """Builder for an update-by-query request."""

    query = ProxyDescriptor("query")

    def __init__(self, **kwargs):
        """
        Update by query request to opensearch.

        :arg using: `OpenSearch` instance to use
        :arg index: limit the search to index
        :arg doc_type: only query this type.

        All the parameters supplied (or omitted) at creation time can be
        overridden later by methods (`using`, `index` and `doc_type`
        respectively).
        """
        super(UpdateByQuery, self).__init__(**kwargs)
        self._response_class = UpdateByQueryResponse
        self._script = {}
        self._query_proxy = QueryProxy(self, "query")

    def filter(self, *args, **kwargs):
        """Add the query built from the arguments as a ``bool`` filter clause."""
        clause = Q(*args, **kwargs)
        return self.query(Bool(filter=[clause]))

    def exclude(self, *args, **kwargs):
        """Add the negation of the query built from the arguments as a filter."""
        negated = ~Q(*args, **kwargs)
        return self.query(Bool(filter=[negated]))

    @classmethod
    def from_dict(cls, d):
        """
        Build a new `UpdateByQuery` from a raw dict holding the request body.
        Useful when migrating from raw dictionaries.

        Example::

            ubq = UpdateByQuery.from_dict({
                "query": {
                    "bool": {
                        "must": [...]
                    }
                },
                "script": {...}
            })
            ubq = ubq.filter('term', published=True)
        """
        instance = cls()
        instance.update_from_dict(d)
        return instance

    def _clone(self):
        """
        Return a clone of the current request. Only a shallow copy of the
        underlying objects is made. Used internally by most state modifying
        APIs.
        """
        duplicate = super(UpdateByQuery, self)._clone()
        duplicate._response_class = self._response_class
        duplicate._script = self._script.copy()
        duplicate.query._proxied = self.query._proxied
        return duplicate

    def response_class(self, cls):
        """
        Return a clone that wraps its response in ``cls`` instead of the
        default wrapper.
        """
        replaced = self._clone()
        replaced._response_class = cls
        return replaced

    def update_from_dict(self, d):
        """
        Apply options from a serialized body to this instance, in place.
        Used mostly by ``from_dict``.
        """
        remaining = d.copy()
        if "query" in remaining:
            self.query._proxied = Q(remaining.pop("query"))
        if "script" in remaining:
            self._script = remaining.pop("script")
        # whatever is left is kept as extra body content
        self._extra.update(remaining)
        return self

    def script(self, **kwargs):
        """
        Define the update action to take.

        Note: the API only accepts a single script, so calling this method
        again replaces the previous script.

        Example::

            ubq = UpdateByQuery()
            ubq = ubq.script(source="ctx._source.likes++")
            ubq = ubq.script(source="ctx._source.likes += params.f",
                             lang="expression",
                             params={'f': 3})
        """
        updated = self._clone()
        # an existing script is discarded rather than merged
        if updated._script:
            updated._script = {}
        updated._script.update(kwargs)
        return updated

    def to_dict(self, **kwargs):
        """
        Serialize the request into the dictionary that will be sent as the
        request's body.

        All additional keyword arguments are included in the dictionary.
        """
        body = {}
        if self.query:
            body["query"] = self.query.to_dict()
        if self._script:
            body["script"] = self._script
        # extras first, so explicit keyword arguments win on key clashes
        for source in (self._extra, kwargs):
            body.update(recursive_to_dict(source))
        return body

    def execute(self):
        """
        Send the update-by-query request and return the response wrapped in
        the configured response class.
        """
        client = get_connection(self._using)
        raw = client.update_by_query(
            index=self._index, body=self.to_dict(), **self._params
        )
        self._response = self._response_class(self, raw)
        return self._response
+29
View File
@@ -0,0 +1,29 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from .search import Request
# Stub declaration only: the class body is elided with ``...``.
class UpdateByQuery(Request): ...
+601
View File
@@ -0,0 +1,601 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import unicode_literals
try:
import collections.abc as collections_abc # only works on python 3.3+
except ImportError:
import collections as collections_abc
from copy import copy
from six import add_metaclass, iteritems
from six.moves import map
from opensearchpy.exceptions import UnknownDslObject, ValidationException
# Values that ObjectBase.__getattr__ will not store on the instance when a
# field's ``empty()`` produces them.
SKIP_VALUES = ("", None)
# Default for DslBase.__init__'s ``_expand__to_dot`` argument: when true, "__"
# in a keyword parameter name is rewritten to ".".
EXPAND__TO_DOT = True

# Metadata names merged into META_FIELDS below.
DOC_META_FIELDS = frozenset(
    (
        "id",
        "routing",
    )
)

# Names that ObjectBase.__init__ recognizes (with a leading "_") among its
# keyword arguments and moves into ``meta``.
META_FIELDS = frozenset(
    (
        # OpenSearch metadata fields, except 'type'
        "index",
        "using",
        "score",
        "version",
        "seq_no",
        "primary_term",
    )
).union(DOC_META_FIELDS)
def _wrap(val, obj_wrapper=None):
if isinstance(val, collections_abc.Mapping):
return AttrDict(val) if obj_wrapper is None else obj_wrapper(val)
if isinstance(val, list):
return AttrList(val)
return val
class AttrList(object):
    """
    List wrapper whose items are passed through ``_wrap`` when accessed, so
    nested mappings and lists come back wrapped as well. Attributes not
    defined here are delegated to the underlying list.
    """

    def __init__(self, p, obj_wrapper=None):
        # any non-list iterable is materialised into a list
        self._l_ = p if isinstance(p, list) else list(p)
        self._obj_wrapper = obj_wrapper

    def __repr__(self):
        return repr(self._l_)

    def __eq__(self, other):
        # compare raw data, so a plain list with the same items is equal too
        rhs = other._l_ if isinstance(other, AttrList) else other
        return rhs == self._l_

    def __ne__(self, other):
        return not (self == other)

    def __getitem__(self, k):
        picked = self._l_[k]
        if isinstance(k, slice):
            # a slice stays an AttrList and keeps the same wrapper
            return AttrList(picked, obj_wrapper=self._obj_wrapper)
        return _wrap(picked, self._obj_wrapper)

    def __setitem__(self, k, value):
        self._l_[k] = value

    def __iter__(self):
        def wrap_item(item):
            return _wrap(item, self._obj_wrapper)

        return map(wrap_item, self._l_)

    def __len__(self):
        return len(self._l_)

    def __nonzero__(self):
        return bool(self._l_)

    __bool__ = __nonzero__

    def __getattr__(self, name):
        # anything not defined on AttrList is looked up on the wrapped list
        return getattr(self._l_, name)

    def __getstate__(self):
        return (self._l_, self._obj_wrapper)

    def __setstate__(self, state):
        items, wrapper = state
        self._l_ = items
        self._obj_wrapper = wrapper
class AttrDict(object):
    """
    Thin wrapper that gives a dictionary attribute-style access for reading
    and writing. Used as a convenient way to access both results and nested
    dsl dicts.
    """

    def __init__(self, d):
        # go around our own __setattr__, which needs ``_d_`` to exist already
        super(AttrDict, self).__setattr__("_d_", d)

    def __contains__(self, key):
        return key in self._d_

    def __nonzero__(self):
        return bool(self._d_)

    __bool__ = __nonzero__

    def __dir__(self):
        # expose the keys for auto-completion (IPython etc.)
        return list(self._d_.keys())

    def __eq__(self, other):
        # compare raw data, so a plain dict with the same content is equal too
        rhs = other._d_ if isinstance(other, AttrDict) else other
        return rhs == self._d_

    def __ne__(self, other):
        return not (self == other)

    def __repr__(self):
        text = repr(self._d_)
        # long representations are cut after 60 characters
        return text if len(text) <= 60 else text[:60] + "...}"

    def __getstate__(self):
        return (self._d_,)

    def __setstate__(self, state):
        super(AttrDict, self).__setattr__("_d_", state[0])

    def __getattr__(self, attr_name):
        try:
            return self.__getitem__(attr_name)
        except KeyError:
            raise AttributeError(
                "{!r} object has no attribute {!r}".format(
                    self.__class__.__name__, attr_name
                )
            )

    def get(self, key, default=None):
        """
        Return the wrapped value for ``key``. When the key is missing,
        ``default`` is returned if it is not ``None``; otherwise the
        ``AttributeError`` propagates.
        """
        try:
            return self.__getattr__(key)
        except AttributeError:
            if default is None:
                raise
            return default

    def __delattr__(self, attr_name):
        try:
            del self._d_[attr_name]
        except KeyError:
            raise AttributeError(
                "{!r} object has no attribute {!r}".format(
                    self.__class__.__name__, attr_name
                )
            )

    def __getitem__(self, key):
        return _wrap(self._d_[key])

    def __setitem__(self, key, value):
        self._d_[key] = value

    def __delitem__(self, key):
        del self._d_[key]

    def __setattr__(self, name, value):
        is_field = name in self._d_ or not hasattr(self.__class__, name)
        if not is_field:
            # the class defines this name (could be a property, ...), so it is
            # set as a regular attribute instead of becoming a field
            super(AttrDict, self).__setattr__(name, value)
            return
        self._d_[name] = value

    def __iter__(self):
        return iter(self._d_)

    def to_dict(self):
        """Return the wrapped dictionary itself (not a copy)."""
        return self._d_
class DslMeta(type):
    """
    Base metaclass for DslBase subclasses. It keeps a registry of all classes
    for a given DslBase subclass (== all the query types for the Query
    subclass of DslBase).

    The registry, together with the `name` and `shortcut` attributes of the
    base class, is used to construct any subclass from its name.

    For typical use see `QueryMeta` and `Query` in `opensearchpy.query`.
    """

    _types = {}

    def __init__(cls, name, bases, attrs):
        super(DslMeta, cls).__init__(name, bases, attrs)

        # DslBase itself has no shortcut and is not registered
        if not hasattr(cls, "_type_shortcut"):
            return

        if cls.name is not None:
            # concrete class: register it unless that name is already taken
            cls._classes.setdefault(cls.name, cls)
            return

        # abstract base class: record its shortcut ...
        cls._types[cls._type_name] = cls._type_shortcut
        # ... and give it a registry for its subclasses
        if not hasattr(cls, "_classes"):
            cls._classes = {}

    @classmethod
    def get_dsl_type(cls, name):
        """Return the shortcut registered for the DSL type ``name``."""
        registry = cls._types
        try:
            return registry[name]
        except KeyError:
            raise UnknownDslObject("DSL type %s does not exist." % name)
@add_metaclass(DslMeta)
class DslBase(object):
    """
    Base class for all DSL objects - queries, filters, aggregations etc. Wraps
    a dictionary representing the object's json.

    Provides several features:

        - attribute access to the wrapped dictionary (.field instead of ['field'])
        - _clone method returning a copy of self
        - to_dict method to serialize into dict (to be sent via opensearch-py)
        - basic logical operators (&, | and ~) using a Bool(Filter|Query) TODO:
          move into a class specific for Query/Filter
        - respects the definition of the class and (de)serializes its
          attributes based on the `_param_defs` definition (for example turning
          all values in the `must` attribute into Query objects)
    """

    # Maps a parameter name to a description dict. The keys read by this class
    # are "type", "multi" and "hash".
    _param_defs = {}

    @classmethod
    def get_dsl_class(cls, name, default=None):
        """
        Return the class registered under ``name`` in ``cls._classes``.

        If ``name`` is unknown and ``default`` is not ``None``, the class
        registered under ``default`` is returned instead; otherwise
        ``UnknownDslObject`` is raised.
        """
        try:
            return cls._classes[name]
        except KeyError:
            if default is not None:
                return cls._classes[default]
            raise UnknownDslObject(
                "DSL class `{}` does not exist in {}.".format(name, cls._type_name)
            )

    def __init__(self, _expand__to_dot=EXPAND__TO_DOT, **params):
        """
        Store every keyword argument as a parameter via ``_setattr``.

        When ``_expand__to_dot`` is true, "__" in a parameter name is replaced
        with "." before the value is stored.
        """
        self._params = {}
        for pname, pvalue in iteritems(params):
            if "__" in pname and _expand__to_dot:
                pname = pname.replace("__", ".")
            self._setattr(pname, pvalue)

    def _repr_params(self):
        """Produce a repr of all our parameters to be used in __repr__."""
        return ", ".join(
            "{}={!r}".format(n.replace(".", "__"), v)
            for (n, v) in sorted(iteritems(self._params))
            # make sure we don't include empty typed params
            if "type" not in self._param_defs.get(n, {}) or v
        )

    def __repr__(self):
        return "{}({})".format(self.__class__.__name__, self._repr_params())

    def __eq__(self, other):
        # equal when ``other`` is an instance of our class and serializes to
        # the same dict
        return isinstance(other, self.__class__) and other.to_dict() == self.to_dict()

    def __ne__(self, other):
        return not self == other

    def __setattr__(self, name, value):
        # names starting with "_" are regular instance attributes; everything
        # else is stored as a DSL parameter
        if name.startswith("_"):
            return super(DslBase, self).__setattr__(name, value)
        return self._setattr(name, value)

    def _setattr(self, name, value):
        """
        Store ``value`` under ``name`` in ``_params``. If ``_param_defs``
        declares a "type" for the name, the value is first converted with the
        shortcut registered for that type.
        """
        # if this attribute has special type assigned to it...
        if name in self._param_defs:
            pinfo = self._param_defs[name]

            if "type" in pinfo:
                # get the shortcut used to construct this type (query.Q, aggs.A, etc)
                shortcut = self.__class__.get_dsl_type(pinfo["type"])

                # list of dict(name -> DslBase)
                if pinfo.get("multi") and pinfo.get("hash"):
                    # a single value is treated as a one-element sequence
                    if not isinstance(value, (tuple, list)):
                        value = (value,)
                    value = list(
                        {k: shortcut(v) for (k, v) in iteritems(obj)} for obj in value
                    )
                elif pinfo.get("multi"):
                    # a single value is treated as a one-element sequence
                    if not isinstance(value, (tuple, list)):
                        value = (value,)
                    value = list(map(shortcut, value))

                # dict(name -> DslBase), make sure we pickup all the objs
                elif pinfo.get("hash"):
                    value = {k: shortcut(v) for (k, v) in iteritems(value)}

                # single value object, just convert
                else:
                    value = shortcut(value)
        self._params[name] = value

    def __getattr__(self, name):
        """
        Look ``name`` up in ``_params``.

        Missing "multi"/"hash" parameters get an empty list/dict created and
        stored on first access. Mapping values are returned wrapped in
        ``AttrDict``. Raises ``AttributeError`` for names starting with "_"
        and whenever the resulting value is ``None``.
        """
        # names starting with "_" are never looked up in ``_params``
        if name.startswith("_"):
            raise AttributeError(
                "{!r} object has no attribute {!r}".format(
                    self.__class__.__name__, name
                )
            )

        value = None
        try:
            value = self._params[name]
        except KeyError:
            # compound types should never throw AttributeError and return empty
            # container instead
            if name in self._param_defs:
                pinfo = self._param_defs[name]
                if pinfo.get("multi"):
                    value = self._params.setdefault(name, [])
                elif pinfo.get("hash"):
                    value = self._params.setdefault(name, {})
        # this also fires for a parameter that is present but stored as None
        if value is None:
            raise AttributeError(
                "{!r} object has no attribute {!r}".format(
                    self.__class__.__name__, name
                )
            )

        # wrap nested dicts in AttrDict for convenient access
        if isinstance(value, collections_abc.Mapping):
            return AttrDict(value)
        return value

    def to_dict(self):
        """
        Serialize the DSL object to plain dict

        The result has a single key, ``self.name``, whose value is the dict of
        serialized parameters.
        """
        d = {}
        for pname, value in iteritems(self._params):
            pinfo = self._param_defs.get(pname)

            # typed param
            if pinfo and "type" in pinfo:
                # don't serialize empty lists and dicts for typed fields
                if value in ({}, []):
                    continue

                # list of dict(name -> DslBase)
                if pinfo.get("multi") and pinfo.get("hash"):
                    value = list(
                        {k: v.to_dict() for k, v in iteritems(obj)} for obj in value
                    )

                # multi-values are serialized as list of dicts
                elif pinfo.get("multi"):
                    value = list(map(lambda x: x.to_dict(), value))

                # squash all the hash values into one dict
                elif pinfo.get("hash"):
                    value = {k: v.to_dict() for k, v in iteritems(value)}

                # serialize single values
                else:
                    value = value.to_dict()

            # serialize anything with to_dict method
            elif hasattr(value, "to_dict"):
                value = value.to_dict()

            d[pname] = value
        return {self.name: d}

    def _clone(self):
        """
        Return a new instance of the same class whose parameters are shallow
        copies (``copy.copy``) of this object's parameters.
        """
        c = self.__class__()
        for attr in self._params:
            c._params[attr] = copy(self._params[attr])
        return c
class HitMeta(AttrDict):
    """
    Metadata of a hit. Keys listed in ``exclude`` are dropped, a leading
    underscore is stripped from the remaining keys, and ``type`` is exposed
    as ``doc_type``.
    """

    def __init__(self, document, exclude=("_source", "_fields")):
        fields = {}
        for key, value in iteritems(document):
            if key in exclude:
                continue
            if key.startswith("_"):
                key = key[1:]
            fields[key] = value

        if "type" in fields:
            # make sure we are consistent everywhere in python
            fields["doc_type"] = fields.pop("type")
        super(HitMeta, self).__init__(fields)
class ObjectBase(AttrDict):
    """
    Attribute-style document object. Field values live in the wrapped dict
    and hit metadata is kept separately on ``meta`` (a ``HitMeta``).

    Field definitions are read from ``cls._doc_type.mapping`` and, in
    ``__get_field``, from ``cls._index._mapping`` as a fallback.
    """

    def __init__(self, meta=None, **kwargs):
        """
        :arg meta: optional dict of metadata for the object
        :arg kwargs: field values; ``_``-prefixed names whose remainder is in
            ``META_FIELDS`` (for example ``_id``) are moved into the metadata
        """
        # work on a copy so the caller's dict is not modified when meta
        # keyword arguments are folded in below
        meta = dict(meta) if meta else {}
        for k in list(kwargs):
            if k.startswith("_") and k[1:] in META_FIELDS:
                meta[k] = kwargs.pop(k)

        # bypass AttrDict.__setattr__ so "meta" does not end up as a field
        super(AttrDict, self).__setattr__("meta", HitMeta(meta))
        super(ObjectBase, self).__init__(kwargs)

    @classmethod
    def __list_fields(cls):
        """
        Get all the fields defined for our class, if we have an Index, try
        looking at the index mappings as well, mark the fields from Index as
        optional.

        Yields ``(name, field, optional)`` tuples.
        """
        for name in cls._doc_type.mapping:
            field = cls._doc_type.mapping[name]
            yield name, field, False

        # NOTE(review): this tests the metaclass (``cls.__class__``), whereas
        # __get_field tests ``cls`` itself - confirm which one is intended.
        if hasattr(cls.__class__, "_index"):
            if not cls._index._mapping:
                return
            for name in cls._index._mapping:
                # don't return fields that are in _doc_type
                if name in cls._doc_type.mapping:
                    continue
                field = cls._index._mapping[name]
                yield name, field, True

    @classmethod
    def __get_field(cls, name):
        """
        Return the field defined for ``name``, looking at the doc type mapping
        first and the index mapping second. Returns ``None`` when neither
        knows the name.
        """
        try:
            return cls._doc_type.mapping[name]
        except KeyError:
            # fallback to fields on the Index
            if hasattr(cls, "_index") and cls._index._mapping:
                try:
                    return cls._index._mapping[name]
                except KeyError:
                    pass

    @classmethod
    def from_opensearch(cls, hit):
        """
        Build an instance from a raw hit: ``_source`` supplies the field
        values, the rest of the hit becomes the metadata. ``hit`` itself is
        left untouched (a copy is used).
        """
        meta = hit.copy()
        data = meta.pop("_source", {})
        doc = cls(meta=meta)
        doc._from_dict(data)
        return doc

    def _from_dict(self, data):
        """Set every item of ``data`` on self, deserializing coerced fields."""
        for k, v in iteritems(data):
            f = self.__get_field(k)
            if f and f._coerce:
                v = f.deserialize(v)
            setattr(self, k, v)

    def __getstate__(self):
        return self.to_dict(), self.meta._d_

    def __setstate__(self, state):
        data, meta = state
        # both attributes are set directly, bypassing AttrDict.__setattr__
        super(AttrDict, self).__setattr__("_d_", {})
        super(AttrDict, self).__setattr__("meta", HitMeta(meta))
        self._from_dict(data)

    def __getattr__(self, name):
        """
        Return the stored value for ``name``. For a missing name whose field
        provides ``empty()``, return that empty value; unless it is one of
        ``SKIP_VALUES`` it is stored on the instance first.
        """
        try:
            return super(ObjectBase, self).__getattr__(name)
        except AttributeError:
            f = self.__get_field(name)
            if hasattr(f, "empty"):
                value = f.empty()
                if value not in SKIP_VALUES:
                    setattr(self, name, value)
                    # read it back so the caller gets the stored (wrapped) value
                    value = getattr(self, name)
                return value
            raise

    def to_dict(self, skip_empty=True):
        """
        Serialize the field values into a plain dict.

        :arg skip_empty: when true (the default), values equal to ``[]``,
            ``{}`` or ``None`` are left out
        """
        out = {}
        for k, v in iteritems(self._d_):
            # if this is a mapped field,
            f = self.__get_field(k)
            if f and f._coerce:
                v = f.serialize(v)

            # if someone assigned AttrList, unwrap it
            if isinstance(v, AttrList):
                v = v._l_

            if skip_empty:
                # don't serialize empty values
                # careful not to include numeric zeros
                if v in ([], {}, None):
                    continue

            out[k] = v
        return out

    def clean_fields(self):
        """
        Run ``clean()`` of every listed field on its current value and store
        the cleaned result. All failures are collected and raised together as
        one ``ValidationException`` keyed by field name.
        """
        errors = {}
        for name, field, optional in self.__list_fields():
            data = self._d_.get(name, None)
            if data is None and optional:
                continue
            try:
                # save the cleaned value
                data = field.clean(data)
            except ValidationException as e:
                errors.setdefault(name, []).append(e)

            # existing keys are always updated; new keys only for non-empty data
            if name in self._d_ or data not in ([], {}, None):
                self._d_[name] = data

        if errors:
            raise ValidationException(errors)

    def clean(self):
        """Hook called by ``full_clean`` after ``clean_fields``; does nothing here."""
        pass

    def full_clean(self):
        """Run ``clean_fields()`` followed by ``clean()``."""
        self.clean_fields()
        self.clean()
def merge(data, new_data, raise_on_conflict=False):
    """
    Recursively merge ``new_data`` into ``data``, in place.

    Nested mappings present on both sides are merged key by key; any other
    value from ``new_data`` replaces the one in ``data``. With
    ``raise_on_conflict`` set, replacing an existing, different value raises
    ``ValueError`` instead. ``ValueError`` is also raised when either
    argument is not a mapping or ``AttrDict``.
    """
    mapping_types = (AttrDict, collections_abc.Mapping)

    both_mappings = isinstance(data, mapping_types) and isinstance(
        new_data, mapping_types
    )
    if not both_mappings:
        raise ValueError(
            "You can only merge two dicts! Got {!r} and {!r} instead.".format(
                data, new_data
            )
        )

    for key, value in iteritems(new_data):
        if key not in data:
            data[key] = value
            continue

        current = data[key]
        if isinstance(current, mapping_types) and isinstance(value, mapping_types):
            merge(current, value, raise_on_conflict)
        elif current != value and raise_on_conflict:
            raise ValueError("Incompatible data for key %r, cannot be merged." % key)
        else:
            data[key] = value
def recursive_to_dict(data):
    """Recursively convert objects that may provide ``.to_dict()`` into
    plain data, descending through AttrList, AttrDict, list, tuple and
    Mapping values.
    """
    if isinstance(data, AttrList):
        plain = list(data._l_)
    elif hasattr(data, "to_dict"):
        plain = data.to_dict()
    else:
        plain = data

    if isinstance(plain, (list, tuple)):
        # rebuild the same sequence type from the converted items
        container = type(plain)
        return container(recursive_to_dict(item) for item in plain)
    if isinstance(plain, collections_abc.Mapping):
        converted = {}
        for key, val in plain.items():
            converted[key] = recursive_to_dict(val)
        return converted
    return plain
+32
View File
@@ -0,0 +1,32 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Stub declarations only: each class body is elided with ``...``.
class AttrList(object): ...
class AttrDict(object): ...
class DslMeta(type): ...
class DslBase(object): ...
class HitMeta(AttrDict): ...
class ObjectBase(AttrDict): ...
+89
View File
@@ -0,0 +1,89 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import operator
from six import iteritems, string_types
from .utils import AttrDict
__all__ = ["Range"]
class Range(AttrDict):
    """
    A range described by at most one lower bound (``gt`` or ``gte``) and at
    most one upper bound (``lt`` or ``lte``).
    """

    # operator name -> comparison called as ``compare(item, bound)``
    OPS = {
        "lt": operator.lt,
        "lte": operator.le,
        "gt": operator.gt,
        "gte": operator.ge,
    }

    def __init__(self, *args, **kwargs):
        if args:
            misused = len(args) > 1 or kwargs or not isinstance(args[0], dict)
            if misused:
                raise ValueError(
                    "Range accepts a single dictionary or a set of keyword arguments."
                )
            bounds = args[0]
        else:
            bounds = kwargs

        for name in bounds:
            if name not in self.OPS:
                raise ValueError("Range received an unknown operator %r" % name)

        if "gt" in bounds and "gte" in bounds:
            raise ValueError("You cannot specify both gt and gte for Range.")
        if "lt" in bounds and "lte" in bounds:
            raise ValueError("You cannot specify both lt and lte for Range.")

        super(Range, self).__init__(bounds)

    def __repr__(self):
        parts = ["%s=%r" % pair for pair in iteritems(self._d_)]
        return "Range(%s)" % ", ".join(parts)

    def __contains__(self, item):
        # a string is a key lookup; any other value is tested against the bounds
        if isinstance(item, string_types):
            return super(Range, self).__contains__(item)

        return all(
            self.OPS[op](item, self._d_[op]) for op in self.OPS if op in self._d_
        )

    @property
    def upper(self):
        """``(value, inclusive)`` of the upper bound, ``(None, False)`` if unset."""
        for op, inclusive in (("lt", False), ("lte", True)):
            if op in self._d_:
                return self._d_[op], inclusive
        return None, False

    @property
    def lower(self):
        """``(value, inclusive)`` of the lower bound, ``(None, False)`` if unset."""
        for op, inclusive in (("gt", False), ("gte", True)):
            if op in self._d_:
                return self._d_[op], inclusive
        return None, False
+29
View File
@@ -0,0 +1,29 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from .utils import AttrDict
# Stub declaration only: the class body is elided with ``...``.
class Range(AttrDict): ...
-1
View File
@@ -6,7 +6,6 @@
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
from typing import Any, Union
+13
View File
@@ -36,6 +36,7 @@ from decimal import Decimal
from .compat import string_types
from .exceptions import ImproperlyConfigured, SerializationError
from .helpers.utils import AttrList
INTEGER_TYPES = ()
FLOAT_TYPES = (Decimal,)
@@ -194,3 +195,15 @@ class Deserializer(object):
)
return deserializer.loads(s)
class AttrJSONSerializer(JSONSerializer):
    """
    JSON serializer that also handles ``AttrList`` instances and any object
    exposing ``to_dict()``.
    """

    def default(self, data):
        if isinstance(data, AttrList):
            # hand over the wrapped list
            converted = data._l_
        elif hasattr(data, "to_dict"):
            converted = data.to_dict()
        else:
            converted = super(AttrJSONSerializer, self).default(data)
        return converted
serializer = AttrJSONSerializer()
+2
View File
@@ -51,3 +51,5 @@ class Deserializer(object):
default_mimetype: str = ...,
) -> None: ...
def loads(self, s: str, mimetype: Optional[str] = ...) -> Any: ...
# Stub declaration only: the class body is elided with ``...``.
class AttrJSONSerializer(JSONSerializer): ...
+11 -4
View File
@@ -27,6 +27,7 @@
import re
import sys
from os.path import abspath, dirname, join
from setuptools import find_packages, setup
@@ -48,21 +49,27 @@ packages = [
for package in find_packages(where=".", exclude=("test_opensearchpy*",))
if package == module_dir or package.startswith(module_dir + ".")
]
install_requires = [
"urllib3>=1.21.1, <2",
"certifi",
"requests>=2.4.0, <3.0.0",
"six",
"python-dateutil",
# ipaddress is included in stdlib since python 3.3
'ipaddress; python_version<"3.3"',
]
tests_require = [
"requests>=2.0.0, <3.0.0",
"coverage",
"coverage<7.0.0",
"mock",
"pyyaml",
"pytest",
"pytest>=3.0.0",
"pytest-cov",
"pytz",
"botocore;python_version>='3.6'",
]
if sys.version_info >= (3, 6):
tests_require.append("pytest-mock<4.0.0")
async_require = ["aiohttp>=3,<4"]
docs_require = ["sphinx", "sphinx_rtd_theme", "myst_parser", "sphinx_copybutton"]
@@ -70,7 +77,7 @@ generate_require = ["black", "jinja2"]
setup(
name=package_name,
description="Python low-level client for OpenSearch",
description="Python client for OpenSearch",
license="Apache-2.0",
url="https://github.com/opensearch-project/opensearch-py",
long_description=long_description,
-1
View File
@@ -85,7 +85,6 @@ def fetch_opensearch_repo():
def run_all(argv=None):
sys.exitfunc = lambda: sys.stderr.write("Shutting down....\n")
# fetch yaml tests anywhere that's not GitHub Actions
if "GITHUB_ACTION" not in environ:
fetch_opensearch_repo()
@@ -8,7 +8,6 @@
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
import sys
import pytest
@@ -34,8 +34,9 @@ import asyncio
import pytest
from mock import MagicMock, patch
from opensearchpy import TransportError, helpers
from opensearchpy.helpers import ScanError
from opensearchpy import TransportError
from opensearchpy._async import helpers
from opensearchpy.helpers import BulkIndexError, ScanError
pytestmark = pytest.mark.asyncio
@@ -135,7 +136,7 @@ class TestStreamingBulk(object):
async_client, [{"a": "b"}, {"a": "c"}], index="i", raise_on_error=True
):
assert ok
except helpers.BulkIndexError as e:
except BulkIndexError as e:
assert 2 == len(e.errors)
else:
assert False, "exception should have been raised"
@@ -346,7 +347,7 @@ class TestBulk(object):
)
await async_client.cluster.health(wait_for_status="yellow")
with pytest.raises(helpers.BulkIndexError):
with pytest.raises(BulkIndexError):
await helpers.async_bulk(async_client, [{"a": 42}, {"a": "c"}], index="i")
async def test_ignore_error_if_raised(self, async_client):
@@ -371,7 +372,7 @@ class TestBulk(object):
)
# ignore only the status code in the `ignore_status` argument
with pytest.raises(helpers.BulkIndexError):
with pytest.raises(BulkIndexError):
await helpers.async_bulk(
async_client, [{"a": 42}, {"a": "c"}], index="i", ignore_status=(444,)
)
@@ -870,7 +871,6 @@ class TestParentChildReindex:
self, async_client, parent_reindex_setup
):
await helpers.async_reindex(async_client, "test-index", "real-index")
assert {"question_answer": "question"} == (
await async_client.get(index="real-index", id=42)
)["_source"]
+77
View File
@@ -67,6 +67,11 @@ try:
except ImportError: # Old version of pytest for 2.7 and 3.5
from _pytest.monkeypatch import MonkeyPatch
from pytest import raises
from opensearchpy import OpenSearch, serializer
from opensearchpy.connection import connections
def gzip_decompress(data):
buf = gzip.GzipFile(fileobj=io.BytesIO(data), mode="rb")
@@ -999,3 +1004,75 @@ class TestConnectionHttpbin:
conn = RequestsHttpConnection("not.a.host.name")
with pytest.raises(ConnectionError):
conn.perform_request("GET", "/")
def test_default_connection_is_returned_by_default():
    """``get_connection()`` without an alias returns the "default" entry."""
    registry = connections.Connections()
    default_conn = object()
    other_conn = object()

    registry.add_connection("default", default_conn)
    registry.add_connection("not-default", other_conn)

    assert registry.get_connection() is default_conn
def test_get_connection_created_connection_if_needed():
    """Aliases that were only configured come back as ``OpenSearch`` clients."""
    registry = connections.Connections()
    registry.configure(
        default={"hosts": ["opensearch.com"]},
        local={"hosts": ["localhost"]},
    )

    default_client = registry.get_connection()
    local_client = registry.get_connection("local")

    for client in (default_client, local_client):
        assert isinstance(client, OpenSearch)

    # Each client must carry the hosts of its own configuration entry.
    assert [{"host": "opensearch.com"}] == default_client.transport.hosts
    assert [{"host": "localhost"}] == local_client.transport.hosts
def test_configure_preserves_unchanged_connections():
    """Re-configuring keeps clients whose settings are unchanged.

    Only the alias whose configuration differs ("default") must yield a
    new client object; "local" must be the very same instance.
    """
    registry = connections.Connections()
    registry.configure(
        default={"hosts": ["opensearch.com"]},
        local={"hosts": ["localhost"]},
    )
    old_default = registry.get_connection()
    old_local = registry.get_connection("local")

    registry.configure(
        default={"hosts": ["not-opensearch.com"]},
        local={"hosts": ["localhost"]},
    )
    fresh_default = registry.get_connection()
    fresh_local = registry.get_connection("local")

    assert fresh_local is old_local
    assert fresh_default is not old_default
def test_remove_connection_removes_both_conn_and_conf():
    """Removing an alias drops it whether it was configured or added directly.

    "default" is only configured, "local2" is added as a ready-made
    connection object; after removal neither alias may be retrievable.
    """
    c = connections.Connections()

    c.configure(default={"hosts": ["opensearch.com"]}, local={"hosts": ["localhost"]})
    c.add_connection("local2", object())

    c.remove_connection("default")
    # Sanity check: "local2" is still retrievable before it is removed.
    c.get_connection("local2")
    c.remove_connection("local2")

    # Each lookup gets its own ``raises`` block: a ``with raises(...)`` body
    # stops at the first exception, so a second statement in the same block
    # would never be executed and the "default" alias would go unchecked.
    with raises(Exception):
        c.get_connection("local2")
    with raises(Exception):
        c.get_connection("default")
def test_create_connection_constructs_client():
    """``create_connection`` stores a client built from the given hosts."""
    registry = connections.Connections()
    registry.create_connection("testing", hosts=["opensearch.com"])

    client = registry.get_connection("testing")
    expected_hosts = [{"host": "opensearch.com"}]
    assert expected_hosts == client.transport.hosts
def test_create_connection_adds_our_serializer():
    """Clients made by ``create_connection`` use ``serializer.serializer``."""
    registry = connections.Connections()
    registry.create_connection("testing", hosts=["opensearch.com"])

    transport = registry.get_connection("testing").transport
    assert transport.serializer is serializer.serializer
@@ -0,0 +1,25 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
+242
View File
@@ -0,0 +1,242 @@
# -*- coding: utf-8 -*-
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from mock import Mock
from pytest import fixture
from opensearchpy.connection.connections import add_connection, connections
@fixture
def mock_client(dummy_response):
    """Register a ``Mock`` client under the "mock" alias for one test.

    ``client.search`` is stubbed to return the ``dummy_response`` fixture
    value.  The mock is yielded to the test; afterwards the module-level
    connection registry is reset so the mock does not leak into other tests.
    """
    client = Mock()
    client.search.return_value = dummy_response
    add_connection("mock", client)
    yield client
    # Teardown.  The previous code assigned ``connections._conn``, which
    # only created a new, unused attribute and left "mock" registered.
    # NOTE(review): ``_conns`` is presumed to be the attribute the registry
    # stores live connections in -- confirm against
    # opensearchpy/connection/connections.py.
    connections._conns = {}
    connections._kwargs = {}
@fixture
def dummy_response():
    """Return a canned search response containing four ``test-index`` hits."""
    shards = {"failed": 0, "successful": 10, "total": 10}

    # First hit has no routing; last hit has no ``_source`` at all.
    hits = [
        {
            "_index": "test-index",
            "_id": "opensearch",
            "_score": 12.0,
            "_source": {"city": "Amsterdam", "name": "OpenSearch"},
        },
        {
            "_index": "test-index",
            "_id": "42",
            "_score": 11.123,
            "_routing": "opensearch",
            "_source": {
                "name": {"first": "Shay", "last": "Bannon"},
                "lang": "java",
                "twitter": "kimchy",
            },
        },
        {
            "_index": "test-index",
            "_id": "47",
            "_score": 1,
            "_routing": "opensearch",
            "_source": {
                "name": {"first": "Honza", "last": "Král"},
                "lang": "python",
                "twitter": "honzakral",
            },
        },
        {
            "_index": "test-index",
            "_id": "53",
            "_score": 16.0,
            "_routing": "opensearch",
        },
    ]

    return {
        "_shards": shards,
        "hits": {"hits": hits, "max_score": 12.0, "total": 123},
        "timed_out": False,
        "took": 123,
    }
@fixture
def aggs_search():
    """Return a ``Search`` on "flat-git" with three top-level aggregations.

    The aggregations are "popular_files" (terms, with two metrics),
    "per_month" (date histogram) and "sum_lines" (sum).
    """
    from opensearchpy import Search

    search = Search(index="flat-git")

    # ``metric`` returns the bucket it was called on, so calling it twice on
    # the same bucket is equivalent to chaining the calls.
    popular_files = search.aggs.bucket(
        "popular_files", "terms", field="files", size=2
    )
    with_line_stats = popular_files.metric("line_stats", "stats", field="stats.lines")
    with_line_stats.metric(
        "top_commits", "top_hits", size=2, _source=["stats.*", "committed_date"]
    )

    search.aggs.bucket(
        "per_month",
        "date_histogram",
        interval="month",
        field="info.committed_date",
    )
    search.aggs.metric("sum_lines", "sum", field="stats.lines")
    return search
@fixture
def aggs_data():
    """Return a canned response with "sum_lines", "per_month" and
    "popular_files" aggregation results and no search hits."""

    def commit_hit(sha, committed_date, files, deletions, lines, insertions):
        # One ``top_hits`` entry; a fresh dict is built on every call.
        return {
            "_id": sha,
            "_type": "doc",
            "_source": {
                "stats": {
                    "files": files,
                    "deletions": deletions,
                    "lines": lines,
                    "insertions": insertions,
                },
                "committed_date": committed_date,
            },
            "_score": 1.0,
            "_index": "flat-git",
        }

    def file_bucket(key, doc_count, line_sum, line_avg, commits):
        # One "popular_files" terms bucket with its two metric results.
        return {
            "key": key,
            "line_stats": {
                "count": doc_count,
                "max": 228.0,
                "min": 2.0,
                "sum": line_sum,
                "avg": line_avg,
            },
            "doc_count": doc_count,
            "top_commits": {
                "hits": {
                    "total": doc_count,
                    "hits": commits,
                    "max_score": 1.0,
                }
            },
        }

    month_rows = (
        (38, 1393632000000, "2014-03-01T00:00:00.000Z"),
        (11, 1396310400000, "2014-04-01T00:00:00.000Z"),
        (3, 1398902400000, "2014-05-01T00:00:00.000Z"),
    )
    per_month_buckets = [
        {"doc_count": count, "key": key, "key_as_string": label}
        for count, key, label in month_rows
    ]

    popular_files_buckets = [
        file_bucket(
            "opensearchpy",
            40,
            2151.0,
            53.775,
            [
                commit_hit(
                    "3ca6e1e73a071a705b4babd2f581c91a2a3e5037",
                    "2014-05-02T13:47:19",
                    4,
                    7,
                    30,
                    23,
                ),
                commit_hit(
                    "eb3e543323f189fd7b698e66295427204fff5755",
                    "2014-05-01T13:32:14",
                    1,
                    0,
                    18,
                    18,
                ),
            ],
        ),
        file_bucket(
            "test_opensearchpy/test_dsl",
            35,
            1939.0,
            55.4,
            [
                commit_hit(
                    "3ca6e1e73a071a705b4babd2f581c91a2a3e5037",
                    "2014-05-02T13:47:19",
                    4,
                    7,
                    30,
                    23,
                ),
                commit_hit(
                    "dd15b6ba17dd9ba16363a51f85b31f66f1fb1157",
                    "2014-05-01T13:30:44",
                    3,
                    18,
                    62,
                    44,
                ),
            ],
        ),
    ]

    return {
        "took": 4,
        "timed_out": False,
        "_shards": {"total": 1, "successful": 1, "failed": 0},
        "hits": {"total": 52, "hits": [], "max_score": 0.0},
        "aggregations": {
            "sum_lines": {"value": 25052.0},
            "per_month": {"buckets": per_month_buckets},
            "popular_files": {
                "buckets": popular_files_buckets,
                "doc_count_error_upper_bound": 0,
                "sum_other_doc_count": 120,
            },
        },
    }
@@ -35,7 +35,7 @@ import pytest
from opensearchpy import OpenSearch, helpers
from opensearchpy.serializer import JSONSerializer
from .test_cases import TestCase
from ..test_cases import TestCase
lock_side_effect = threading.Lock()
+365
View File
@@ -0,0 +1,365 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from pytest import raises
from opensearchpy.helpers import aggs, query
def test_repr():
    """``repr`` of an aggregation shows its class, sub-aggs and params."""
    nested_max = aggs.Max(field="score")
    agg = aggs.A("terms", field="tags", aggs={"max_score": nested_max})

    expected = "Terms(aggs={'max_score': Max(field='score')}, field='tags')"
    assert expected == repr(agg)
def test_meta():
    """``meta`` is serialized next to the aggregation body and sub-aggs."""
    nested_max = aggs.Max(field="score")
    agg = aggs.A(
        "terms",
        field="tags",
        aggs={"max_score": nested_max},
        meta={"some": "metadata"},
    )

    expected = {
        "terms": {"field": "tags"},
        "aggs": {"max_score": {"max": {"field": "score"}}},
        "meta": {"some": "metadata"},
    }
    assert expected == agg.to_dict()
def test_meta_from_dict():
    """An aggregation with ``meta`` round-trips through ``to_dict``/``A``."""
    nested_max = aggs.Max(field="score")
    original = aggs.A(
        "terms",
        field="tags",
        aggs={"max_score": nested_max},
        meta={"some": "metadata"},
    )

    rebuilt = aggs.A(original.to_dict())
    assert rebuilt == original
def test_A_creates_proper_agg():
    """``A("terms", ...)`` builds a ``Terms`` instance holding the params."""
    agg = aggs.A("terms", field="tags")

    assert isinstance(agg, aggs.Terms)
    assert agg._params == {"field": "tags"}
def test_A_handles_nested_aggs_properly():
    """Sub-aggregations passed via ``aggs=`` are kept inside ``_params``."""
    nested_max = aggs.Max(field="score")
    agg = aggs.A("terms", field="tags", aggs={"max_score": nested_max})

    assert isinstance(agg, aggs.Terms)
    expected_params = {"field": "tags", "aggs": {"max_score": nested_max}}
    assert agg._params == expected_params
def test_A_passes_aggs_through():
    """Passing an existing aggregation to ``A`` returns the same object."""
    existing = aggs.A("terms", field="tags")
    assert aggs.A(existing) is existing
def test_A_from_dict():
    """``A`` accepts a raw dict and converts nested aggs into objects."""
    raw = {
        "terms": {"field": "tags"},
        "aggs": {"per_author": {"terms": {"field": "author.raw"}}},
    }
    agg = aggs.A(raw)

    assert isinstance(agg, aggs.Terms)
    expected_params = {
        "field": "tags",
        "aggs": {"per_author": aggs.A("terms", field="author.raw")},
    }
    assert agg._params == expected_params

    # The nested agg is reachable both by item and by attribute access.
    assert agg["per_author"] == aggs.A("terms", field="author.raw")
    assert agg.aggs.per_author == aggs.A("terms", field="author.raw")
def test_A_fails_with_incorrect_dict():
    """``A`` rejects a dict plus params, a dict without an agg type, and a
    dict with an extra top-level key."""
    correct_d = {
        "terms": {"field": "tags"},
        "aggs": {"per_author": {"terms": {"field": "author.raw"}}},
    }

    # A dict definition combined with keyword params.
    with raises(Exception):
        aggs.A(correct_d, field="f")

    # The aggregation type key is missing.
    without_type = {k: v for k, v in correct_d.items() if k != "terms"}
    with raises(Exception):
        aggs.A(without_type)

    # An additional, unrecognised top-level key.
    with_extra_key = dict(correct_d, xx={})
    with raises(Exception):
        aggs.A(with_extra_key)
def test_A_fails_with_agg_and_params():
    """``A`` rejects an aggregation object combined with extra params."""
    existing = aggs.A("terms", field="tags")

    with raises(Exception):
        aggs.A(existing, field="score")
def test_buckets_are_nestable():
    """``bucket`` returns the new child agg and registers it on the parent."""
    parent = aggs.Terms(field="tags")
    child = parent.bucket("per_author", "terms", field="author.raw")

    assert isinstance(child, aggs.Terms)
    assert child._params == {"field": "author.raw"}
    assert parent.aggs == {"per_author": child}
def test_metric_inside_buckets():
    """``metric`` adds the metric and hands back the parent bucket."""
    bucket = aggs.Terms(field="tags")
    returned = bucket.metric("max_score", "max", field="score")

    # The parent itself is returned, which is what makes calls chainable.
    assert bucket is returned
    assert bucket.aggs["max_score"] == aggs.Max(field="score")
def test_buckets_equals_counts_subaggs():
    """Two buckets differing only in their sub-aggregations are not equal."""
    with_child = aggs.Terms(field="tags")
    with_child.bucket("per_author", "terms", field="author.raw")
    without_child = aggs.Terms(field="tags")

    assert with_child != without_child
def test_buckets_to_dict():
    """Nested buckets and nested metrics both serialize under ``aggs``."""
    with_bucket = aggs.Terms(field="tags")
    with_bucket.bucket("per_author", "terms", field="author.raw")
    expected_bucket = {
        "terms": {"field": "tags"},
        "aggs": {"per_author": {"terms": {"field": "author.raw"}}},
    }
    assert expected_bucket == with_bucket.to_dict()

    with_metric = aggs.Terms(field="tags")
    with_metric.metric("max_score", "max", field="score")
    expected_metric = {
        "terms": {"field": "tags"},
        "aggs": {"max_score": {"max": {"field": "score"}}},
    }
    assert expected_metric == with_metric.to_dict()
def test_nested_buckets_are_reachable_as_getitem():
    """Item access yields an equal, but not identical, nested bucket."""
    parent = aggs.Terms(field="tags")
    child = parent.bucket("per_author", "terms", field="author.raw")

    assert parent["per_author"] is not child
    assert parent["per_author"] == child
def test_nested_buckets_are_settable_as_getitem():
    """Item assignment stores the very same aggregation object in ``aggs``."""
    parent = aggs.Terms(field="tags")
    child = aggs.A("terms", field="author.raw")
    parent["per_author"] = child

    assert parent.aggs["per_author"] is child
def test_filter_can_be_instantiated_using_positional_args():
    """``Filter`` takes its query positionally, directly or through ``A``."""
    filter_agg = aggs.Filter(query.Q("term", f=42))

    expected = {"filter": {"term": {"f": 42}}}
    assert expected == filter_agg.to_dict()
    assert filter_agg == aggs.A("filter", query.Q("term", f=42))
def test_filter_aggregation_as_nested_agg():
    """A filter aggregation can be nested under a terms bucket."""
    parent = aggs.Terms(field="tags")
    parent.bucket("filtered", "filter", query.Q("term", f=42))

    expected = {
        "terms": {"field": "tags"},
        "aggs": {"filtered": {"filter": {"term": {"f": 42}}}},
    }
    assert expected == parent.to_dict()
def test_filter_aggregation_with_nested_aggs():
    """A filter aggregation can itself hold nested buckets."""
    filter_agg = aggs.Filter(query.Q("term", f=42))
    filter_agg.bucket("testing", "terms", field="tags")

    expected = {
        "filter": {"term": {"f": 42}},
        "aggs": {"testing": {"terms": {"field": "tags"}}},
    }
    assert expected == filter_agg.to_dict()
def test_filters_correctly_identifies_the_hash():
    """Named filters given as a dict serialize unchanged and are exposed as
    query objects on ``.filters``."""
    named_filters = {
        "group_a": {"term": {"group": "a"}},
        "group_b": {"term": {"group": "b"}},
    }
    agg = aggs.A("filters", filters=named_filters)

    expected = {
        "filters": {
            "filters": {
                "group_a": {"term": {"group": "a"}},
                "group_b": {"term": {"group": "b"}},
            }
        }
    }
    assert expected == agg.to_dict()
    assert agg.filters.group_a == query.Q("term", group="a")
def test_bucket_sort_agg():
    """``bucket_sort`` serializes standalone and nested in a date histogram."""
    standalone = aggs.BucketSort(sort=[{"total_sales": {"order": "desc"}}], size=3)
    assert standalone.to_dict() == {
        "bucket_sort": {"sort": [{"total_sales": {"order": "desc"}}], "size": 3}
    }

    histogram = aggs.DateHistogram(field="date", interval="month")
    histogram.bucket("total_sales", "sum", field="price")
    histogram.bucket(
        "sales_bucket_sort",
        "bucket_sort",
        sort=[{"total_sales": {"order": "desc"}}],
        size=3,
    )

    expected = {
        "date_histogram": {"field": "date", "interval": "month"},
        "aggs": {
            "total_sales": {"sum": {"field": "price"}},
            "sales_bucket_sort": {
                "bucket_sort": {"sort": [{"total_sales": {"order": "desc"}}], "size": 3}
            },
        },
    }
    assert expected == histogram.to_dict()
def test_bucket_sort_agg_only_trnunc():
    """``bucket_sort`` with only ``from``/``size`` (no ``sort``) serializes."""
    # ``from`` is a Python keyword, so the params have to be unpacked.
    truncation = {"from": 1, "size": 1}

    standalone = aggs.BucketSort(**truncation)
    assert standalone.to_dict() == {"bucket_sort": {"from": 1, "size": 1}}

    histogram = aggs.DateHistogram(field="date", interval="month")
    histogram.bucket("bucket_truncate", "bucket_sort", **truncation)

    expected = {
        "date_histogram": {"field": "date", "interval": "month"},
        "aggs": {"bucket_truncate": {"bucket_sort": {"from": 1, "size": 1}}},
    }
    assert expected == histogram.to_dict()
def test_geohash_grid_aggregation():
    """``GeohashGrid`` serializes under the ``geohash_grid`` key."""
    grid = aggs.GeohashGrid(field="centroid", precision=3)

    expected = {"geohash_grid": {"field": "centroid", "precision": 3}}
    assert expected == grid.to_dict()
def test_geotile_grid_aggregation():
    """``GeotileGrid`` serializes under the ``geotile_grid`` key."""
    grid = aggs.GeotileGrid(field="centroid", precision=3)

    expected = {"geotile_grid": {"field": "centroid", "precision": 3}}
    assert expected == grid.to_dict()
def test_boxplot_aggregation():
    """``Boxplot`` serializes under the ``boxplot`` key."""
    boxplot = aggs.Boxplot(field="load_time")

    expected = {"boxplot": {"field": "load_time"}}
    assert expected == boxplot.to_dict()
def test_rare_terms_aggregation():
    """``RareTerms`` accepts nested aggregations and serializes them."""
    rare = aggs.RareTerms(field="the-field")
    rare.bucket("total_sales", "sum", field="price")
    rare.bucket(
        "sales_bucket_sort",
        "bucket_sort",
        sort=[{"total_sales": {"order": "desc"}}],
        size=3,
    )

    expected = {
        "aggs": {
            "sales_bucket_sort": {
                "bucket_sort": {"size": 3, "sort": [{"total_sales": {"order": "desc"}}]}
            },
            "total_sales": {"sum": {"field": "price"}},
        },
        "rare_terms": {"field": "the-field"},
    }
    assert expected == rare.to_dict()
def test_variable_width_histogram_aggregation():
    """``VariableWidthHistogram`` serializes its field and bucket count."""
    histogram = aggs.VariableWidthHistogram(field="price", buckets=2)

    expected = {"variable_width_histogram": {"buckets": 2, "field": "price"}}
    assert expected == histogram.to_dict()
def test_median_absolute_deviation_aggregation():
    """``MedianAbsoluteDeviation`` serializes under its snake_case key."""
    deviation = aggs.MedianAbsoluteDeviation(field="rating")

    expected = {"median_absolute_deviation": {"field": "rating"}}
    assert expected == deviation.to_dict()
def test_t_test_aggregation():
    """``TTest`` serializes both populations and the test type."""
    before = {"field": "startup_time_before"}
    after = {"field": "startup_time_after"}
    t_test = aggs.TTest(a=before, b=after, type="paired")

    expected = {
        "t_test": {
            "a": {"field": "startup_time_before"},
            "b": {"field": "startup_time_after"},
            "type": "paired",
        }
    }
    assert expected == t_test.to_dict()
def test_inference_aggregation():
    """``Inference`` serializes its model id and buckets path."""
    inference = aggs.Inference(
        model_id="model-id", buckets_path={"agg_name": "agg_name"}
    )

    expected = {
        "inference": {"buckets_path": {"agg_name": "agg_name"}, "model_id": "model-id"}
    }
    assert expected == inference.to_dict()
def test_moving_percentiles_aggregation():
    """A ``moving_percentiles`` pipeline serializes beside its source agg."""
    histogram = aggs.DateHistogram()
    histogram.bucket(
        "the_percentile", "percentiles", field="price", percents=[1.0, 99.0]
    )
    histogram.pipeline(
        "the_movperc",
        "moving_percentiles",
        buckets_path="the_percentile",
        window=10,
    )

    expected = {
        "aggs": {
            "the_movperc": {
                "moving_percentiles": {"buckets_path": "the_percentile", "window": 10}
            },
            "the_percentile": {
                "percentiles": {"field": "price", "percents": [1.0, 99.0]}
            },
        },
        "date_histogram": {},
    }
    assert expected == histogram.to_dict()
def test_normalize_aggregation():
    """``Normalize`` serializes its buckets path and method."""
    normalize = aggs.Normalize(buckets_path="normalized", method="percent_of_sum")

    expected = {
        "normalize": {"buckets_path": "normalized", "method": "percent_of_sum"}
    }
    assert expected == normalize.to_dict()
@@ -0,0 +1,226 @@
# -*- coding: utf-8 -*-
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from pytest import raises
from opensearchpy.helpers import analysis
def test_analyzer_serializes_as_name():
    """A named analyzer's ``to_dict`` is just its name."""
    named = analysis.analyzer("my_analyzer")
    assert "my_analyzer" == named.to_dict()
def test_analyzer_has_definition():
    """``CustomAnalyzer.get_definition`` returns type, tokenizer and filter."""
    custom = analysis.CustomAnalyzer(
        "my_analyzer", tokenizer="keyword", filter=["lowercase"]
    )

    expected = {
        "type": "custom",
        "tokenizer": "keyword",
        "filter": ["lowercase"],
    }
    assert expected == custom.get_definition()
def test_simple_multiplexer_filter():
    """A multiplexer token filter with plain-string sub-filters is emitted
    in the analysis definition under ``filter``."""
    multiplexer = analysis.token_filter(
        "my_multi", "multiplexer", filters=["lowercase", "lowercase, stop"]
    )
    custom = analysis.analyzer(
        "my_analyzer", tokenizer="keyword", filter=[multiplexer]
    )

    expected = {
        "analyzer": {
            "my_analyzer": {
                "filter": ["my_multi"],
                "tokenizer": "keyword",
                "type": "custom",
            }
        },
        "filter": {
            "my_multi": {
                "filters": ["lowercase", "lowercase, stop"],
                "type": "multiplexer",
            }
        },
    }
    assert expected == custom.get_analysis_definition()
def test_multiplexer_with_custom_filter():
    """A custom filter nested inside a multiplexer is collected into the
    definition and referenced by name."""
    snowball = analysis.token_filter("en", "snowball", language="English")
    multiplexer = analysis.token_filter(
        "my_multi",
        "multiplexer",
        filters=[[snowball], "lowercase, stop"],
    )
    custom = analysis.analyzer(
        "my_analyzer", tokenizer="keyword", filter=[multiplexer]
    )

    expected = {
        "analyzer": {
            "my_analyzer": {
                "filter": ["my_multi"],
                "tokenizer": "keyword",
                "type": "custom",
            }
        },
        "filter": {
            "en": {"type": "snowball", "language": "English"},
            "my_multi": {"filters": ["en", "lowercase, stop"], "type": "multiplexer"},
        },
    }
    assert expected == custom.get_analysis_definition()
def test_conditional_token_filter():
    """A ``condition`` filter's nested custom filter is collected and
    referenced by name in the analysis definition."""
    snowball = analysis.token_filter("en", "snowball", language="English")
    conditional = analysis.token_filter(
        "testing",
        "condition",
        script={"source": "return true"},
        filter=["lowercase", snowball],
    )
    custom = analysis.analyzer(
        "my_cond",
        tokenizer=analysis.tokenizer("keyword"),
        filter=[conditional, "stop"],
    )

    expected = {
        "analyzer": {
            "my_cond": {
                "filter": ["testing", "stop"],
                "tokenizer": "keyword",
                "type": "custom",
            }
        },
        "filter": {
            "en": {"language": "English", "type": "snowball"},
            "testing": {
                "script": {"source": "return true"},
                "filter": ["lowercase", "en"],
                "type": "condition",
            },
        },
    }
    assert expected == custom.get_analysis_definition()
def test_conflicting_nested_filters_cause_error():
    """Two different filters sharing the name "en" make
    ``get_analysis_definition`` raise ``ValueError``."""
    stemmer_en = analysis.token_filter("en", "stemmer", language="english")
    snowball_en = analysis.token_filter("en", "snowball", language="English")
    conditional = analysis.token_filter(
        "testing",
        "condition",
        script={"source": "return true"},
        filter=["lowercase", snowball_en],
    )
    custom = analysis.analyzer(
        "my_cond",
        tokenizer=analysis.tokenizer("keyword"),
        filter=[stemmer_en, conditional],
    )

    with raises(ValueError):
        custom.get_analysis_definition()
def test_normalizer_serializes_as_name():
    """A named normalizer's ``to_dict`` is just its name."""
    named = analysis.normalizer("my_normalizer")
    assert "my_normalizer" == named.to_dict()
def test_normalizer_has_definition():
    """``CustomNormalizer.get_definition`` returns type and both filters."""
    custom = analysis.CustomNormalizer(
        "my_normalizer",
        filter=["lowercase", "asciifolding"],
        char_filter=["quote"],
    )

    expected = {
        "type": "custom",
        "filter": ["lowercase", "asciifolding"],
        "char_filter": ["quote"],
    }
    assert expected == custom.get_definition()
def test_tokenizer():
    """A custom tokenizer serializes as its name; its definition holds the
    type and parameters."""
    trigram = analysis.tokenizer("trigram", "nGram", min_gram=3, max_gram=3)

    assert trigram.to_dict() == "trigram"
    expected = {"type": "nGram", "min_gram": 3, "max_gram": 3}
    assert expected == trigram.get_definition()
def test_custom_analyzer_can_collect_custom_items():
    """The analysis definition gathers every custom tokenizer, token filter
    and char filter the analyzer uses."""
    trigram = analysis.tokenizer("trigram", "nGram", min_gram=3, max_gram=3)
    my_stop = analysis.token_filter("my_stop", "stop", stopwords=["a", "b"])
    umlauts = analysis.char_filter("umlauts", "pattern_replace", mappings=["ü=>ue"])

    custom = analysis.analyzer(
        "my_analyzer",
        tokenizer=trigram,
        filter=["lowercase", my_stop],
        char_filter=["html_strip", umlauts],
    )

    assert custom.to_dict() == "my_analyzer"

    expected = {
        "analyzer": {
            "my_analyzer": {
                "type": "custom",
                "tokenizer": "trigram",
                "filter": ["lowercase", "my_stop"],
                "char_filter": ["html_strip", "umlauts"],
            }
        },
        "tokenizer": {"trigram": trigram.get_definition()},
        "filter": {"my_stop": my_stop.get_definition()},
        "char_filter": {"umlauts": umlauts.get_definition()},
    }
    assert expected == custom.get_analysis_definition()
def test_stemmer_analyzer_can_pass_name():
    """A ``name`` keyword is kept as a filter parameter, separate from the
    filter's own identifier."""
    stemmer = analysis.token_filter(
        "my_english_filter", name="minimal_english", type="stemmer"
    )

    assert stemmer.to_dict() == "my_english_filter"
    expected = {"type": "stemmer", "name": "minimal_english"}
    assert expected == stemmer.get_definition()
@@ -0,0 +1,640 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import unicode_literals
import codecs
import ipaddress
import pickle
from datetime import datetime
from hashlib import sha256
from pytest import raises
from opensearchpy import Index, InnerDoc, Mapping, Range, analyzer
from opensearchpy.exceptions import IllegalOperation, ValidationException
from opensearchpy.helpers import document, field, utils
class MyInner(InnerDoc):
    """Inner document with a single text field; embedded by ``MyDoc``."""

    old_field = field.Text()
class MyDoc(document.Document):
    """Base test document: keyword, text and date fields plus an object."""

    title = field.Keyword()
    name = field.Text()
    created_at = field.Date()
    # Object field whose properties come from ``MyInner``.
    inner = field.Object(MyInner)
class MySubDoc(MyDoc):
    """``MyDoc`` subclass that redefines ``name`` and sets an index name."""

    # Overrides the inherited text field with a keyword field.
    name = field.Keyword()

    class Index:
        name = "default-index"
class MyDoc2(document.Document):
    """Second, independent document base with one long field."""

    extra = field.Long()
class MyMultiSubDoc(MyDoc2, MySubDoc):
    """Document inheriting from both ``MyDoc2`` and ``MySubDoc``."""
class Comment(document.InnerDoc):
    """Inner document with a text title and a multi-valued keyword field."""

    title = field.Text()
    tags = field.Keyword(multi=True)
class DocWithNested(document.Document):
    """Document holding a nested list of ``Comment`` inner documents."""

    comments = field.Nested(Comment)

    class Index:
        name = "test-doc-with-nested"
class SimpleCommit(document.Document):
    """Document with one multi-valued text field, bound to "test-git"."""

    files = field.Text(multi=True)

    class Index:
        name = "test-git"
class Secret(str):
    """Marker ``str`` subclass with no behaviour of its own."""
class SecretField(field.CustomField):
    """Custom text field storing values ROT13-encoded and returning
    ``Secret`` instances when read back."""

    builtin_type = "text"

    def _serialize(self, data):
        """Return ``data`` encoded with ROT13."""
        encoded = codecs.encode(data, "rot_13")
        return encoded

    def _deserialize(self, data):
        """Return ``data`` as a ``Secret``, decoding it unless it already is one."""
        # Values that are already ``Secret`` are plain text; do not decode twice.
        return data if isinstance(data, Secret) else Secret(codecs.decode(data, "rot_13"))
class SecretDoc(document.Document):
    """Document whose ``title`` is a ``SecretField`` declared with index="no"."""

    title = SecretField(index="no")

    class Index:
        name = "test-secret-doc"
class NestedSecret(document.Document):
    """Document holding a nested list of ``SecretDoc`` entries."""

    secrets = field.Nested(SecretDoc)

    class Index:
        name = "test-nested-secret"
class OptionalObjectWithRequiredField(document.Document):
    """Document with a nested field whose ``title`` property is required."""

    # The nested container is not marked required; only its inner keyword is.
    comments = field.Nested(properties={"title": field.Keyword(required=True)})

    class Index:
        name = "test-required"
class Host(document.Document):
    """Document with a single IP address field, bound to "test-host"."""

    ip = field.Ip()

    class Index:
        name = "test-host"
def test_range_serializes_properly():
    """A ``LongRange`` field accepts a ``Range`` or a dict and serializes
    to the plain dict form either way."""

    class D(document.Document):
        lr = field.LongRange()

    from_range = D(lr=Range(lt=42))
    assert 40 in from_range.lr
    assert 47 not in from_range.lr
    assert {"lr": {"lt": 42}} == from_range.to_dict()

    from_dict = D(lr={"lt": 42})
    assert {"lr": {"lt": 42}} == from_dict.to_dict()
def test_range_deserializes_properly():
    """``from_opensearch`` turns a raw range dict into a ``Range`` object."""

    class D(document.InnerDoc):
        lr = field.LongRange()

    loaded = D.from_opensearch({"lr": {"lt": 42}}, True)

    assert isinstance(loaded.lr, Range)
    assert 40 in loaded.lr
    assert 47 not in loaded.lr
def test_resolve_nested():
    """``resolve_nested`` returns the nested path list and the field object
    for a dotted field path."""
    # The local is deliberately not called ``field`` so the imported
    # ``field`` module is not shadowed inside this function.
    nested_path, resolved_field = NestedSecret._index.resolve_nested("secrets.title")

    assert nested_path == ["secrets"]
    assert resolved_field is NestedSecret._doc_type.mapping["secrets"]["title"]
def test_conflicting_mapping_raises_error_in_index_to_dict():
    """Registering two documents that map ``name`` to different types makes
    ``Index.to_dict`` raise ``ValueError``."""

    class A(document.Document):
        name = field.Text()

    class B(document.Document):
        name = field.Keyword()

    index = Index("i")
    index.document(A)
    index.document(B)

    with raises(ValueError):
        index.to_dict()
def test_ip_address_serializes_properly():
    """An ``ipaddress`` object in an ``Ip`` field serializes to its string."""
    address = ipaddress.IPv4Address("10.0.0.1")
    host = Host(ip=address)

    assert {"ip": "10.0.0.1"} == host.to_dict()
def test_matches_uses_index():
    """``_matches`` accepts only hits whose ``_index`` equals the doc's index."""
    same_index_hit = {"_index": "test-git"}
    other_index_hit = {"_index": "not-test-git"}

    assert SimpleCommit._matches(same_index_hit)
    assert not SimpleCommit._matches(other_index_hit)
def test_matches_with_no_name_always_matches():
    """A document without an index name matches any hit."""

    class D(document.Document):
        pass

    for hit in ({}, {"_index": "whatever"}):
        assert D._matches(hit)
def test_matches_accepts_wildcards():
    """An index name containing ``*`` is treated as a wildcard pattern."""

    # Local class; it shadows the module-level ``MyDoc`` inside this test only.
    class MyDoc(document.Document):
        class Index:
            name = "my-*"

    matching_hit = {"_index": "my-index"}
    non_matching_hit = {"_index": "not-my-index"}

    assert MyDoc._matches(matching_hit)
    assert not MyDoc._matches(non_matching_hit)
def test_assigning_attrlist_to_field():
    """Assigning an ``AttrList`` stores the wrapped list object itself."""
    raw_files = ["README", "README.rst"]
    commit = SimpleCommit()
    commit.files = utils.AttrList(raw_files)

    # Identity, not just equality: the underlying list must not be copied.
    assert commit.to_dict()["files"] is raw_files
def test_optional_inner_objects_are_not_validated_if_missing():
    """``full_clean`` returns ``None`` when the nested field is absent, even
    though its inner property is required."""
    doc = OptionalObjectWithRequiredField()
    result = doc.full_clean()

    assert result is None
def test_custom_field():
    """``SecretField`` encodes on serialization and decodes on load."""
    created = SecretDoc(title=Secret("Hello"))
    assert {"title": "Uryyb"} == created.to_dict()
    assert created.title == "Hello"

    loaded = SecretDoc.from_opensearch({"_source": {"title": "Uryyb"}})
    assert loaded.title == "Hello"
    assert isinstance(loaded.title, Secret)
def test_custom_field_mapping():
    """A custom field maps using its ``builtin_type`` plus its own params."""
    expected = {"properties": {"title": {"index": "no", "type": "text"}}}
    assert expected == SecretDoc._doc_type.mapping.to_dict()
def test_custom_field_in_nested():
    """Custom-field serialization also applies inside nested documents."""
    holder = NestedSecret()
    holder.secrets.append(SecretDoc(title=Secret("Hello")))

    expected = {"secrets": [{"title": "Uryyb"}]}
    assert expected == holder.to_dict()
    assert holder.secrets[0].title == "Hello"
def test_multi_works_after_doc_has_been_saved():
    """A multi-valued field can still be appended to after ``full_clean``."""
    commit = SimpleCommit()
    commit.full_clean()
    commit.files.append("setup.py")

    assert commit.to_dict() == {"files": ["setup.py"]}
def test_multi_works_in_nested_after_doc_has_been_serialized():
    """An empty multi field in a nested doc reads as ``[]`` before and after
    serialization, and is left out of the serialized form."""
    # Issue #359
    doc = DocWithNested(comments=[Comment(title="First!")])

    assert [] == doc.comments[0].tags
    expected = {"comments": [{"title": "First!"}]}
    assert expected == doc.to_dict()
    assert [] == doc.comments[0].tags
def test_null_value_for_object():
    """An object field explicitly set to ``None`` reads back as ``None``."""
    doc = MyDoc(inner=None)
    assert doc.inner is None
def test_inherited_doc_types_can_override_index():
    """A subclass's own ``Index`` replaces the inherited index name and
    contributes settings, aliases and analyzers."""

    class MyDocDifferentIndex(MySubDoc):
        class Index:
            name = "not-default-index"
            settings = {"number_of_replicas": 0}
            aliases = {"a": {}}
            analyzers = [analyzer("my_analizer", tokenizer="keyword")]

    assert MyDocDifferentIndex._index._name == "not-default-index"
    assert MyDocDifferentIndex()._get_index() == "not-default-index"

    expected_mappings = {
        "properties": {
            "created_at": {"type": "date"},
            "inner": {
                "type": "object",
                "properties": {"old_field": {"type": "text"}},
            },
            "name": {"type": "keyword"},
            "title": {"type": "keyword"},
        }
    }
    expected_settings = {
        "analysis": {
            "analyzer": {"my_analizer": {"tokenizer": "keyword", "type": "custom"}}
        },
        "number_of_replicas": 0,
    }
    assert MyDocDifferentIndex._index.to_dict() == {
        "aliases": {"a": {}},
        "mappings": expected_mappings,
        "settings": expected_settings,
    }
def test_to_dict_with_meta():
    """``to_dict(True)`` wraps the source and adds index and routing meta."""
    doc = MySubDoc(title="hello")
    doc.meta.routing = "some-parent"

    expected = {
        "_index": "default-index",
        "_routing": "some-parent",
        "_source": {"title": "hello"},
    }
    assert expected == doc.to_dict(True)
def test_to_dict_with_meta_includes_custom_index():
    """An index set on ``meta`` wins over the class-level index name."""
    doc = MySubDoc(title="hello")
    doc.meta.index = "other-index"

    expected = {"_index": "other-index", "_source": {"title": "hello"}}
    assert expected == doc.to_dict(True)
def test_to_dict_without_skip_empty_will_include_empty_fields():
    """Empty values are dropped by default and kept with ``skip_empty=False``."""
    doc = MySubDoc(tags=[], title=None, inner={})

    assert {} == doc.to_dict()
    expected_full = {"tags": [], "title": None, "inner": {}}
    assert expected_full == doc.to_dict(skip_empty=False)
def test_attribute_can_be_removed():
    """``del doc.attr`` removes the key from the document's ``_d_`` store."""
    doc = MyDoc(title="hello")
    del doc.title

    assert "title" not in doc._d_
def test_doc_type_can_be_correctly_pickled():
    """A document survives a pickle round trip with meta, fields and nested
    document types intact."""
    original = DocWithNested(
        title="Hello World!",
        comments=[Comment(title="hellp")],
        meta={"id": 42},
    )

    restored = pickle.loads(pickle.dumps(original))

    assert restored == original
    assert 42 == restored.meta.id
    assert "Hello World!" == restored.title
    assert [{"title": "hellp"}] == restored.comments
    assert isinstance(restored.comments[0], Comment)
def test_meta_is_accessible_even_on_empty_doc():
    """Reading ``meta`` must not raise, with or without field values."""
    empty_doc = MyDoc()
    empty_doc.meta

    titled_doc = MyDoc(title="aaa")
    titled_doc.meta
def test_meta_field_mapping():
    """``MetaField`` entries on ``Meta`` end up as top-level mapping keys."""

    class User(document.Document):
        username = field.Text()

        class Meta:
            all = document.MetaField(enabled=False)
            _index = document.MetaField(enabled=True)
            dynamic = document.MetaField("strict")
            dynamic_templates = document.MetaField([42])

    expected = {
        "properties": {"username": {"type": "text"}},
        "_all": {"enabled": False},
        "_index": {"enabled": True},
        "dynamic": "strict",
        "dynamic_templates": [42],
    }
    assert expected == User._doc_type.mapping.to_dict()
def test_multi_value_fields():
    """A ``multi=True`` field starts as an empty list and can be appended to."""

    class Blog(document.Document):
        tags = field.Keyword(multi=True)

    blog = Blog()
    assert [] == blog.tags

    blog.tags.append("search")
    blog.tags.append("python")
    assert ["search", "python"] == blog.tags
def test_docs_with_properties():
    """Python properties on a document work and are not stored as fields."""

    class User(document.Document):
        pwd_hash = field.Text()

        def check_password(self, pwd):
            digest = sha256(pwd).hexdigest()
            return digest == self.pwd_hash

        @property
        def password(self):
            raise AttributeError("readonly")

        @password.setter
        def password(self, pwd):
            self.pwd_hash = sha256(pwd).hexdigest()

    user = User(pwd_hash=sha256(b"secret").hexdigest())
    assert user.check_password(b"secret")
    assert not user.check_password(b"not-secret")

    # Going through the setter must update the hash, not add a "password" key.
    user.password = b"not-secret"
    assert "password" not in user._d_
    assert not user.check_password(b"secret")
    assert user.check_password(b"not-secret")

    with raises(AttributeError):
        user.password
def test_nested_can_be_assigned_to():
    """A nested list taken from one document can be assigned to another and
    still yields ``Comment`` instances."""
    source = DocWithNested(comments=[Comment(title="First!")])
    target = DocWithNested()
    target.comments = source.comments

    assert isinstance(source.comments[0], Comment)
    assert target.comments == [{"title": "First!"}]
    expected = {"comments": [{"title": "First!"}]}
    assert expected == target.to_dict()
    assert isinstance(target.comments[0], Comment)
def test_nested_can_be_none():
    """A nested field set to ``None`` is omitted from ``to_dict``."""
    doc = DocWithNested(comments=None, title="Hello World!")

    expected = {"title": "Hello World!"}
    assert expected == doc.to_dict()
def test_nested_defaults_to_list_and_can_be_updated():
    """An unset nested field reads as ``[]`` and accepts appended dicts."""
    doc = DocWithNested()
    assert [] == doc.comments

    doc.comments.append({"title": "hello World!"})
    expected = {"comments": [{"title": "hello World!"}]}
    assert expected == doc.to_dict()
def test_to_dict_is_recursive_and_can_cope_with_multi_values():
    """``to_dict()`` serializes list values and lists of inner documents."""
    doc = MyDoc(name=["a", "b", "c"])
    doc.inner = [MyInner(old_field="of1"), MyInner(old_field="of2")]

    assert isinstance(doc.inner[0], MyInner)

    expected = {
        "name": ["a", "b", "c"],
        "inner": [{"old_field": "of1"}, {"old_field": "of2"}],
    }
    assert expected == doc.to_dict()
def test_to_dict_ignores_empty_collections():
    """Empty dicts/lists are dropped by ``to_dict()``; ``""``, 0 and False stay."""
    doc = MySubDoc(name="", address={}, count=0, valid=False, tags=[])
    expected = {"name": "", "count": 0, "valid": False}
    assert expected == doc.to_dict()
def test_declarative_mapping_definition():
    """Fields declared on ``MyDoc`` are reflected in its generated mapping."""
    assert issubclass(MyDoc, document.Document)
    assert hasattr(MyDoc, "_doc_type")

    inner_mapping = {"type": "object", "properties": {"old_field": {"type": "text"}}}
    expected = {
        "properties": {
            "created_at": {"type": "date"},
            "name": {"type": "text"},
            "title": {"type": "keyword"},
            "inner": inner_mapping,
        }
    }
    assert expected == MyDoc._doc_type.mapping.to_dict()
def test_you_can_supply_own_mapping_instance():
    """A ``Mapping`` supplied via ``Meta.mapping`` is merged with declared fields."""

    class MyD(document.Document):
        title = field.Text()

        class Meta:
            mapping = Mapping()
            mapping.meta("_all", enabled=False)

    expected = {
        "_all": {"enabled": False},
        "properties": {"title": {"type": "text"}},
    }
    assert expected == MyD._doc_type.mapping.to_dict()
def test_document_can_be_created_dynamically():
    """Attributes set after construction, declared or not, end up in ``to_dict()``."""
    timestamp = datetime.now()
    doc = MyDoc(title="hello")
    doc.name = "My Fancy Document!"
    doc.created_at = timestamp

    inner_obj = doc.inner
    # Repeated attribute access must hand back the very same object.
    assert inner_obj is doc.inner

    inner_obj.old_field = "Already defined."
    doc.inner.new_field = ["undefined", "field"]

    expected = {
        "title": "hello",
        "name": "My Fancy Document!",
        "created_at": timestamp,
        "inner": {
            "old_field": "Already defined.",
            "new_field": ["undefined", "field"],
        },
    }
    assert expected == doc.to_dict()
def test_invalid_date_will_raise_exception():
    """``full_clean()`` rejects a non-date string assigned to a date field."""
    doc = MyDoc()
    doc.created_at = "not-a-date"

    with raises(ValidationException):
        doc.full_clean()
def test_document_inheritance():
    """``MySubDoc`` inherits the parent's fields and overrides ``name``."""
    for base in (MyDoc, document.Document):
        assert issubclass(MySubDoc, base)
    assert hasattr(MySubDoc, "_doc_type")

    inner_mapping = {"type": "object", "properties": {"old_field": {"type": "text"}}}
    expected = {
        "properties": {
            "created_at": {"type": "date"},
            "name": {"type": "keyword"},
            "title": {"type": "keyword"},
            "inner": inner_mapping,
        }
    }
    assert expected == MySubDoc._doc_type.mapping.to_dict()
def test_child_class_can_override_parent():
    """A child's object field overrides ``dynamic`` and merges the properties."""

    class A(document.Document):
        o = field.Object(dynamic=False, properties={"a": field.Text()})

    class B(A):
        o = field.Object(dynamic="strict", properties={"b": field.Text()})

    expected_object = {
        "dynamic": "strict",
        "properties": {"a": {"type": "text"}, "b": {"type": "text"}},
        "type": "object",
    }
    assert {"properties": {"o": expected_object}} == B._doc_type.mapping.to_dict()
def test_meta_fields_are_stored_in_meta_and_ignored_by_to_dict():
    """Meta values live on ``.meta`` and are excluded from the document body."""
    doc = MySubDoc(meta={"id": 42}, name="My First doc!")
    doc.meta.index = "my-index"

    assert doc.meta.index == "my-index"
    assert doc.meta.id == 42

    expected_body = {"name": "My First doc!"}
    assert expected_body == doc.to_dict()

    expected_meta = {"id": 42, "index": "my-index"}
    assert expected_meta == doc.meta.to_dict()
def test_index_inheritance():
    """A document with several bases combines the mappings of all of them."""
    for base in (MySubDoc, MyDoc2, document.Document):
        assert issubclass(MyMultiSubDoc, base)
    for attr in ("_doc_type", "_index"):
        assert hasattr(MyMultiSubDoc, attr)

    inner_mapping = {"type": "object", "properties": {"old_field": {"type": "text"}}}
    expected = {
        "properties": {
            "created_at": {"type": "date"},
            "name": {"type": "keyword"},
            "title": {"type": "keyword"},
            "inner": inner_mapping,
            "extra": {"type": "long"},
        }
    }
    assert expected == MyMultiSubDoc._doc_type.mapping.to_dict()
def test_meta_fields_can_be_set_directly_in_init():
    """An ``_id`` keyword passed to the constructor is exposed as ``meta.id``."""
    sentinel_id = object()
    doc = MyDoc(_id=sentinel_id, title="Hello World!")
    assert doc.meta.id is sentinel_id
def test_save_no_index(mock_client):
    """Saving a ``MyDoc`` that has no index raises ``ValidationException``."""
    doc = MyDoc()
    with raises(ValidationException):
        doc.save(using="mock")
def test_delete_no_index(mock_client):
    """Deleting a ``MyDoc`` that has no index raises ``ValidationException``."""
    doc = MyDoc()
    with raises(ValidationException):
        doc.delete(using="mock")
def test_update_no_fields():
    """Calling ``update()`` without any fields raises ``IllegalOperation``."""
    doc = MyDoc()
    with raises(IllegalOperation):
        doc.update()
def test_search_with_custom_alias_and_index(mock_client):
    """``Document.search`` forwards an explicit ``using`` alias and index list."""
    indices = ["custom_index1", "custom_index2"]
    search = MyDoc.search(using="staging", index=indices)

    assert search._using == "staging"
    assert search._index == ["custom_index1", "custom_index2"]
def test_from_opensearch_respects_underscored_non_meta_fields():
    """Underscore-prefixed keys in ``fields``/``_source`` survive ``from_opensearch``."""

    class Company(document.Document):
        class Index:
            name = "test-company"

    raw_hit = {
        "_index": "test-index",
        "_id": "opensearch",
        "_score": 12.0,
        "fields": {
            "hello": "world",
            "_routing": "opensearch",
            "_tags": ["search"],
        },
        "_source": {
            "city": "Amsterdam",
            "name": "OpenSearch",
            "_tagline": "You know, for search",
        },
    }

    company = Company.from_opensearch(raw_hit)

    assert company.meta.fields._tags == ["search"]
    assert company.meta.fields._routing == "opensearch"
    assert company._tagline == "You know, for search"
def test_nested_and_object_inner_doc():
    """The same inner doc maps as ``object`` or ``nested`` depending on the field."""

    class MySubDocWithNested(MyDoc):
        nested_inner = field.Nested(MyInner)

    mapped_properties = MySubDocWithNested._doc_type.mapping.to_dict()["properties"]

    inner_properties = {"old_field": {"type": "text"}}
    assert mapped_properties == {
        "created_at": {"type": "date"},
        "inner": {"properties": inner_properties, "type": "object"},
        "name": {"type": "text"},
        "nested_inner": {"properties": {"old_field": {"type": "text"}}, "type": "nested"},
        "title": {"type": "keyword"},
    }
@@ -0,0 +1,204 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from datetime import datetime
import pytest
from opensearchpy.helpers.faceted_search import (
DateHistogramFacet,
FacetedSearch,
TermsFacet,
)
class BlogSearch(FacetedSearch):
    """Faceted search fixture over two doc types with two terms facets."""

    doc_types = ["user", "post"]
    # "title^5" carries a boost suffix; the tuple is passed through as-is.
    fields = ("title^5", "body")

    facets = {
        "category": TermsFacet(field="category.raw"),
        "tags": TermsFacet(field="tags"),
    }
def test_query_is_created_properly():
    """Without filters every facet aggregation is wrapped in a ``match_all`` filter."""
    search = BlogSearch("python search").build_search()

    assert search._doc_type == ["user", "post"]

    match_all = {"match_all": {}}
    expected = {
        "aggs": {
            "_filter_tags": {
                "filter": match_all,
                "aggs": {"tags": {"terms": {"field": "tags"}}},
            },
            "_filter_category": {
                "filter": {"match_all": {}},
                "aggs": {"category": {"terms": {"field": "category.raw"}}},
            },
        },
        "query": {
            "multi_match": {"fields": ("title^5", "body"), "query": "python search"}
        },
        "highlight": {"fields": {"body": {}, "title": {}}},
    }
    assert expected == search.to_dict()
def test_query_is_created_properly_with_sort_tuple():
    """A ``sort`` tuple is translated into the ``sort`` section of the body."""
    sort_spec = ("category", "-title")
    search = BlogSearch("python search", sort=sort_spec).build_search()

    assert search._doc_type == ["user", "post"]

    expected = {
        "aggs": {
            "_filter_tags": {
                "filter": {"match_all": {}},
                "aggs": {"tags": {"terms": {"field": "tags"}}},
            },
            "_filter_category": {
                "filter": {"match_all": {}},
                "aggs": {"category": {"terms": {"field": "category.raw"}}},
            },
        },
        "query": {
            "multi_match": {"fields": ("title^5", "body"), "query": "python search"}
        },
        "highlight": {"fields": {"body": {}, "title": {}}},
        # A leading "-" becomes a descending order clause.
        "sort": ["category", {"title": {"order": "desc"}}],
    }
    assert expected == search.to_dict()
def test_filter_is_applied_to_search_but_not_relevant_facet():
    """A facet filter becomes the post filter and filters only the *other* facets."""
    faceted = BlogSearch("python search", filters={"category": "opensearch"})
    search = faceted.build_search()

    expected = {
        "aggs": {
            "_filter_tags": {
                "filter": {"terms": {"category.raw": ["opensearch"]}},
                "aggs": {"tags": {"terms": {"field": "tags"}}},
            },
            # The category facet itself stays unfiltered.
            "_filter_category": {
                "filter": {"match_all": {}},
                "aggs": {"category": {"terms": {"field": "category.raw"}}},
            },
        },
        "post_filter": {"terms": {"category.raw": ["opensearch"]}},
        "query": {
            "multi_match": {"fields": ("title^5", "body"), "query": "python search"}
        },
        "highlight": {"fields": {"body": {}, "title": {}}},
    }
    assert expected == search.to_dict()
def test_filters_are_applied_to_search_ant_relevant_facets():
    """With two facet filters, each facet is filtered by the other facet's filter."""
    faceted = BlogSearch(
        "python search",
        filters={"category": "opensearch", "tags": ["python", "django"]},
    )
    body = faceted.build_search().to_dict()

    # The ``must`` clauses are popped and checked by membership so the test
    # does not depend on their order.
    must_clauses = body["post_filter"]["bool"].pop("must")
    assert len(must_clauses) == 2
    assert {"terms": {"category.raw": ["opensearch"]}} in must_clauses
    assert {"terms": {"tags": ["python", "django"]}} in must_clauses

    expected = {
        "aggs": {
            "_filter_tags": {
                "filter": {"terms": {"category.raw": ["opensearch"]}},
                "aggs": {"tags": {"terms": {"field": "tags"}}},
            },
            "_filter_category": {
                "filter": {"terms": {"tags": ["python", "django"]}},
                "aggs": {"category": {"terms": {"field": "category.raw"}}},
            },
        },
        "query": {
            "multi_match": {"fields": ("title^5", "body"), "query": "python search"}
        },
        # ``must`` was popped above, leaving an empty bool behind.
        "post_filter": {"bool": {}},
        "highlight": {"fields": {"body": {}, "title": {}}},
    }
    assert expected == body
def test_date_histogram_facet_with_1970_01_01_date():
    """Bucket keys ``None`` and ``0`` both resolve to 1970-01-01 00:00."""
    facet = DateHistogramFacet()
    for bucket_key in (None, 0):
        assert facet.get_value({"key": bucket_key}) == datetime(1970, 1, 1, 0, 0)
@pytest.mark.parametrize(
    ["interval_type", "interval"],
    [
        ("interval", "year"),
        ("calendar_interval", "year"),
        ("interval", "month"),
        ("calendar_interval", "month"),
        ("interval", "week"),
        ("calendar_interval", "week"),
        ("interval", "day"),
        ("calendar_interval", "day"),
        ("fixed_interval", "day"),
        ("interval", "hour"),
        ("fixed_interval", "hour"),
        ("interval", "1Y"),
        ("calendar_interval", "1Y"),
        ("interval", "1M"),
        ("calendar_interval", "1M"),
        ("interval", "1w"),
        ("calendar_interval", "1w"),
        ("interval", "1d"),
        ("calendar_interval", "1d"),
        ("fixed_interval", "1d"),
        ("interval", "1h"),
        ("fixed_interval", "1h"),
    ],
)
def test_date_histogram_interval_types(interval_type, interval):
    """Each interval keyword is passed through to the ``date_histogram`` aggregation."""
    interval_kwargs = {interval_type: interval}
    facet = DateHistogramFacet(field="@timestamp", **interval_kwargs)

    aggregation = facet.get_aggregation().to_dict()
    assert aggregation == {
        "date_histogram": {
            "field": "@timestamp",
            interval_type: interval,
            "min_doc_count": 0,
        }
    }

    # No assertion on the result: this only checks the call does not raise.
    facet.get_value_filter(datetime.now())
def test_date_histogram_no_interval_keyerror():
    """Without any interval, ``get_value_filter`` raises ``KeyError('interval')``."""
    facet = DateHistogramFacet(field="@timestamp")

    with pytest.raises(KeyError) as excinfo:
        facet.get_value_filter(datetime.now())

    assert str(excinfo.value) == "'interval'"
@@ -0,0 +1,224 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import base64
import sys
from datetime import datetime
from ipaddress import ip_address
import pytest
from dateutil import tz
from opensearchpy import InnerDoc, Range, ValidationException
from opensearchpy.helpers import field
def test_date_range_deserialization():
    """``DateRange`` deserializes a dict into a ``Range`` holding datetimes."""
    raw = {"lt": "2018-01-01T00:30:10"}
    parsed = field.DateRange().deserialize(raw)

    assert isinstance(parsed, Range)
    assert parsed.lt == datetime(2018, 1, 1, 0, 30, 10)
def test_boolean_deserialization():
    """``Boolean`` maps "false", False, "" and 0 to falsy; True, "true", 1 to truthy."""
    boolean_field = field.Boolean()

    for falsy_input in ("false", False, "", 0):
        assert not boolean_field.deserialize(falsy_input)

    for truthy_input in (True, "true", 1):
        assert boolean_field.deserialize(truthy_input)
def test_date_field_can_have_default_tz():
    """A naive datetime or ISO string gets the field's default timezone attached."""
    date_field = field.Date(default_timezone="UTC")
    naive_now = datetime.now()

    # Same expectations for the datetime object and for its ISO string form.
    for raw_value in (naive_now, naive_now.isoformat()):
        aware = date_field._deserialize(raw_value)
        assert aware.tzinfo == tz.gettz("UTC")
        assert naive_now.isoformat() + "+00:00" == aware.isoformat()
def test_custom_field_car_wrap_other_field():
    """A ``CustomField`` serializes as the field returned by ``builtin_type``."""

    class MyField(field.CustomField):
        @property
        def builtin_type(self):
            return field.Text(**self._params)

    expected = {"type": "text", "index": "not_analyzed"}
    assert expected == MyField(index="not_analyzed").to_dict()
def test_field_from_dict():
    """``construct_field`` builds a typed field from a dict and round-trips it."""
    definition = {"type": "text", "index": "not_analyzed"}
    built = field.construct_field(definition)

    assert isinstance(built, field.Text)
    assert {"type": "text", "index": "not_analyzed"} == built.to_dict()
def test_multi_fields_are_accepted_and_parsed():
    """Sub-fields may be given as dicts or field objects and both serialize."""
    sub_fields = {
        "raw": {"type": "keyword"},
        "eng": field.Text(analyzer="english"),
    }
    built = field.construct_field("text", fields=sub_fields)

    assert isinstance(built, field.Text)

    expected = {
        "type": "text",
        "fields": {
            "raw": {"type": "keyword"},
            "eng": {"type": "text", "analyzer": "english"},
        },
    }
    assert expected == built.to_dict()
def test_nested_provides_direct_access_to_its_fields():
    """``Nested`` supports ``in`` and item lookup for its declared properties."""
    name_definition = {"type": "text", "index": "not_analyzed"}
    nested = field.Nested(properties={"name": name_definition})

    assert "name" in nested
    assert nested["name"] == field.Text(index="not_analyzed")
def test_field_supports_multiple_analyzers():
    """Both ``analyzer`` and ``search_analyzer`` are kept in the serialized field."""
    text_field = field.Text(analyzer="snowball", search_analyzer="keyword")
    expected = {
        "analyzer": "snowball",
        "search_analyzer": "keyword",
        "type": "text",
    }
    assert expected == text_field.to_dict()
def test_multifield_supports_multiple_analyzers():
    """Analyzer settings on sub-fields are serialized per sub-field."""
    sub_fields = {
        "f1": field.Text(search_analyzer="keyword", analyzer="snowball"),
        "f2": field.Text(analyzer="keyword"),
    }
    text_field = field.Text(fields=sub_fields)

    expected = {
        "fields": {
            "f1": {
                "analyzer": "snowball",
                "search_analyzer": "keyword",
                "type": "text",
            },
            "f2": {"analyzer": "keyword", "type": "text"},
        },
        "type": "text",
    }
    assert expected == text_field.to_dict()
def test_scaled_float():
    """``ScaledFloat`` requires a scaling factor and serializes it."""
    # Constructing without the factor is a TypeError.
    with pytest.raises(TypeError):
        field.ScaledFloat()

    scaled = field.ScaledFloat(123)
    assert scaled.to_dict() == {"scaling_factor": 123, "type": "scaled_float"}
@pytest.mark.skipif(sys.version_info < (3, 6), reason="requires python3.6 or higher")
def test_ipaddress():
    """``Ip`` converts strings to ``ipaddress`` objects and back.

    Covers IPv4 and IPv6 input, the serialize/deserialize round trip,
    ``None`` passthrough, and rejection of a non-address string.
    """
    f = field.Ip()
    assert f.deserialize("127.0.0.1") == ip_address("127.0.0.1")
    assert f.deserialize("::1") == ip_address("::1")
    assert f.serialize(f.deserialize("::1")) == "::1"
    assert f.deserialize(None) is None

    # The call itself must raise; wrapping it in ``assert`` added nothing and
    # could mask a missing exception behind an unrelated AssertionError.
    with pytest.raises(ValueError):
        f.deserialize("not_an_ipaddress")
def test_float():
    """``Float`` parses numeric strings, passes ``None`` through, rejects junk."""
    f = field.Float()
    assert f.deserialize("42") == 42.0
    assert f.deserialize(None) is None

    # The call itself must raise; wrapping it in ``assert`` added nothing and
    # could mask a missing exception behind an unrelated AssertionError.
    with pytest.raises(ValueError):
        f.deserialize("not_a_float")
def test_integer():
    """``Integer`` parses numeric strings, passes ``None`` through, rejects junk."""
    f = field.Integer()
    assert f.deserialize("42") == 42
    assert f.deserialize(None) is None

    # The call itself must raise; wrapping it in ``assert`` added nothing and
    # could mask a missing exception behind an unrelated AssertionError.
    with pytest.raises(ValueError):
        f.deserialize("not_an_integer")
def test_binary():
    """``Binary`` decodes base64 input and round-trips its own serialization."""
    binary_field = field.Binary()
    payload = b"42"

    assert binary_field.deserialize(base64.b64encode(payload)) == b"42"
    assert binary_field.deserialize(binary_field.serialize(payload)) == b"42"
    assert binary_field.deserialize(None) is None
def test_constant_keyword():
    """``ConstantKeyword`` serializes with type ``constant_keyword``."""
    serialized = field.ConstantKeyword().to_dict()
    assert serialized == {"type": "constant_keyword"}
def test_rank_features():
    """``RankFeatures`` serializes with type ``rank_features``."""
    serialized = field.RankFeatures().to_dict()
    assert serialized == {"type": "rank_features"}
def test_object_dynamic_values():
    """``Object`` keeps whichever ``dynamic`` value it was given."""
    for dynamic_setting in (True, False, "strict"):
        serialized = field.Object(dynamic=dynamic_setting).to_dict()
        assert serialized["dynamic"] == dynamic_setting
def test_object_disabled():
    """``enabled=False`` is serialized alongside the object type."""
    disabled = field.Object(enabled=False)
    assert disabled.to_dict() == {"type": "object", "enabled": False}
def test_object_constructor():
    """``Object`` accepts ``doc_class`` or ``properties``, but not a mix."""

    class Inner(InnerDoc):
        inner_int = field.Integer()

    expected = {"type": "object", "properties": {"inner_int": {"type": "integer"}}}

    from_doc_class = field.Object(doc_class=Inner)
    assert from_doc_class.to_dict() == expected

    from_properties = field.Object(properties={"inner_int": field.Integer()})
    assert from_properties.to_dict() == expected

    # Combining ``doc_class`` with extra mapping arguments is rejected.
    with pytest.raises(ValidationException):
        field.Object(doc_class=Inner, properties={"inner_int": field.Integer()})

    with pytest.raises(ValidationException):
        field.Object(doc_class=Inner, dynamic=False)
@@ -0,0 +1,196 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import string
from random import choice
from pytest import raises
from opensearchpy import Date, Document, Index, IndexTemplate, Text, analyzer
class Post(Document):
    """Document fixture with one text field and one date field."""

    title = Text()
    published_from = Date()
def test_multiple_doc_types_will_combine_mappings():
    """Registering two documents on one index merges their properties."""

    class User(Document):
        username = Text()

    index = Index("i")
    for doc_class in (Post, User):
        index.document(doc_class)

    expected_properties = {
        "title": {"type": "text"},
        "username": {"type": "text"},
        "published_from": {"type": "date"},
    }
    assert {"mappings": {"properties": expected_properties}} == index.to_dict()
def test_search_is_limited_to_index_name():
    """``Index.search()`` returns a search scoped to that index's name."""
    search = Index("my-index").search()
    assert search._index == ["my-index"]
def test_cloned_index_has_copied_settings_and_using():
    """``clone()`` keeps the client and copies (not shares) the settings."""
    client = object()
    original = Index("my-index", using=client)
    original.settings(number_of_shards=1)

    clone = original.clone("my-other-index")

    assert "my-other-index" == clone._name
    assert client is clone._using
    # Equal content, but a distinct settings object.
    assert original._settings == clone._settings
    assert original._settings is not clone._settings
def test_cloned_index_has_analysis_attribute():
    """
    Regression test for Issue #582, where `Index.clone()` did not copy the
    `_analysis` attribute over to the clone.
    """
    client = object()
    original = Index("my-index", using=client)

    analyzer_name = "".join(choice(string.ascii_letters) for _ in range(100))
    custom_analyzer = analyzer(analyzer_name, tokenizer="standard", filter="standard")
    original.analyzer(custom_analyzer)

    clone = original.clone("my-clone-index")

    original_analysis = original.to_dict()["settings"]["analysis"]
    assert original_analysis == clone.to_dict()["settings"]["analysis"]
def test_settings_are_saved():
    """Successive ``settings()`` calls accumulate in ``to_dict()``."""
    index = Index("i")
    index.settings(number_of_replicas=0)
    index.settings(number_of_shards=1)

    expected = {"settings": {"number_of_shards": 1, "number_of_replicas": 0}}
    assert expected == index.to_dict()
def test_registered_doc_type_included_in_to_dict():
    """A registered document's mapping is part of the index body."""
    index = Index("i", using="alias")
    index.document(Post)

    expected_properties = {
        "title": {"type": "text"},
        "published_from": {"type": "date"},
    }
    assert {"mappings": {"properties": expected_properties}} == index.to_dict()
def test_registered_doc_type_included_in_search():
    """A registered document class is carried into searches from the index."""
    index = Index("i", using="alias")
    index.document(Post)

    search = index.search()

    assert search._doc_type == [Post]
def test_aliases_add_to_object():
    """Aliases passed to ``aliases()`` are stored on the index object."""
    alias_name = "".join(choice(string.ascii_letters) for _ in range(100))
    alias_dict = {alias_name: {}}

    index = Index("i", using="alias")
    index.aliases(**alias_dict)

    assert index._aliases == alias_dict
def test_aliases_returned_from_to_dict():
    """Stored aliases are returned under ``"aliases"`` by ``to_dict()``."""
    alias_name = "".join(choice(string.ascii_letters) for _ in range(100))
    alias_dict = {alias_name: {}}

    index = Index("i", using="alias")
    index.aliases(**alias_dict)

    stored = index._aliases
    serialized = index.to_dict()["aliases"]
    assert stored == serialized == alias_dict
def test_analyzers_added_to_object():
    """A custom analyzer registered on an index is stored in ``_analysis``."""
    analyzer_name = "".join(choice(string.ascii_letters) for _ in range(100))
    custom_analyzer = analyzer(analyzer_name, tokenizer="standard", filter="standard")

    index = Index("i", using="alias")
    index.analyzer(custom_analyzer)

    stored_definition = index._analysis["analyzer"][analyzer_name]
    assert stored_definition == {
        "filter": ["standard"],
        "type": "custom",
        "tokenizer": "standard",
    }
def test_analyzers_returned_from_to_dict():
    """A registered analyzer appears under ``settings.analysis`` in ``to_dict()``."""
    analyzer_name = "".join(choice(string.ascii_letters) for _ in range(100))
    custom_analyzer = analyzer(analyzer_name, tokenizer="standard", filter="standard")

    index = Index("i", using="alias")
    index.analyzer(custom_analyzer)

    analyzers = index.to_dict()["settings"]["analysis"]["analyzer"]
    assert analyzers[analyzer_name] == {
        "filter": ["standard"],
        "type": "custom",
        "tokenizer": "standard",
    }
def test_conflicting_analyzer_raises_error():
    """Re-registering an analyzer name with a different definition raises."""
    index = Index("i")
    index.analyzer("my_analyzer", tokenizer="whitespace", filter=["lowercase", "stop"])

    # Same name, different tokenizer.
    with raises(ValueError):
        index.analyzer("my_analyzer", tokenizer="keyword", filter=["lowercase", "stop"])
def test_index_template_can_have_order():
    """``as_template`` carries the ``order`` argument into the template body."""
    template = Index("i-*").as_template("i", order=2)
    expected = {"index_patterns": ["i-*"], "order": 2}
    assert expected == template.to_dict()
def test_index_template_save_result(mock_client):
    """``IndexTemplate.save`` returns what the client's ``put_template`` returns."""
    template = IndexTemplate("test-template", "test-*")
    saved = template.save(using="mock")
    assert saved == mock_client.indices.put_template()
@@ -0,0 +1,232 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import json
from opensearchpy import Keyword, Nested, Text
from opensearchpy.helpers import analysis, mapping
def test_mapping_can_has_fields():
    """``Mapping.field`` is chainable and each field lands under ``properties``."""
    mapped = mapping.Mapping()
    mapped.field("name", "text").field("tags", "keyword")

    expected = {
        "properties": {"name": {"type": "text"}, "tags": {"type": "keyword"}}
    }
    assert expected == mapped.to_dict()
def test_mapping_update_is_recursive():
    """``update(..., update_only=True)`` merges nested properties and meta."""
    target = mapping.Mapping()
    target.field("title", "text")
    target.field("author", "object")
    target.field("author", "object", properties={"name": {"type": "text"}})
    target.meta("_all", enabled=False)
    target.meta("dynamic", False)

    incoming = mapping.Mapping()
    incoming.field("published_from", "date")
    incoming.field("author", "object", properties={"email": {"type": "text"}})
    incoming.field("title", "text")
    incoming.field("lang", "keyword")
    incoming.meta("_analyzer", path="lang")

    target.update(incoming, update_only=True)

    expected = {
        "_all": {"enabled": False},
        "_analyzer": {"path": "lang"},
        "dynamic": False,
        "properties": {
            "published_from": {"type": "date"},
            "title": {"type": "text"},
            "lang": {"type": "keyword"},
            # "author" ends up with the sub-properties of both mappings.
            "author": {
                "type": "object",
                "properties": {"name": {"type": "text"}, "email": {"type": "text"}},
            },
        },
    }
    assert expected == target.to_dict()
def test_properties_can_iterate_over_all_the_fields():
    """``_collect_fields`` yields top-level, multi- and nested sub-fields."""
    mapped = mapping.Mapping()
    mapped.field("f1", "text", test_attr="f1", fields={"f2": Keyword(test_attr="f2")})
    mapped.field("f3", Nested(test_attr="f3", properties={"f4": Text(test_attr="f4")}))

    collected = {f.test_attr for f in mapped.properties._collect_fields()}
    assert {"f1", "f2", "f3", "f4"} == collected
def test_mapping_can_collect_all_analyzers_and_normalizers():
    """``_collect_analysis`` gathers custom analyzers, normalizers and their parts.

    Names given without a definition ("english", "unknown_custom") are not in
    the expected result. The mapping must also survive a JSON round trip.
    """
    stop_ab = analysis.token_filter("my_filter1", "stop", stopwords=["a", "b"])
    keyword_analyzer = analysis.analyzer(
        "my_analyzer1", tokenizer="keyword", filter=["lowercase", stop_ab]
    )
    english_analyzer = analysis.analyzer("english")
    unknown_analyzer = analysis.analyzer("unknown_custom")

    trigram = analysis.tokenizer("trigram", "nGram", min_gram=3, max_gram=3)
    stop_cd = analysis.token_filter("my_filter2", "stop", stopwords=["c", "d"])
    trigram_analyzer = analysis.analyzer(
        "my_analyzer2", tokenizer=trigram, filter=[stop_cd]
    )
    all_analyzer = analysis.analyzer("my_analyzer3", tokenizer="keyword")

    lowercase_normalizer = analysis.normalizer("my_normalizer1", filter=["lowercase"])
    stop_ef = analysis.token_filter("my_filter3", "stop", stopwords=["e", "f"])
    chained_normalizer = analysis.normalizer(
        "my_normalizer2", filter=["my_filter1", "my_filter2", stop_ef]
    )
    unknown_normalizer = analysis.normalizer("unknown_custom")

    mapped = mapping.Mapping()
    mapped.field(
        "title",
        "text",
        analyzer=keyword_analyzer,
        fields={
            "english": Text(analyzer=english_analyzer),
            "unknown": Keyword(search_analyzer=unknown_analyzer),
        },
    )
    mapped.field("comments", Nested(properties={"author": Text(analyzer=trigram_analyzer)}))
    mapped.field("normalized_title", "keyword", normalizer=lowercase_normalizer)
    mapped.field("normalized_comment", "keyword", normalizer=chained_normalizer)
    mapped.field("unknown", "keyword", normalizer=unknown_normalizer)
    mapped.meta("_all", analyzer=all_analyzer)

    expected = {
        "analyzer": {
            "my_analyzer1": {
                "filter": ["lowercase", "my_filter1"],
                "tokenizer": "keyword",
                "type": "custom",
            },
            "my_analyzer2": {
                "filter": ["my_filter2"],
                "tokenizer": "trigram",
                "type": "custom",
            },
            "my_analyzer3": {"tokenizer": "keyword", "type": "custom"},
        },
        "normalizer": {
            "my_normalizer1": {"filter": ["lowercase"], "type": "custom"},
            "my_normalizer2": {
                "filter": ["my_filter1", "my_filter2", "my_filter3"],
                "type": "custom",
            },
        },
        "filter": {
            "my_filter1": {"stopwords": ["a", "b"], "type": "stop"},
            "my_filter2": {"stopwords": ["c", "d"], "type": "stop"},
            "my_filter3": {"stopwords": ["e", "f"], "type": "stop"},
        },
        "tokenizer": {"trigram": {"max_gram": 3, "min_gram": 3, "type": "nGram"}},
    }
    assert expected == mapped._collect_analysis()

    # The serialized mapping must contain only JSON-compatible values.
    assert json.loads(json.dumps(mapped.to_dict())) == mapped.to_dict()
def test_mapping_can_collect_multiple_analyzers():
    """Analyzers reused across fields and sub-fields are each collected once."""
    stop_ab = analysis.token_filter("my_filter1", "stop", stopwords=["a", "b"])
    index_analyzer = analysis.analyzer(
        "my_analyzer1", tokenizer="keyword", filter=["lowercase", stop_ab]
    )

    trigram = analysis.tokenizer("trigram", "nGram", min_gram=3, max_gram=3)
    stop_cd = analysis.token_filter("my_filter2", "stop", stopwords=["c", "d"])
    search_analyzer = analysis.analyzer(
        "my_analyzer2", tokenizer=trigram, filter=[stop_cd]
    )

    mapped = mapping.Mapping()
    mapped.field("title", "text", analyzer=index_analyzer, search_analyzer=search_analyzer)
    mapped.field(
        "text",
        "text",
        analyzer=index_analyzer,
        fields={
            "english": Text(analyzer=index_analyzer),
            "unknown": Keyword(analyzer=index_analyzer, search_analyzer=search_analyzer),
        },
    )

    expected = {
        "analyzer": {
            "my_analyzer1": {
                "filter": ["lowercase", "my_filter1"],
                "tokenizer": "keyword",
                "type": "custom",
            },
            "my_analyzer2": {
                "filter": ["my_filter2"],
                "tokenizer": "trigram",
                "type": "custom",
            },
        },
        "filter": {
            "my_filter1": {"stopwords": ["a", "b"], "type": "stop"},
            "my_filter2": {"stopwords": ["c", "d"], "type": "stop"},
        },
        "tokenizer": {"trigram": {"max_gram": 3, "min_gram": 3, "type": "nGram"}},
    }
    assert expected == mapped._collect_analysis()
def test_even_non_custom_analyzers_can_have_params():
    """An analyzer with an explicit ``type`` and params is collected verbatim."""
    # NOTE(review): this raw string holds two backslashes; confirm whether
    # r"\s+" was intended. The test only checks the value is passed through.
    pattern_analyzer = analysis.analyzer("whitespace", type="pattern", pattern=r"\\s+")

    mapped = mapping.Mapping()
    mapped.field("title", "text", analyzer=pattern_analyzer)

    expected = {
        "analyzer": {"whitespace": {"type": "pattern", "pattern": r"\\s+"}}
    }
    assert expected == mapped._collect_analysis()
def test_resolve_field_can_resolve_multifields():
    """A dotted path resolves to the sub-field of a multi-field."""
    mapped = mapping.Mapping()
    mapped.field("title", "text", fields={"keyword": Keyword()})

    resolved = mapped.resolve_field("title.keyword")
    assert isinstance(resolved, Keyword)
def test_resolve_nested():
    """``resolve_nested`` returns the enclosing nested paths plus the leaf field."""
    mapped = mapping.Mapping()
    mapped.field(
        "n1", "nested", properties={"n2": Nested(properties={"k1": Keyword()})}
    )
    mapped.field("k2", "keyword")

    # Leaf two nested levels deep: both enclosing paths are reported.
    nested_paths, leaf = mapped.resolve_nested("n1.n2.k1")
    assert nested_paths == ["n1", "n1.n2"]
    assert isinstance(leaf, Keyword)

    # Top-level field: no nested paths at all.
    nested_paths, leaf = mapped.resolve_nested("k2")
    assert nested_paths == []
    assert isinstance(leaf, Keyword)
@@ -0,0 +1,564 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from pytest import raises
from opensearchpy.helpers import function, query
def test_empty_Q_is_match_all():
    """``Q()`` with no arguments produces a ``MatchAll`` query."""
    empty = query.Q()

    assert isinstance(empty, query.MatchAll)
    assert query.MatchAll() == empty
def test_match_to_dict():
    """``Match`` serializes its keyword argument under ``"match"``."""
    expected = {"match": {"f": "value"}}
    assert expected == query.Match(f="value").to_dict()
def test_match_to_dict_extra():
    """Extra keyword arguments such as ``boost`` are serialized next to the field."""
    expected = {"match": {"f": "value", "boost": 2}}
    assert expected == query.Match(f="value", boost=2).to_dict()
def test_fuzzy_to_dict():
    """``Fuzzy`` serializes its keyword argument under ``"fuzzy"``."""
    expected = {"fuzzy": {"f": "value"}}
    assert expected == query.Fuzzy(f="value").to_dict()
def test_prefix_to_dict():
    """``Prefix`` serializes its keyword argument under ``"prefix"``."""
    expected = {"prefix": {"f": "value"}}
    assert expected == query.Prefix(f="value").to_dict()
def test_term_to_dict():
    """``Term`` serializes its keyword argument under ``"term"``."""
    expected = {"term": {"_type": "article"}}
    assert expected == query.Term(_type="article").to_dict()
def test_bool_to_dict():
    """An empty ``should`` list is left out of the serialized ``Bool``."""
    bool_query = query.Bool(must=[query.Match(f="value")], should=[])
    expected = {"bool": {"must": [{"match": {"f": "value"}}]}}
    assert expected == bool_query.to_dict()
def test_dismax_to_dict():
    """``DisMax`` serializes as ``dis_max`` with its inner queries expanded."""
    expected = {"dis_max": {"queries": [{"term": {"_type": "article"}}]}}
    dis_max = query.DisMax(queries=[query.Term(_type="article")])
    assert expected == dis_max.to_dict()
def test_bool_from_dict_issue_318():
    """A ``must_not`` given as a single dict (not a list) is accepted by ``Q``."""
    raw = {"bool": {"must_not": {"match": {"field": "value"}}}}
    parsed = query.Q(raw)
    assert parsed == ~query.Match(field="value")
def test_repr():
    """``repr`` of a ``Bool`` shows its non-empty clauses only."""
    bool_query = query.Bool(must=[query.Match(f="value")], should=[])
    assert "Bool(must=[Match(f='value')])" == repr(bool_query)
def test_query_clone():
    """``_clone()`` returns an equal but distinct query object."""
    original = query.Bool(
        must=[query.Match(x=42)],
        should=[query.Match(g="v2")],
        must_not=[query.Match(title="value")],
    )

    duplicate = original._clone()

    assert original == duplicate
    assert original is not duplicate
def test_bool_converts_its_init_args_to_queries():
    """Raw dicts passed to ``Bool`` are turned into query objects."""
    bool_query = query.Bool(must=[{"match": {"f": "value"}}])

    assert len(bool_query.must) == 1
    assert bool_query.must[0] == query.Match(f="value")
def test_two_queries_make_a_bool():
    """Combining two non-bool queries with ``&`` yields a ``Bool`` with both in ``must``."""
    q1 = query.Match(f="value1")
    # The parameter key used to be misspelled "opeartor"; the assertions do
    # not depend on it, but the fixture should be a well-formed match query.
    q2 = query.Match(message={"query": "this is a test", "operator": "and"})

    q = q1 & q2

    assert isinstance(q, query.Bool)
    assert [q1, q2] == q.must
def test_other_and_bool_appends_other_to_must():
    """``query & Bool`` gives a new ``Bool`` with the query in ``must``."""
    match = query.Match(f="value1")
    empty_bool = query.Bool()

    combined = match & empty_bool

    assert combined is not empty_bool
    assert combined.must[0] == match
def test_bool_and_other_appends_other_to_must():
    """``Bool & query`` gives a new ``Bool`` with the query in ``must``."""
    match = query.Match(f="value1")
    empty_bool = query.Bool()

    combined = empty_bool & match

    assert combined is not empty_bool
    assert combined.must[0] == match
def test_bool_and_other_sets_min_should_match_if_needed():
    """And-ing a query onto a should-only ``Bool`` sets ``minimum_should_match=1``."""
    category_term = query.Q("term", category=1)
    should_only = query.Q(
        "bool", should=[query.Q("term", name="aaa"), query.Q("term", name="bbb")]
    )

    combined = category_term & should_only

    assert combined == query.Bool(
        must=[category_term],
        should=[query.Q("term", name="aaa"), query.Q("term", name="bbb")],
        minimum_should_match=1,
    )
def test_bool_with_different_minimum_should_match_should_not_be_combined():
    """Or-ing bools with differing ``minimum_should_match`` nests them, unmerged."""

    def should_bool(prefix, minimum):
        # Bool with four term clauses on "field": <prefix>1 .. <prefix>4.
        clauses = [query.Q("term", field=prefix + str(n)) for n in range(1, 5)]
        return query.Q("bool", minimum_should_match=minimum, should=clauses)

    q1 = should_bool("aa", 2)
    q2 = should_bool("bb", 3)
    q3 = should_bool("cc", 4)

    pair = q1 | q2
    assert pair == query.Bool(should=[q1, q2])

    triple = q1 | q2 | q3
    assert triple == query.Bool(should=[q1, q2, q3])
def test_empty_bool_has_min_should_match_0():
    """A Bool created without clauses reports ``_min_should_match`` of zero."""
    empty_bool = query.Bool()
    assert 0 == empty_bool._min_should_match
def test_query_and_query_creates_bool():
    """``&`` between two Match queries produces a Bool listing both under ``must``."""
    left = query.Match(f=42)
    right = query.Match(g=47)

    combined = left & right

    assert isinstance(combined, query.Bool)
    assert combined.must == [left, right]
def test_match_all_and_query_equals_other():
    """And-ing a query with MatchAll compares equal to the query itself."""
    match_query = query.Match(f=42)
    everything = query.MatchAll()

    combined = match_query & everything

    assert match_query == combined
def test_not_match_all_is_match_none():
    """Inverting MatchAll compares equal to MatchNone."""
    inverted = ~query.MatchAll()
    assert inverted == query.MatchNone()
def test_not_match_none_is_match_all():
    """Inverting MatchNone compares equal to MatchAll."""
    inverted = ~query.MatchNone()
    assert inverted == query.MatchAll()
def test_invert_empty_bool_is_match_none():
    """Inverting a Bool that has no clauses compares equal to MatchNone."""
    inverted = ~query.Bool()
    assert inverted == query.MatchNone()
def test_match_none_or_query_equals_query():
    """Or-ing a query with MatchNone compares equal to the query alone."""
    match_query = query.Match(f=42)
    nothing = query.MatchNone()

    assert (match_query | nothing) == query.Match(f=42)
def test_match_none_and_query_equals_match_none():
    """And-ing a query with MatchNone compares equal to MatchNone."""
    match_query = query.Match(f=42)
    nothing = query.MatchNone()

    assert (match_query & nothing) == query.MatchNone()
def test_bool_and_bool():
    """And-ing two Bool queries merges their clauses into one Bool."""
    first, second, third = query.Match(f=1), query.Match(f=2), query.Match(f=3)

    # Case 1: must + should on the left, must_not on the right.
    left = query.Bool(must=[first], should=[second])
    right = query.Bool(must_not=[third])
    assert left & right == query.Bool(
        must=[first], must_not=[third], should=[second], minimum_should_match=0
    )

    # Case 2: the right side's single should clause is expected under ``must``.
    left = query.Bool(must=[first], should=[first, second])
    right = query.Bool(should=[third])
    assert left & right == query.Bool(
        must=[first, third], should=[first, second], minimum_should_match=0
    )
def test_bool_and_bool_with_min_should_match():
    """Two single-should Bools with minimum_should_match=1 and to a must-only Bool."""
    first, second = query.Match(f=1), query.Match(f=2)
    left = query.Q("bool", minimum_should_match=1, should=[first])
    right = query.Q("bool", minimum_should_match=1, should=[second])

    expected = query.Q("bool", must=[first, second])
    assert expected == left & right
def test_inverted_query_becomes_bool_with_must_not():
    """Inverting a plain query wraps it in a Bool ``must_not`` clause."""
    match_query = query.Match(f=42)
    expected = query.Bool(must_not=[query.Match(f=42)])
    assert ~match_query == expected
def test_inverted_query_with_must_not_become_should():
    """Inverting a must_not-only Bool turns its clauses into ``should`` clauses."""
    negated = query.Q(
        "bool", must_not=[query.Q("match", f=1), query.Q("match", f=2)]
    )
    expected = query.Q(
        "bool", should=[query.Q("match", f=1), query.Q("match", f=2)]
    )
    assert ~negated == expected
def test_inverted_query_with_must_and_must_not():
    """Inverting a Bool with must and must_not yields a ``should`` of the negations.

    Each ``must`` clause is expected wrapped in its own ``must_not`` Bool, while
    each original ``must_not`` clause is expected as a plain ``should`` entry.
    """
    q = query.Q(
        "bool",
        must=[query.Q("match", f=3), query.Q("match", f=4)],
        must_not=[query.Q("match", f=1), query.Q("match", f=2)],
    )
    assert ~q == query.Q(
        "bool",
        should=[
            # negation of must
            query.Q("bool", must_not=[query.Q("match", f=3)]),
            query.Q("bool", must_not=[query.Q("match", f=4)]),
            # negation of must_not
            query.Q("match", f=1),
            query.Q("match", f=2),
        ],
    )
def test_double_invert_returns_original_query():
    """Applying ``~`` twice gives a query equal to the original."""
    original = query.Match(f=42)
    twice_inverted = ~~original
    assert original == twice_inverted
def test_bool_query_gets_inverted_internally():
    """Inverting a Bool negates each clause and collects the results in ``should``."""
    source = query.Bool(must_not=[query.Match(f=42)], must=[query.Match(g="v")])
    expected = query.Bool(
        should=[
            # the must clause, negated
            query.Bool(must_not=[query.Match(g="v")]),
            # the must_not clause, negated back to a plain query
            query.Match(f=42),
        ]
    )
    assert ~source == expected
def test_match_all_or_something_is_match_all():
    """Or-ing MatchAll with any query, in either order, equals MatchAll."""
    everything = query.MatchAll()
    other = query.Match(f=42)

    assert (everything | other) == query.MatchAll()
    assert (other | everything) == query.MatchAll()
def test_or_produces_bool_with_should():
    """``|`` between two plain queries yields a Bool listing both under ``should``."""
    left = query.Match(f=42)
    right = query.Match(g="v")

    either = left | right

    assert either == query.Bool(should=[left, right])
def test_or_bool_doesnt_loop_infinitely_issue_37():
    """Or-ing a plain query with an inverted one terminates and nests under ``should``."""
    either = query.Match(f=42) | ~query.Match(f=47)
    expected = query.Bool(
        should=[query.Bool(must_not=[query.Match(f=47)]), query.Match(f=42)]
    )
    assert either == expected
def test_or_bool_doesnt_loop_infinitely_issue_96():
    """Or-ing two inverted queries terminates and keeps both negations in ``should``."""
    either = ~query.Match(f=42) | ~query.Match(f=47)
    expected = query.Bool(
        should=[
            query.Bool(must_not=[query.Match(f=42)]),
            query.Bool(must_not=[query.Match(f=47)]),
        ]
    )
    assert either == expected
def test_bool_will_append_another_query_with_or():
    """Or-ing a query with a should-only Bool appends it to that ``should`` list."""
    should_bool = query.Bool(should=[query.Match(f="v"), query.Match(f="v2")])
    extra = query.Match(g=42)

    expected = query.Bool(
        should=[query.Match(f="v"), query.Match(f="v2"), extra]
    )
    assert (extra | should_bool) == expected
def test_bool_queries_with_only_should_get_concatenated():
    """Or-ing two should-only Bools concatenates their ``should`` lists."""
    left = query.Bool(should=[query.Match(f=1), query.Match(f=2)])
    right = query.Bool(should=[query.Match(f=3), query.Match(f=4)])

    expected = query.Bool(
        should=[query.Match(f=1), query.Match(f=2), query.Match(f=3), query.Match(f=4)]
    )
    assert (left | right) == expected
def test_two_bool_queries_append_one_to_should_if_possible():
    """Or-ing a should-only Bool with a must-only Bool nests the latter in ``should``.

    The same result is expected regardless of operand order.
    """
    should_bool = query.Bool(should=[query.Match(f="v")])
    must_bool = query.Bool(must=[query.Match(f="v")])

    forward = should_bool | must_bool
    assert forward == query.Bool(
        should=[query.Match(f="v"), query.Bool(must=[query.Match(f="v")])]
    )

    backward = must_bool | should_bool
    assert backward == query.Bool(
        should=[query.Match(f="v"), query.Bool(must=[query.Match(f="v")])]
    )
def test_queries_are_registered():
    """The ``match`` name is present in the query registry and maps to Match."""
    registry = query.Query._classes
    assert "match" in registry
    assert registry["match"] is query.Match
def test_defining_query_registers_it():
    """Declaring a Query subclass with a ``name`` adds it to the registry."""

    class MyQuery(query.Query):
        name = "my_query"

    registry = query.Query._classes
    assert "my_query" in registry
    assert registry["my_query"] is MyQuery
def test_Q_passes_query_through():
    """Passing an existing query object to ``Q`` returns that same object."""
    existing = query.Match(f="value1")
    assert query.Q(existing) is existing
def test_Q_constructs_query_by_name():
    """``Q("match", ...)`` builds a Match whose params are the given kwargs."""
    built = query.Q("match", f="value")

    assert isinstance(built, query.Match)
    assert {"f": "value"} == built._params
def test_Q_translates_double_underscore_to_dots_in_param_names():
    """A ``__`` in a keyword name is stored as a ``.`` in the query params."""
    built = query.Q("match", comment__author="honza")
    assert {"comment.author": "honza"} == built._params
def test_Q_doesn_translate_double_underscore_to_dots_in_param_names():
    """With ``_expand__to_dot=False`` the ``__`` in a keyword name is kept as is."""
    built = query.Q("match", comment__author="honza", _expand__to_dot=False)
    assert {"comment__author": "honza"} == built._params
def test_Q_constructs_simple_query_from_dict():
    """A single-key dict passed to ``Q`` becomes the query named by that key."""
    raw = {"match": {"f": "value"}}
    built = query.Q(raw)

    assert isinstance(built, query.Match)
    assert {"f": "value"} == built._params
def test_Q_constructs_compound_query_from_dict():
    """A nested bool dict passed to ``Q`` equals the Bool built from query objects."""
    raw = {"bool": {"must": [{"match": {"f": "value"}}]}}
    built = query.Q(raw)
    assert built == query.Bool(must=[query.Match(f="value")])
def test_Q_raises_error_when_passed_in_dict_and_params():
    """``Q`` rejects a dict definition that is combined with keyword params."""
    raw = {"match": {"f": "value"}}
    # NOTE(review): ``Exception`` is very broad; consider narrowing to the
    # concrete type ``Q`` raises here -- confirm against its implementation.
    with raises(Exception):
        query.Q(raw, f="value")
def test_Q_raises_error_when_passed_in_query_and_params():
    """``Q`` rejects a query object that is combined with keyword params."""
    existing = query.Match(f="value1")
    # NOTE(review): ``Exception`` is very broad; consider narrowing to the
    # concrete type ``Q`` raises here -- confirm against its implementation.
    with raises(Exception):
        query.Q(existing, f="value")
def test_Q_raises_error_on_unknown_query():
    """``Q`` raises when asked to build a query under an unregistered name."""
    unknown_name = "not a query"
    # NOTE(review): ``Exception`` is very broad; consider narrowing to the
    # concrete type raised for unknown names -- confirm against the library.
    with raises(Exception):
        query.Q(unknown_name, f="value")
def test_match_all_and_anything_is_anything():
    """And-ing MatchAll with a query, in either order, equals that query."""
    everything = query.MatchAll()
    specific = query.Match(f=42)

    assert everything & specific == specific
    assert specific & everything == specific
def test_function_score_with_functions():
    """A function_score built from an SF serialises it under ``functions``."""
    script_fn = query.SF("script_score", script="doc['comment_count'] * _score")
    q = query.Q("function_score", functions=[script_fn])

    expected = {
        "function_score": {
            "functions": [{"script_score": {"script": "doc['comment_count'] * _score"}}]
        }
    }
    assert expected == q.to_dict()
def test_function_score_with_no_function_is_boost_factor():
    """An SF built from a weight/filter dict serialises both keys under ``functions``."""
    weighted = query.SF({"weight": 20, "filter": query.Q("term", f=42)})
    q = query.Q("function_score", functions=[weighted])

    expected = {
        "function_score": {"functions": [{"filter": {"term": {"f": 42}}, "weight": 20}]}
    }
    assert expected == q.to_dict()
def test_function_score_to_dict():
    """A function_score with an inner query and two functions serialises all parts."""
    inner_query = query.Q("match", title="python")
    score_functions = [
        query.SF("random_score"),
        query.SF(
            "field_value_factor",
            field="comment_count",
            filter=query.Q("term", tags="python"),
        ),
    ]
    q = query.Q("function_score", query=inner_query, functions=score_functions)

    expected = {
        "function_score": {
            "query": {"match": {"title": "python"}},
            "functions": [
                {"random_score": {}},
                {
                    "filter": {"term": {"tags": "python"}},
                    "field_value_factor": {"field": "comment_count"},
                },
            ],
        }
    }
    assert expected == q.to_dict()
def test_function_score_with_single_function():
    """A function given inline on function_score is exposed as the sole ``functions`` item."""
    raw = {
        "function_score": {
            "filter": {"term": {"tags": "python"}},
            "script_score": {"script": "doc['comment_count'] * _score"},
        }
    }

    parsed = query.Q(raw)

    assert isinstance(parsed, query.FunctionScore)
    assert isinstance(parsed.filter, query.Term)
    assert len(parsed.functions) == 1

    only_function = parsed.functions[0]
    assert isinstance(only_function, function.ScriptScore)
    assert "doc['comment_count'] * _score" == only_function.script
def test_function_score_from_dict():
    """A function_score dict with two functions is parsed into typed objects."""
    raw = {
        "function_score": {
            "filter": {"term": {"tags": "python"}},
            "functions": [
                {
                    "filter": {"terms": {"tags": "python"}},
                    "script_score": {"script": "doc['comment_count'] * _score"},
                },
                {"boost_factor": 6},
            ],
        }
    }

    parsed = query.Q(raw)

    assert isinstance(parsed, query.FunctionScore)
    assert isinstance(parsed.filter, query.Term)
    assert len(parsed.functions) == 2

    # first function: script_score carrying its own filter
    script_fn = parsed.functions[0]
    assert isinstance(script_fn, function.ScriptScore)
    assert isinstance(script_fn.filter, query.Terms)

    # second function: bare boost_factor that round-trips through to_dict()
    boost_fn = parsed.functions[1]
    assert isinstance(boost_fn, function.BoostFactor)
    assert 6 == boost_fn.value
    assert {"boost_factor": 6} == boost_fn.to_dict()
def test_script_score():
    """A script_score dict is parsed into ScriptScore and round-trips via to_dict()."""
    raw = {
        "script_score": {
            "query": {"match_all": {}},
            "script": {"source": "...", "params": {}},
        }
    }

    parsed = query.Q(raw)

    assert isinstance(parsed, query.ScriptScore)
    assert isinstance(parsed.query, query.MatchAll)
    assert parsed.script == {"source": "...", "params": {}}
    assert parsed.to_dict() == raw
@@ -0,0 +1,204 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import pickle
from datetime import date
from pytest import fixture, raises
from opensearchpy import Date, Document, Object, Search
from opensearchpy.helpers import response
from opensearchpy.helpers.aggs import Terms
from opensearchpy.helpers.response.aggs import AggResponse, Bucket, BucketData
@fixture
def agg_response(aggs_search, aggs_data):
    """Provide a ``Response`` built from the aggregation search and data fixtures."""
    wrapped = response.Response(aggs_search, aggs_data)
    return wrapped
def test_agg_response_is_pickleable(agg_response):
    """An aggregation response survives a pickle round trip unchanged."""
    # Touch ``hits`` before pickling; the value itself is deliberately unused
    # (presumably this forces the hits to be built first -- confirm).
    agg_response.hits

    restored = pickle.loads(pickle.dumps(agg_response))

    assert restored == agg_response
    assert restored._search == agg_response._search
    assert restored.hits == agg_response.hits
def test_response_is_pickleable(dummy_response):
    """A plain search response survives a pickle round trip unchanged."""
    original = response.Response(Search(), dummy_response)
    # Touch ``hits`` before pickling; the value itself is deliberately unused
    # (presumably this forces the hits to be built first -- confirm).
    original.hits

    restored = pickle.loads(pickle.dumps(original))

    assert restored == original
    assert restored._search == original._search
    assert restored.hits == original.hits
def test_hit_is_pickleable(dummy_response):
    """The hits of a response, and the first hit's meta, survive pickling."""
    res = response.Response(Search(), dummy_response)

    restored_hits = pickle.loads(pickle.dumps(res.hits))

    assert restored_hits == res.hits
    assert restored_hits[0].meta == res.hits[0].meta
def test_response_stores_search(dummy_response):
    """The Response keeps the exact Search instance it was created with."""
    originating_search = Search()
    res = response.Response(originating_search, dummy_response)
    assert res._search is originating_search
def test_interactive_helpers(dummy_response):
    """Check truthiness, ``repr`` and ``dir`` of a response and its hits."""
    res = response.Response(Search(), dummy_response)
    hits = res.hits
    first_hit = hits[0]
    raw_hits = dummy_response["hits"]["hits"]

    # Expected repr of the hit list: the second and third sources are cut to
    # 60 characters and followed by "...}" in the template.
    rhits = (
        "[<Hit(test-index/opensearch): {}>, <Hit(test-index/42): {}...}}>, "
        "<Hit(test-index/47): {}...}}>, <Hit(test-index/53): {{}}>]"
    ).format(
        repr(raw_hits[0]["_source"]),
        repr(raw_hits[1]["_source"])[:60],
        repr(raw_hits[2]["_source"])[:60],
    )

    assert res
    assert "<Response: %s>" % rhits == repr(res)
    assert rhits == repr(hits)
    assert {"meta", "city", "name"} == set(dir(first_hit))
    assert "<Hit(test-index/opensearch): %r>" % raw_hits[0]["_source"] == repr(
        first_hit
    )
def test_empty_response_is_false(dummy_response):
    """A response whose hit list is empty is falsy."""
    dummy_response["hits"]["hits"] = []
    empty = response.Response(Search(), dummy_response)
    assert not empty
def test_len_response(dummy_response):
    """``len()`` of the response built from the dummy data is 4."""
    wrapped = response.Response(Search(), dummy_response)
    assert len(wrapped) == 4
def test_iterating_over_response_gives_you_hits(dummy_response):
    """Iterating a Response yields ``Hit`` objects carrying the expected meta data."""
    res = response.Response(Search(), dummy_response)
    # ``list(res)`` iterates the response directly; no wrapping generator needed.
    hits = list(res)

    assert res.success()
    assert 123 == res.took
    assert 4 == len(hits)
    assert all(isinstance(h, response.Hit) for h in hits)

    h = hits[0]
    assert "test-index" == h.meta.index
    assert "opensearch" == h.meta.id
    assert 12 == h.meta.score

    assert hits[1].meta.routing == "opensearch"
def test_hits_get_wrapped_to_contain_additional_attrs(dummy_response):
    """The hits container exposes ``total`` and ``max_score`` attributes."""
    wrapped_hits = response.Response(Search(), dummy_response).hits

    assert 123 == wrapped_hits.total
    assert 12.0 == wrapped_hits.max_score
def test_hits_provide_dot_and_bracket_access_to_attrs(dummy_response):
    """Hit fields are readable as attributes and as items; missing names raise."""
    res = response.Response(Search(), dummy_response)
    first_hit = res.hits[0]

    assert "OpenSearch" == first_hit.name
    assert "OpenSearch" == first_hit["name"]

    # nested objects are reachable through chained attribute access
    assert "Honza" == res.hits[2].name.first

    with raises(KeyError):
        first_hit["not_there"]

    with raises(AttributeError):
        first_hit.not_there
def test_slicing_on_response_slices_on_hits(dummy_response):
    """Indexing or slicing the response is equivalent to doing so on its hits."""
    wrapped = response.Response(Search(), dummy_response)

    assert wrapped[0] is wrapped.hits[0]
    assert wrapped[::-1] == wrapped.hits[::-1]
def test_aggregation_base(agg_response):
    """``aggs`` and ``aggregations`` are the same AggResponse object."""
    short_form = agg_response.aggs
    assert short_form is agg_response.aggregations
    assert isinstance(agg_response.aggs, response.AggResponse)
def test_metric_agg_works(agg_response):
    """The ``sum_lines`` metric aggregation exposes its numeric ``value``."""
    sum_lines = agg_response.aggs.sum_lines
    assert 25052.0 == sum_lines.value
def test_aggregations_can_be_iterated_over(agg_response):
    """Iterating ``aggs`` yields three items, each an ``AggResponse`` instance."""
    aggs = list(agg_response.aggs)

    assert len(aggs) == 3
    # A generator expression is used here, matching the other tests in this module.
    assert all(isinstance(a, AggResponse) for a in aggs)
def test_aggregations_can_be_retrieved_by_name(agg_response, aggs_search):
    """Looking up an aggregation by key returns BucketData linked to its definition."""
    popular = agg_response.aggs["popular_files"]

    assert isinstance(popular, BucketData)
    definition = popular._meta["aggs"]
    assert isinstance(definition, Terms)
    assert popular._meta["aggs"] is aggs_search.aggs.aggs["popular_files"]
def test_bucket_response_can_be_iterated_over(agg_response):
    """Iterating a bucket aggregation yields the Bucket objects in ``.buckets``."""
    popular_files = agg_response.aggregations.popular_files

    collected = [bucket for bucket in popular_files]

    assert all(isinstance(bucket, Bucket) for bucket in collected)
    assert collected == popular_files.buckets
def test_bucket_keys_get_deserialized(aggs_data, aggs_search):
    """With a Document declaring a Date field, ``per_month`` bucket keys are dates."""

    class Commit(Document):
        info = Object(properties={"committed_date": Date()})

        class Index:
            name = "test-commit"

    typed_search = aggs_search.doc_type(Commit)
    typed_response = response.Response(typed_search, aggs_data)

    for bucket in typed_response.aggregations.per_month:
        assert isinstance(bucket.key, date)
@@ -0,0 +1,589 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from copy import deepcopy
from pytest import raises
from opensearchpy import Document, Q
from opensearchpy.exceptions import IllegalOperation
from opensearchpy.helpers import query, search
def test_expand__to_dot_is_respected():
    """``_expand__to_dot=False`` keeps ``__`` in the field name of the search body."""
    built = search.Search().query("match", a__b=42, _expand__to_dot=False)
    expected = {"query": {"match": {"a__b": 42}}}
    assert expected == built.to_dict()
def test_execute_uses_cache():
    """``execute()`` returns the object already stored in ``_response``."""
    cached = object()
    s = search.Search()
    s._response = cached
    assert cached is s.execute()
def test_cache_can_be_ignored(mock_client):
    """``execute(ignore_cache=True)`` calls the client despite a stored response."""
    s = search.Search(using="mock")
    s._response = object()

    s.execute(ignore_cache=True)

    mock_client.search.assert_called_once_with(index=None, body={})
def test_iter_iterates_over_hits():
    """Iterating a Search yields the items of its stored response."""
    s = search.Search()
    s._response = [1, 2, 3]

    collected = list(s)

    assert [1, 2, 3] == collected
def test_cache_isnt_cloned():
    """A clone of a Search does not carry over the ``_response`` attribute."""
    s = search.Search()
    s._response = object()

    clone = s._clone()

    assert not hasattr(clone, "_response")
def test_search_starts_with_no_query():
    """A fresh Search has no proxied query."""
    fresh = search.Search()
    assert fresh.query._proxied is None
def test_search_query_combines_query():
    """Each ``query()`` call returns a new Search; successive queries are and-ed."""
    base = search.Search()

    once = base.query("match", f=42)
    assert once.query._proxied == query.Match(f=42)
    assert base.query._proxied is None  # the source Search is left untouched

    twice = once.query("match", f=43)
    assert once.query._proxied == query.Match(f=42)
    assert twice.query._proxied == query.Bool(
        must=[query.Match(f=42), query.Match(f=43)]
    )
def test_query_can_be_assigned_to():
    """Assigning to ``Search.query`` stores that very query object."""
    s = search.Search()
    assigned = Q("match", title="python")

    s.query = assigned

    assert s.query._proxied is assigned
def test_query_can_be_wrapped():
    """The current query can be wrapped in a function_score and assigned back."""
    s = search.Search().query("match", title="python")

    s.query = Q("function_score", query=s.query, field_value_factor={"field": "rating"})

    expected = {
        "query": {
            "function_score": {
                "functions": [{"field_value_factor": {"field": "rating"}}],
                "query": {"match": {"title": "python"}},
            }
        }
    }
    assert expected == s.to_dict()
def test_using():
    """``using()`` returns a copy bound to the new client; the source is unchanged."""
    first_client = object()
    second_client = object()

    original = search.Search(using=first_client)
    assert original._using is first_client

    rebound = original.using(second_client)
    assert original._using is first_client
    assert rebound._using is second_client
def test_methods_are_proxied_to_the_query():
    """``Search.query.to_dict()`` returns the serialised underlying query."""
    s = search.Search().query("match_all")
    serialised = s.query.to_dict()
    assert serialised == {"match_all": {}}
def test_query_always_returns_search():
    """Calling ``query()`` on a Search returns a Search instance."""
    result = search.Search().query("match", f=42)
    assert isinstance(result, search.Search)
def test_source_copied_on_clone():
    """``_source`` is carried over to clones for False, empty and list values."""
    # _source explicitly disabled
    disabled = search.Search().source(False)
    assert disabled._clone()._source == disabled._source
    assert disabled._clone()._source is False

    # empty field list
    empty = search.Search().source([])
    assert empty._clone()._source == empty._source
    assert empty._source == []

    # explicit field list
    listed = search.Search().source(["some", "fields"])
    assert listed._clone()._source == listed._source
    assert listed._clone()._source == ["some", "fields"]
def test_copy_clones():
    """``copy.copy`` of a Search gives an equal but distinct object."""
    from copy import copy

    original = search.Search().source(["some", "fields"])
    duplicate = copy(original)

    assert original == duplicate
    assert original is not duplicate
def test_aggs_allow_two_metric():
    """Two chained ``metric()`` calls both end up as top-level aggregations."""
    s = search.Search()

    s.aggs.metric("a", "max", field="a").metric("b", "max", field="b")

    expected = {
        "aggs": {"a": {"max": {"field": "a"}}, "b": {"max": {"field": "b"}}}
    }
    assert s.to_dict() == expected
def test_aggs_get_copied_on_change():
    """Aggregations added to a derived Search do not leak back into its source.

    Four searches are derived from one another, each gaining one more
    aggregation; every one must serialise only the aggregations added up to
    that point.
    """
    s = search.Search().query("match_all")
    s.aggs.bucket("per_tag", "terms", field="f").metric(
        "max_score", "max", field="score"
    )

    s2 = s.query("match_all")
    s2.aggs.bucket("per_month", "date_histogram", field="date", interval="month")

    s3 = s2.query("match_all")
    s3.aggs["per_month"].metric("max_score", "max", field="score")

    s4 = s3._clone()
    s4.aggs.metric("max_score", "max", field="score")

    # ``expected`` is grown step by step to mirror s, s2, s3 and s4 in turn.
    expected = {
        "query": {"match_all": {}},
        "aggs": {
            "per_tag": {
                "terms": {"field": "f"},
                "aggs": {"max_score": {"max": {"field": "score"}}},
            }
        },
    }
    assert expected == s.to_dict()

    expected["aggs"]["per_month"] = {
        "date_histogram": {"field": "date", "interval": "month"}
    }
    assert expected == s2.to_dict()

    expected["aggs"]["per_month"]["aggs"] = {"max_score": {"max": {"field": "score"}}}
    assert expected == s3.to_dict()

    expected["aggs"]["max_score"] = {"max": {"field": "score"}}
    assert expected == s4.to_dict()
def test_search_index():
    """``index`` accepts strings, lists and tuples, accumulates, and resets when bare."""
    # single name in the constructor, then appended to one at a time
    srch = search.Search(index="i")
    assert srch._index == ["i"]
    srch = srch.index("i2")
    assert srch._index == ["i", "i2"]
    srch = srch.index("i3")
    assert srch._index == ["i", "i2", "i3"]

    # calling index() with no arguments clears the selection
    srch = srch.index()
    assert srch._index is None

    # tuple and list forms in the constructor
    srch = search.Search(index=("i", "i2"))
    assert srch._index == ["i", "i2"]
    srch = search.Search(index=["i", "i2"])
    assert srch._index == ["i", "i2"]

    # several positional names; the source Search is not modified by index()
    srch = search.Search()
    srch = srch.index("i", "i2")
    assert srch._index == ["i", "i2"]
    derived = srch.index("i3")
    assert srch._index == ["i", "i2"]
    assert derived._index == ["i", "i2", "i3"]

    # a list mixed with a string, then string, list and tuple additions
    srch = search.Search()
    srch = srch.index(["i", "i2"], "i3")
    assert srch._index == ["i", "i2", "i3"]
    derived = srch.index("i4")
    assert srch._index == ["i", "i2", "i3"]
    assert derived._index == ["i", "i2", "i3", "i4"]
    derived = srch.index(["i4"])
    assert derived._index == ["i", "i2", "i3", "i4"]
    derived = srch.index(("i4", "i5"))
    assert derived._index == ["i", "i2", "i3", "i4", "i5"]
def test_doc_type_document_class():
    """A Document class given as doc_type is stored as is, with an empty type map."""

    class MyDocument(Document):
        pass

    via_constructor = search.Search(doc_type=MyDocument)
    assert via_constructor._doc_type == [MyDocument]
    assert via_constructor._doc_type_map == {}

    via_method = search.Search().doc_type(MyDocument)
    assert via_method._doc_type == [MyDocument]
    assert via_method._doc_type_map == {}
def test_sort():
    """A ``-`` prefix becomes a descending order clause; bare ``sort()`` clears it."""
    s = search.Search()

    s = s.sort("fielda", "-fieldb")
    expected_sort = ["fielda", {"fieldb": {"order": "desc"}}]
    assert expected_sort == s._sort
    assert {"sort": ["fielda", {"fieldb": {"order": "desc"}}]} == s.to_dict()

    s = s.sort()
    assert [] == s._sort
    assert search.Search().to_dict() == s.to_dict()
def test_sort_by_score():
    """Sorting by ``_score`` is accepted; ``-_score`` raises IllegalOperation."""
    by_score = search.Search().sort("_score")
    assert {"sort": ["_score"]} == by_score.to_dict()

    fresh = search.Search()
    with raises(IllegalOperation):
        fresh.sort("-_score")
def test_slice():
    """Slicing a Search is translated into ``from`` / ``size`` in the body."""
    s = search.Search()

    # (slice applied to the search, expected serialised body)
    cases = [
        (slice(3, 10), {"from": 3, "size": 7}),
        (slice(None, 5), {"from": 0, "size": 5}),
        (slice(3, None), {"from": 3, "size": 10}),
        (slice(0, 0), {"from": 0, "size": 0}),
        (slice(20, 0), {"from": 20, "size": 0}),
    ]
    for window, expected in cases:
        assert expected == s[window].to_dict()
def test_index():
    """Indexing a Search with an integer requests exactly one hit at that offset."""
    single = search.Search()[3]
    assert {"from": 3, "size": 1} == single.to_dict()
def test_search_to_dict():
    """``to_dict()`` reflects the query, aggregations, extras and call-time kwargs."""
    s = search.Search()
    assert {} == s.to_dict()

    s = s.query("match", f=42)
    assert {"query": {"match": {"f": 42}}} == s.to_dict()

    # keyword arguments to to_dict() are merged into the output
    assert {"query": {"match": {"f": 42}}, "size": 10} == s.to_dict(size=10)

    s.aggs.bucket("per_tag", "terms", field="f").metric(
        "max_score", "max", field="score"
    )
    with_aggs = {
        "aggs": {
            "per_tag": {
                "terms": {"field": "f"},
                "aggs": {"max_score": {"max": {"field": "score"}}},
            }
        },
        "query": {"match": {"f": 42}},
    }
    assert with_aggs == s.to_dict()

    # extras given to the constructor, then extended through extra()
    s = search.Search(extra={"size": 5})
    assert {"size": 5} == s.to_dict()
    s = s.extra(from_=42)
    assert {"size": 5, "from": 42} == s.to_dict()
def test_complex_example():
    """A Search built through many chained calls serialises to the expected body."""
    s = search.Search()
    s = (
        s.query("match", title="python")
        .query(~Q("match", title="ruby"))
        .filter(Q("term", category="meetup") | Q("term", category="conference"))
        .post_filter("terms", tags=["prague", "czech"])
        .script_fields(more_attendees="doc['attendees'].value + 42")
    )

    s.aggs.bucket("per_country", "terms", field="country").metric(
        "avg_attendees", "avg", field="attendees"
    )

    s.query.minimum_should_match = 2

    s = s.highlight_options(order="score").highlight("title", "body", fragment_size=50)

    expected = {
        "query": {
            "bool": {
                "filter": [
                    {
                        "bool": {
                            "should": [
                                {"term": {"category": "meetup"}},
                                {"term": {"category": "conference"}},
                            ]
                        }
                    }
                ],
                "must": [{"match": {"title": "python"}}],
                "must_not": [{"match": {"title": "ruby"}}],
                "minimum_should_match": 2,
            }
        },
        "post_filter": {"terms": {"tags": ["prague", "czech"]}},
        "aggs": {
            "per_country": {
                "terms": {"field": "country"},
                "aggs": {"avg_attendees": {"avg": {"field": "attendees"}}},
            }
        },
        "highlight": {
            "order": "score",
            "fields": {"title": {"fragment_size": 50}, "body": {"fragment_size": 50}},
        },
        "script_fields": {"more_attendees": {"script": "doc['attendees'].value + 42"}},
    }
    assert expected == s.to_dict()
def test_reverse():
    """``Search.from_dict`` round-trips a full body without mutating its input."""
    body = {
        "query": {
            "filtered": {
                "filter": {
                    "bool": {
                        "should": [
                            {"term": {"category": "meetup"}},
                            {"term": {"category": "conference"}},
                        ]
                    }
                },
                "query": {
                    "bool": {
                        "must": [{"match": {"title": "python"}}],
                        "must_not": [{"match": {"title": "ruby"}}],
                        "minimum_should_match": 2,
                    }
                },
            }
        },
        "post_filter": {"bool": {"must": [{"terms": {"tags": ["prague", "czech"]}}]}},
        "aggs": {
            "per_country": {
                "terms": {"field": "country"},
                "aggs": {"avg_attendees": {"avg": {"field": "attendees"}}},
            }
        },
        "sort": ["title", {"category": {"order": "desc"}}, "_score"],
        "size": 5,
        "highlight": {"order": "score", "fields": {"title": {"fragment_size": 50}}},
        "suggest": {
            "my-title-suggestions-1": {
                "text": "devloping distibutd saerch engies",
                "term": {"size": 3, "field": "title"},
            }
        },
        "script_fields": {"more_attendees": {"script": "doc['attendees'].value + 42"}},
    }
    snapshot = deepcopy(body)

    s = search.Search.from_dict(body)

    # the input dict must come out of from_dict() exactly as it went in
    assert body == snapshot
    assert {"size": 5} == s._extra
    assert body == s.to_dict()
def test_from_dict_doesnt_need_query():
    """``Search.from_dict`` accepts a body that has no ``query`` key."""
    body = {"size": 5}
    s = search.Search.from_dict(body)
    assert {"size": 5} == s.to_dict()
def test_params_being_passed_to_search(mock_client):
    """Values set through ``params()`` are forwarded to the client's search call."""
    s = search.Search(using="mock").params(routing="42")

    s.execute()

    mock_client.search.assert_called_once_with(index=None, body={}, routing="42")
def test_source():
    """``source()`` handles no args, includes/excludes, False, and a later override."""
    # no arguments: nothing is emitted
    assert {} == search.Search().source().to_dict()

    # includes / excludes keyword form
    filtered = search.Search().source(includes=["foo.bar.*"], excludes=["foo.one"])
    assert {
        "_source": {"includes": ["foo.bar.*"], "excludes": ["foo.one"]}
    } == filtered.to_dict()

    # _source disabled
    assert {"_source": False} == search.Search().source(False).to_dict()

    # a later positional field list replaces the earlier includes/excludes
    overridden = (
        search.Search()
        .source(includes=["foo.bar.*"], excludes=["foo.one"])
        .source(["f1", "f2"])
    )
    assert {"_source": ["f1", "f2"]} == overridden.to_dict()
def test_source_on_clone():
    """``_source`` settings survive further chained calls such as ``filter()``."""
    # includes and excludes given in separate source() calls are both kept
    merged = (
        search.Search()
        .source(includes=["foo.bar.*"])
        .source(excludes=["foo.one"])
        .filter("term", title="python")
    )
    assert {
        "_source": {"includes": ["foo.bar.*"], "excludes": ["foo.one"]},
        "query": {"bool": {"filter": [{"term": {"title": "python"}}]}},
    } == merged.to_dict()

    # a disabled _source is kept as well
    disabled = search.Search().source(False).filter("term", title="python")
    assert {
        "_source": False,
        "query": {"bool": {"filter": [{"term": {"title": "python"}}]}},
    } == disabled.to_dict()
def test_source_on_clear():
    """Passing ``includes=None, excludes=None`` removes earlier ``_source`` settings."""
    cleared = (
        search.Search()
        .source(includes=["foo.bar.*"])
        .source(includes=None, excludes=None)
    )
    assert {} == cleared.to_dict()
def test_suggest_accepts_global_text():
    """A top-level suggest ``text`` is copied into suggesters lacking their own."""
    body = {
        "suggest": {
            "text": "the amsterdma meetpu",
            "my-suggest-1": {"term": {"field": "title"}},
            "my-suggest-2": {"text": "other", "term": {"field": "body"}},
        }
    }

    s = search.Search.from_dict(body)

    expected = {
        "suggest": {
            "my-suggest-1": {
                "term": {"field": "title"},
                "text": "the amsterdma meetpu",
            },
            "my-suggest-2": {"term": {"field": "body"}, "text": "other"},
        }
    }
    assert expected == s.to_dict()
def test_suggest():
    """``suggest()`` adds a named suggester with its text and options."""
    s = search.Search().suggest("my_suggestion", "pyhton", term={"field": "title"})

    expected = {
        "suggest": {"my_suggestion": {"term": {"field": "title"}, "text": "pyhton"}}
    }
    assert expected == s.to_dict()
def test_exclude():
    """``exclude()`` adds the query as a ``must_not`` inside a bool filter."""
    s = search.Search().exclude("match", title="python")

    expected = {
        "query": {
            "bool": {
                "filter": [{"bool": {"must_not": [{"match": {"title": "python"}}]}}]
            }
        }
    }
    assert expected == s.to_dict()
def test_delete_by_query(mock_client):
    """``delete()`` calls the client's delete_by_query with the serialised query."""
    s = search.Search(using="mock").query("match", lang="java")

    s.delete()

    expected_body = {"query": {"match": {"lang": "java"}}}
    mock_client.delete_by_query.assert_called_once_with(
        index=None, body=expected_body
    )
def test_update_from_dict():
    """Successive ``update_from_dict()`` calls accumulate on the same Search."""
    s = search.Search()

    s.update_from_dict({"indices_boost": [{"important-documents": 2}]})
    s.update_from_dict({"_source": ["id", "name"]})

    expected = {
        "indices_boost": [{"important-documents": 2}],
        "_source": ["id", "name"],
    }
    assert expected == s.to_dict()
def test_rescore_query_to_dict():
    """Query objects nested in ``rescore`` are serialised by ``to_dict()``.

    Covers both a rescore set through ``extra()`` and one passed directly as a
    keyword to ``to_dict()``, which is expected to take the place of the former.
    """
    s = search.Search(index="index-name")

    positive_query = Q(
        "function_score",
        query=Q("term", tags="a"),
        script_score={"script": "_score * 1"},
    )
    negative_query = Q(
        "function_score",
        query=Q("term", tags="b"),
        script_score={"script": "_score * -100"},
    )

    s = s.query(positive_query)
    s = s.extra(
        rescore={"window_size": 100, "query": {"rescore_query": negative_query}}
    )

    # rescore taken from the extras
    from_extra = s.to_dict()
    assert from_extra == {
        "query": {
            "function_score": {
                "query": {"term": {"tags": "a"}},
                "functions": [{"script_score": {"script": "_score * 1"}}],
            }
        },
        "rescore": {
            "window_size": 100,
            "query": {
                "rescore_query": {
                    "function_score": {
                        "query": {"term": {"tags": "b"}},
                        "functions": [{"script_score": {"script": "_score * -100"}}],
                    }
                }
            },
        },
    }

    # rescore supplied at serialisation time
    from_kwargs = s.to_dict(
        rescore={"window_size": 10, "query": {"rescore_query": positive_query}}
    )
    assert from_kwargs == {
        "query": {
            "function_score": {
                "query": {"term": {"tags": "a"}},
                "functions": [{"script_score": {"script": "_score * 1"}}],
            }
        },
        "rescore": {
            "window_size": 10,
            "query": {
                "rescore_query": {
                    "function_score": {
                        "query": {"term": {"tags": "a"}},
                        "functions": [{"script_score": {"script": "_score * 1"}}],
                    }
                }
            },
        },
    }
@@ -0,0 +1,182 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from copy import deepcopy
from opensearchpy import Q, UpdateByQuery
from opensearchpy.helpers.response import UpdateByQueryResponse
def test_ubq_starts_with_no_query():
    """A fresh UpdateByQuery has no proxied query."""
    fresh = UpdateByQuery()
    assert fresh.query._proxied is None
def test_ubq_to_dict():
    """``to_dict()`` reflects the query, call-time kwargs and constructor extras."""
    ubq = UpdateByQuery()
    assert {} == ubq.to_dict()

    ubq = ubq.query("match", f=42)
    assert {"query": {"match": {"f": 42}}} == ubq.to_dict()

    # keyword arguments to to_dict() are merged into the output
    assert {"query": {"match": {"f": 42}}, "size": 10} == ubq.to_dict(size=10)

    # plain extras
    ubq = UpdateByQuery(extra={"size": 5})
    assert {"size": 5} == ubq.to_dict()

    # query objects inside extras are serialised too
    ubq = UpdateByQuery(extra={"extra_q": Q("term", category="conference")})
    assert {"extra_q": {"term": {"category": "conference"}}} == ubq.to_dict()
def test_complex_example():
    """An UpdateByQuery built through chained calls serialises to the expected body."""
    ubq = UpdateByQuery()
    ubq = (
        ubq.query("match", title="python")
        .query(~Q("match", title="ruby"))
        .filter(Q("term", category="meetup") | Q("term", category="conference"))
        .script(
            source="ctx._source.likes += params.f", lang="painless", params={"f": 3}
        )
    )

    ubq.query.minimum_should_match = 2

    expected = {
        "query": {
            "bool": {
                "filter": [
                    {
                        "bool": {
                            "should": [
                                {"term": {"category": "meetup"}},
                                {"term": {"category": "conference"}},
                            ]
                        }
                    }
                ],
                "must": [{"match": {"title": "python"}}],
                "must_not": [{"match": {"title": "ruby"}}],
                "minimum_should_match": 2,
            }
        },
        "script": {
            "source": "ctx._source.likes += params.f",
            "lang": "painless",
            "params": {"f": 3},
        },
    }
    assert expected == ubq.to_dict()
def test_exclude():
    """``exclude()`` adds the query as a ``must_not`` inside a bool filter."""
    ubq = UpdateByQuery().exclude("match", title="python")

    expected = {
        "query": {
            "bool": {
                "filter": [{"bool": {"must_not": [{"match": {"title": "python"}}]}}]
            }
        }
    }
    assert expected == ubq.to_dict()
def test_reverse():
    """``UpdateByQuery.from_dict`` round-trips a body without mutating its input."""
    body = {
        "query": {
            "filtered": {
                "filter": {
                    "bool": {
                        "should": [
                            {"term": {"category": "meetup"}},
                            {"term": {"category": "conference"}},
                        ]
                    }
                },
                "query": {
                    "bool": {
                        "must": [{"match": {"title": "python"}}],
                        "must_not": [{"match": {"title": "ruby"}}],
                        "minimum_should_match": 2,
                    }
                },
            }
        },
        "script": {
            "source": "ctx._source.likes += params.f",
            "lang": "painless",
            "params": {"f": 3},
        },
    }
    snapshot = deepcopy(body)

    ubq = UpdateByQuery.from_dict(body)

    # the input dict must come out of from_dict() exactly as it went in
    assert body == snapshot
    assert body == ubq.to_dict()
def test_from_dict_doesnt_need_query():
    """``UpdateByQuery.from_dict`` accepts a body that has only a ``script`` key."""
    body = {"script": {"source": "test"}}
    ubq = UpdateByQuery.from_dict(body)
    assert {"script": {"source": "test"}} == ubq.to_dict()
def test_params_being_passed_to_search(mock_client):
    """Values set through ``params()`` are forwarded to ``update_by_query``."""
    ubq = UpdateByQuery(using="mock").params(routing="42")

    ubq.execute()

    mock_client.update_by_query.assert_called_once_with(
        index=None, body={}, routing="42"
    )
def test_overwrite_script():
    """A second ``script()`` call replaces the earlier script entirely."""
    ubq = UpdateByQuery().script(
        source="ctx._source.likes += params.f", lang="painless", params={"f": 3}
    )
    first_expected = {
        "script": {
            "source": "ctx._source.likes += params.f",
            "lang": "painless",
            "params": {"f": 3},
        }
    }
    assert first_expected == ubq.to_dict()

    # no "lang" or "params" from the first script are expected to remain
    ubq = ubq.script(source="ctx._source.likes++")
    assert {"script": {"source": "ctx._source.likes++"}} == ubq.to_dict()
def test_update_by_query_response_success():
    """``success()`` is true only with no timeout and an empty failures list."""
    clean = UpdateByQueryResponse({}, {"timed_out": False, "failures": []})
    assert clean.success()

    timed_out = UpdateByQueryResponse({}, {"timed_out": True, "failures": []})
    assert not timed_out.success()

    with_failure = UpdateByQueryResponse({}, {"timed_out": False, "failures": [{}]})
    assert not with_failure.success()
@@ -0,0 +1,121 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import pickle
from pytest import raises
from opensearchpy import Q, serializer
from opensearchpy.helpers import utils
def test_attrdict_pickle():
    """An empty ``AttrDict`` compares equal to its pickle round trip."""
    original = utils.AttrDict({})
    restored = pickle.loads(pickle.dumps(original))
    assert original == restored
def test_attrlist_pickle():
    """An empty ``AttrList`` compares equal to its pickle round trip."""
    original = utils.AttrList([])
    restored = pickle.loads(pickle.dumps(original))
    assert original == restored
def test_attrlist_slice():
    """Slicing an ``AttrList`` keeps the custom ``obj_wrapper`` for its items."""

    class MyAttrDict(utils.AttrDict):
        pass

    wrapped = utils.AttrList([{}, {}], obj_wrapper=MyAttrDict)
    full_slice = wrapped[:]
    assert isinstance(full_slice[0], MyAttrDict)
def test_merge():
    """``merge`` updates the target in place, recursing into nested dicts."""
    target = utils.AttrDict({"a": {"b": 42, "c": 47}})
    incoming = {"a": {"b": 123, "d": -12}, "e": [1, 2, 3]}

    utils.merge(target, incoming)

    expected = {"a": {"b": 123, "c": 47, "d": -12}, "e": [1, 2, 3]}
    assert target == expected
def test_merge_conflict():
    """Conflicting values merge silently by default and raise when ``True`` is passed."""
    conflicting = ({"a": 42}, {"a": {"b": 47}})
    for other in conflicting:
        # Two-argument form: must complete without raising.
        utils.merge({"a": {"b": 42}}, other)
        # Same data with ``True`` as the third positional argument: must raise.
        with raises(ValueError):
            utils.merge({"a": {"b": 42}}, other, True)
def test_attrdict_bool():
    """An ``AttrDict`` is falsy while empty and truthy once an attribute is set."""
    attrs = utils.AttrDict({})
    assert not attrs

    attrs.title = "Title"
    assert attrs
def test_attrlist_items_get_wrapped_during_iteration():
    """Iterating an ``AttrList`` wraps nested lists and dicts in Attr types."""
    source = utils.AttrList([1, object(), [1], {}])
    items = [item for item in source]

    nested_list, nested_dict = items[2], items[3]
    assert isinstance(nested_list, utils.AttrList)
    assert isinstance(nested_dict, utils.AttrDict)
def test_serializer_deals_with_Attr_versions():
    """Attr wrappers serialize to the same output as the plain containers."""
    dumps = serializer.serializer.dumps
    wrapped = utils.AttrDict({"key": utils.AttrList([1, 2, 3])})
    plain = {"key": [1, 2, 3]}
    assert dumps(wrapped) == dumps(plain)
def test_serializer_deals_with_objects_with_to_dict():
    """Objects exposing ``to_dict`` are serialized through that method's result."""

    class MyClass(object):
        def to_dict(self):
            return 42

    dumped = serializer.serializer.dumps(MyClass())
    assert dumped == "42"
def test_recursive_to_dict():
    """``recursive_to_dict`` converts a query nested inside lists, tuples and dicts."""
    nested = {"k": [1, (1.0, {"v": Q("match", key="val")})]}
    expected = {"k": [1, (1.0, {"v": {"match": {"key": "val"}}})]}
    assert utils.recursive_to_dict(nested) == expected
def test_attrdict_get():
    """``AttrDict.get`` honours defaults and raises when a key is missing without one."""
    attrs = utils.AttrDict({"a": {"b": 42, "c": 47}})

    inner = attrs.get("a", {})
    assert inner.get("b", 0) == 42
    assert attrs.get("a", {}).get("e", 0) == 0
    assert attrs.get("d", {}) == {}

    # Missing key and no default: AttributeError is expected.
    with raises(AttributeError):
        assert attrs.get("d")
@@ -0,0 +1,171 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from datetime import datetime
from pytest import raises
from opensearchpy import (
Boolean,
Date,
Document,
InnerDoc,
Integer,
Nested,
Object,
Text,
)
from opensearchpy.exceptions import ValidationException
class Author(InnerDoc):
    """Inner document with required ``name``/``email`` and a custom ``clean`` hook."""

    name = Text(required=True)
    email = Text(required=True)

    def clean(self):
        """Validate that the lowercased name occurs in the e-mail address.

        Raises:
            ValidationException: if ``self.name.lower()`` is not a substring
                of ``self.email``.
        """
        # The leftover debugging print() was removed; it only added noise
        # to test output.
        if self.name.lower() not in self.email:
            raise ValidationException("Invalid email!")
class BlogPost(Document):
    """Document fixture used by the validation tests in this module."""

    # Required nested list of Author inner documents.
    authors = Nested(Author, required=True)
    # Optional date field.
    created = Date()
    # Object field declared without an inner document class.
    inner = Object()
class BlogPostWithStatus(Document):
    """Document fixture with a single required boolean field."""

    # Required; the tests in this module check that ``False`` counts as a value.
    published = Boolean(required=True)
class AutoNowDate(Date):
    """``Date`` field that substitutes the current time for a missing value."""

    def clean(self, data):
        """Replace ``None`` with ``datetime.now()`` and defer to ``Date.clean``."""
        value = datetime.now() if data is None else data
        return super(AutoNowDate, self).clean(value)
class Log(Document):
    """Document fixture whose required timestamp is an ``AutoNowDate`` field."""

    # Required, but AutoNowDate.clean substitutes the current time for None.
    timestamp = AutoNowDate(required=True)
    # Free-text payload.
    data = Text()
def test_required_int_can_be_0():
    """``0`` satisfies a required ``Integer`` field."""

    class DT(Document):
        i = Integer(required=True)

    doc = DT(i=0)
    result = doc.full_clean()
    assert result is None
def test_required_field_cannot_be_empty_list():
    """An empty list does not satisfy a required field."""

    class DT(Document):
        i = Integer(required=True)

    doc = DT(i=[])
    with raises(ValidationException):
        doc.full_clean()
def test_validation_works_for_lists_of_values():
    """Every element of a list value is validated against the field type."""

    class DT(Document):
        i = Date(required=True)

    # One element that is not a date makes the whole document invalid.
    invalid = DT(i=[datetime.now(), "not date"])
    with raises(ValidationException):
        invalid.full_clean()

    valid = DT(i=[datetime.now(), datetime.now()])
    assert None is valid.full_clean()
def test_field_with_custom_clean():
    """``full_clean`` fills ``Log.timestamp`` with a datetime when none was given."""
    entry = Log()
    entry.full_clean()
    assert isinstance(entry.timestamp, datetime)
def test_empty_object():
    """Assigning an empty dict to an ``Object`` field still passes validation."""
    author = {"name": "Guian", "email": "guiang@bitquilltech.com"}
    post = BlogPost(authors=[author])
    post.inner = {}
    post.full_clean()
def test_missing_required_field_raises_validation_exception():
    """Missing required data fails at both the document and the nested level."""
    # No authors at all.
    no_authors = BlogPost()
    with raises(ValidationException):
        no_authors.full_clean()

    # An author lacking the required e-mail.
    partial_author = BlogPost()
    partial_author.authors.append({"name": "Guian"})
    with raises(ValidationException):
        partial_author.full_clean()

    # A complete author validates cleanly.
    complete = BlogPost()
    complete.authors.append({"name": "Guian", "email": "guiang@bitquilltech.com"})
    complete.full_clean()
def test_boolean_doesnt_treat_false_as_empty():
    """A required ``Boolean`` rejects an unset value but accepts ``False`` and ``True``."""
    post = BlogPostWithStatus()
    with raises(ValidationException):
        post.full_clean()

    for flag in (False, True):
        post.published = flag
        post.full_clean()
def test_custom_validation_on_nested_gets_run():
    """``Author.clean`` runs for nested authors and rejects a mismatched e-mail."""
    mismatched = Author(name="Guian", email="king@example.com")
    post = BlogPost(authors=[mismatched], created=None)

    assert isinstance(post.authors[0], Author)
    with raises(ValidationException):
        post.full_clean()
def test_accessing_known_fields_returns_empty_value():
    """Declared fields that were never set read back as empty values."""
    post = BlogPost()
    assert [] == post.authors

    post.authors.append({})
    first_author = post.authors[0]
    assert None is first_author.name
    assert None is first_author.email
def test_empty_values_are_not_serialized():
    """A field set to ``None`` is left out of ``to_dict`` output."""
    post = BlogPost(
        authors=[{"name": "Guian", "email": "guiang@bitquilltech.com"}], created=None
    )
    post.full_clean()

    expected = {"authors": [{"name": "Guian", "email": "guiang@bitquilltech.com"}]}
    assert post.to_dict() == expected
@@ -0,0 +1,102 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from datetime import datetime, timedelta
import pytest
from opensearchpy import Range
@pytest.mark.parametrize(
    "kwargs, item",
    [
        ({}, 1),
        ({}, -1),
        ({"gte": -1}, -1),
        ({"lte": 4}, 4),
        ({"lte": 4, "gte": 2}, 4),
        ({"lte": 4, "gte": 2}, 2),
        ({"gt": datetime.now() - timedelta(seconds=10)}, datetime.now()),
    ],
)
def test_range_contains(kwargs, item):
    """``item`` lies inside the ``Range`` built from ``kwargs``."""
    bounds = Range(**kwargs)
    assert item in bounds
@pytest.mark.parametrize(
    "kwargs, item",
    [
        ({"gt": -1}, -1),
        ({"lt": 4}, 4),
        ({"lt": 4}, 42),
        ({"lte": 4, "gte": 2}, 1),
        ({"lte": datetime.now() - timedelta(seconds=10)}, datetime.now()),
    ],
)
def test_range_not_contains(kwargs, item):
    """``item`` lies outside the ``Range`` built from ``kwargs``."""
    bounds = Range(**kwargs)
    assert item not in bounds
@pytest.mark.parametrize(
    "args,kwargs",
    [
        (({},), {"lt": 42}),
        ((), {"not_lt": 42}),
        ((object(),), {}),
        ((), {"lt": 1, "lte": 1}),
        ((), {"gt": 1, "gte": 1}),
    ],
)
def test_range_raises_value_error_on_wrong_params(args, kwargs):
    """Each listed argument combination makes ``Range`` raise ``ValueError``."""
    expect_value_error = pytest.raises(ValueError)
    with expect_value_error:
        Range(*args, **kwargs)
@pytest.mark.parametrize(
    "range_,lower,inclusive",
    [
        (Range(gt=1), 1, False),
        (Range(gte=1), 1, True),
        (Range(), None, False),
        (Range(lt=42), None, False),
    ],
)
def test_range_lower(range_, lower, inclusive):
    """``Range.lower`` is a ``(bound, inclusive)`` pair; ``(None, False)`` if unset."""
    # The parameter is named ``range_`` so the builtin ``range`` is not shadowed.
    assert (lower, inclusive) == range_.lower
@pytest.mark.parametrize(
    "range_,upper,inclusive",
    [
        (Range(lt=1), 1, False),
        (Range(lte=1), 1, True),
        (Range(), None, False),
        (Range(gt=42), None, False),
    ],
)
def test_range_upper(range_, upper, inclusive):
    """``Range.upper`` is a ``(bound, inclusive)`` pair; ``(None, False)`` if unset."""
    # The parameter is named ``range_`` so the builtin ``range`` is not shadowed.
    assert (upper, inclusive) == range_.upper
@@ -0,0 +1,25 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
@@ -0,0 +1,113 @@
# -*- coding: utf-8 -*-
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import re
from datetime import datetime
from pytest import fixture
from opensearchpy.connection.connections import add_connection
from opensearchpy.helpers import bulk
from opensearchpy.helpers.test import get_test_client
from .test_data import (
DATA,
FLAT_DATA,
TEST_GIT_DATA,
create_flat_git_index,
create_git_index,
)
from .test_document import Comment, History, PullRequest, User
@fixture(scope="session")
def client():
    """Session-wide test client, also registered as the ``default`` connection."""
    connection = get_test_client(verify_certs=False, http_auth=("admin", "admin"))
    # Registering under "default" lets code that passes no explicit
    # connection use this client.
    add_connection("default", connection)
    return connection
@fixture(scope="session")
def opensearch_version(client):
    """Yield the server version as a tuple of ints parsed from ``client.info()``.

    Only the leading run of digits and dots in ``info["version"]["number"]``
    is used, so any suffix after it is ignored.

    Raises:
        ValueError: if the reported version does not start with digits/dots.
    """
    number = client.info()["version"]["number"]
    match = re.match(r"^([0-9.]+)", number)
    if match is None:
        # Fail with a readable message instead of an AttributeError on None.
        raise ValueError("Unrecognised OpenSearch version: %r" % (number,))
    yield tuple(int(part) for part in match.group(1).split("."))
@fixture
def write_client(client):
    """Provide the shared client, then remove ``test-*`` indices and ``test-template``."""
    yield client

    # Teardown: 404 is ignored because a test may not have created anything.
    indices = client.indices
    indices.delete("test-*", ignore=404)
    indices.delete_template("test-template", ignore=404)
@fixture(scope="session")
def data_client(client):
    """Create and populate the ``git`` and ``flat-git`` indices for the session."""
    # Mappings first, so the bulk load below indexes into prepared indices.
    create_git_index(client, "git")
    create_flat_git_index(client, "flat-git")

    for dataset in (DATA, FLAT_DATA):
        bulk(client, dataset, raise_on_error=True, refresh=True)

    yield client

    # Teardown: drop both indices, tolerating ones that are already gone.
    for index_name in ("git", "flat-git"):
        client.indices.delete(index_name, ignore=404)
@fixture
def pull_request(write_client):
    """Save and return a ``PullRequest`` with one comment carrying one history entry."""
    PullRequest.init()

    history_entry = History(
        timestamp=datetime(2012, 1, 1),
        diff="-Ahoj Svete!\n+Hello World!",
    )
    comment = Comment(
        content="Hello World!",
        author=User(name="honzakral"),
        created_at=datetime(2018, 1, 9, 10, 17, 3, 21184),
        history=[history_entry],
    )
    saved = PullRequest(
        _id=42,
        comments=[comment],
        created_at=datetime(2018, 1, 9, 9, 17, 3, 21184),
    )
    # refresh=True is passed so the document is searchable by the tests.
    saved.save(refresh=True)
    return saved
@fixture
def setup_ubq_tests(client):
    """Create and fill the ``test-git`` index and return its name."""
    index_name = "test-git"
    create_git_index(client, index_name)
    bulk(client, TEST_GIT_DATA, raise_on_error=True, refresh=True)
    # NOTE(review): no teardown here; presumably the index is removed by
    # write_client's "test-*" cleanup when a test requests both - confirm.
    return index_name
@@ -30,8 +30,8 @@ from mock import patch
from opensearchpy import TransportError, helpers
from opensearchpy.helpers import ScanError
from ..test_cases import SkipTest
from . import OpenSearchTestCase
from ...test_cases import SkipTest
from .. import OpenSearchTestCase
class FailingBulkClient(object):
@@ -87,7 +87,7 @@ class TestStreamingBulk(OpenSearchTestCase):
self.client, [{"a": "b"}, {"a": "c"}], index="i", raise_on_error=True
):
self.assertTrue(ok)
except helpers.BulkIndexError as e:
except helpers.errors.BulkIndexError as e:
self.assertEqual(2, len(e.errors))
else:
assert False, "exception should have been raised"
@@ -303,7 +303,7 @@ class TestBulk(OpenSearchTestCase):
self.client.cluster.health(wait_for_status="yellow")
self.assertRaises(
helpers.BulkIndexError,
helpers.errors.BulkIndexError,
helpers.bulk,
self.client,
[{"a": 42}, {"a": "c"}],
@@ -331,7 +331,7 @@ class TestBulk(OpenSearchTestCase):
# ignore only the status code in the `ignore_status` argument
self.assertRaises(
helpers.BulkIndexError,
helpers.errors.BulkIndexError,
helpers.bulk,
self.client,
[{"a": 42}, {"a": "c"}],
@@ -0,0 +1,55 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from opensearchpy import analyzer, token_filter, tokenizer
def test_simulate_with_just__builtin_tokenizer(client):
    """The ``keyword`` tokenizer returns the whole input as a single token."""
    keyword_analyzer = analyzer("my-analyzer", tokenizer="keyword")
    result = keyword_analyzer.simulate("Hello World!", using=client)

    tokens = result.tokens
    assert len(tokens) == 1
    assert tokens[0].token == "Hello World!"
def test_simulate_complex(client):
    """A custom tokenizer and filter chain splits on ``:`` and drops ``if``."""
    colon_splitter = tokenizer("split_words", "simple_pattern_split", pattern=":")
    drop_ifs = token_filter("no-ifs", "stop", stopwords=["if"])
    custom = analyzer(
        "my-analyzer",
        tokenizer=colon_splitter,
        filter=["lowercase", drop_ifs],
    )

    tokens = custom.simulate("if:this:works", using=client).tokens

    assert len(tokens) == 2
    assert ["this", "works"] == [t.token for t in tokens]
def test_simulate_builtin(client):
    """The built-in ``english`` analyzer reduces the sample words to ``fix``/``run``."""
    english = analyzer("my-analyzer", "english")
    # No ``using=`` is passed here.
    # NOTE(review): presumably this relies on the default connection set up
    # by the ``client`` fixture - confirm.
    result = english.simulate("fixes running")
    assert ["fix", "run"] == [t.token for t in result.tokens]
@@ -0,0 +1,51 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from opensearchpy.helpers.search import Q, Search
def test_count_all(data_client):
    """An unfiltered search on ``git`` counts 53 documents."""
    everything = Search(using=data_client).index("git")
    assert 53 == everything.count()
def test_count_prefetch(data_client, mocker):
    """``count`` skips the client call after ``execute`` unless the total is ``gte``."""
    mocker.spy(data_client, "count")

    s = Search(using=data_client).index("git")
    s.execute()

    # After execute(): the count is answered with no client.count call.
    assert s.count() == 53
    assert data_client.count.call_count == 0

    # With relation "gte" a real count request is issued.
    s._response.hits.total.relation = "gte"
    assert s.count() == 53
    assert data_client.count.call_count == 1
def test_count_filter(data_client):
    """Only two documents in ``git`` lack a ``parent_shas`` field."""
    without_parents = ~Q("exists", field="parent_shas")
    s = Search(using=data_client).index("git").filter(without_parents)
    # initial commit + repo document
    assert 2 == s.count()
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,566 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from datetime import datetime
from ipaddress import ip_address
import pytest
from pytest import raises
from pytz import timezone
from opensearchpy import (
Binary,
Boolean,
ConflictError,
Date,
Document,
Double,
InnerDoc,
Ip,
Keyword,
Long,
Mapping,
MetaField,
Nested,
NotFoundError,
Object,
Q,
RankFeatures,
Text,
analyzer,
)
from opensearchpy.helpers.utils import AttrList
# Custom analyzer shared by the ``description`` fields of the documents below:
# standard tokenizer followed by the lowercase and snowball filters.
snowball = analyzer("my_snow", tokenizer="standard", filter=["lowercase", "snowball"])
class User(InnerDoc):
    """Inner document describing a user by name."""

    # Text field with an additional ``raw`` keyword sub-field.
    name = Text(fields={"raw": Keyword()})
class Wiki(Document):
    """Document stored in the ``test-wiki`` index, used by the update/save tests."""

    owner = Object(User)
    views = Long()
    ranked = RankFeatures()

    class Index:
        # Matches the "test-*" pattern deleted by the write_client fixture.
        name = "test-wiki"
class Repository(Document):
    """Repository document stored in the ``git`` index."""

    owner = Object(User)
    created_at = Date()
    description = Text(analyzer=snowball)
    tags = Keyword()

    @classmethod
    def search(cls):
        """Return the base search narrowed to documents with ``commit_repo == "repo"``."""
        base_search = super(Repository, cls).search()
        return base_search.filter("term", commit_repo="repo")

    class Index:
        name = "git"
class Commit(Document):
    """Commit document stored in the ``flat-git`` index."""

    committed_date = Date()
    authored_date = Date()
    description = Text(analyzer=snowball)

    class Index:
        name = "flat-git"

    class Meta:
        # A Mapping() constructed with no arguments is supplied explicitly.
        mapping = Mapping()
class History(InnerDoc):
    """Inner document holding one revision of a comment: a timestamp and a diff."""

    timestamp = Date()
    diff = Text()
class Comment(InnerDoc):
    """Inner document for a pull-request comment with author and edit history."""

    content = Text()
    created_at = Date()
    author = Object(User)
    # Nested (rather than Object) so history entries can be queried individually.
    history = Nested(History)

    class Meta:
        # ``dynamic`` is set to False via MetaField.
        dynamic = MetaField(False)
class PullRequest(Document):
    """Pull-request document with nested comments, stored in ``test-prs``."""

    comments = Nested(Comment)
    created_at = Date()

    class Index:
        name = "test-prs"
class SerializationDoc(Document):
    """Document with one field per scalar type exercised by ``test_serialization``."""

    i = Long()
    b = Boolean()
    d = Double()
    bin = Binary()
    ip = Ip()

    class Index:
        name = "test-serialization"
def test_serialization(write_client):
    """Raw values are coerced to Python types on load and serialized back on dump."""
    SerializationDoc.init()
    raw_body = {
        "i": [1, 2, "3", None],
        "b": [True, False, "true", "false", None],
        "d": [0.1, "-0.1", None],
        "bin": ["SGVsbG8gV29ybGQ=", None],
        "ip": ["::1", "127.0.0.1", None],
    }
    write_client.index(index="test-serialization", id=42, body=raw_body)

    doc = SerializationDoc.get(id=42)

    # Deserialized view: strings coerced to the declared field types.
    assert doc.i == [1, 2, 3, None]
    assert doc.b == [True, False, True, False, None]
    assert doc.d == [0.1, -0.1, None]
    assert doc.bin == [b"Hello World", None]
    assert doc.ip == [ip_address("::1"), ip_address("127.0.0.1"), None]

    # Serialized view: binary and IP values are strings again.
    expected_dump = {
        "b": [True, False, True, False, None],
        "bin": ["SGVsbG8gV29ybGQ=", None],
        "d": [0.1, -0.1, None],
        "i": [1, 2, 3, None],
        "ip": ["::1", "127.0.0.1", None],
    }
    assert doc.to_dict() == expected_dump
def test_nested_inner_hits_are_wrapped_properly(pull_request):
    """Inner hits at both nesting levels come back as their document classes."""
    history_query = Q(
        "nested",
        path="comments.history",
        inner_hits={},
        query=Q("match", comments__history__diff="ahoj"),
    )
    search = PullRequest.search().query(
        "nested", inner_hits={}, path="comments", query=history_query
    )

    top_hit = search.execute().hits[0]
    assert isinstance(top_hit, PullRequest)
    assert isinstance(top_hit.comments[0], Comment)
    assert isinstance(top_hit.comments[0].history[0], History)

    # First level of inner hits: the matching comment.
    inner_comment = top_hit.meta.inner_hits.comments.hits[0]
    assert isinstance(inner_comment, Comment)
    assert inner_comment.author.name == "honzakral"
    assert isinstance(inner_comment.history[0], History)

    # Second level: the matching history entry within that comment.
    inner_history = inner_comment.meta.inner_hits["comments.history"].hits[0]
    assert isinstance(inner_history, History)
    assert inner_history.timestamp == datetime(2012, 1, 1)
    assert "score" in inner_history.meta
def test_nested_inner_hits_are_deserialized_properly(pull_request):
    """Date fields on the hit and its nested comments are real ``datetime`` objects."""
    content_query = Q("match", comments__content="hello")
    search = PullRequest.search().query(
        "nested",
        inner_hits={},
        path="comments",
        query=content_query,
    )

    hit = search.execute().hits[0]
    first_comment = hit.comments[0]

    assert isinstance(hit.created_at, datetime)
    assert isinstance(first_comment, Comment)
    assert isinstance(first_comment.created_at, datetime)
def test_nested_top_hits_are_wrapped_properly(pull_request):
    """``top_hits`` under a nested aggregation returns ``Comment`` instances."""
    s = PullRequest.search()
    s.aggs.bucket("comments", "nested", path="comments").metric(
        "hits", "top_hits", size=1
    )

    r = s.execute()

    # The leftover debugging print of the raw response was removed.
    assert isinstance(r.aggregations.comments.hits.hits[0], Comment)
def test_update_object_field(write_client):
    """Updating an object field with a list is applied locally and persisted."""
    Wiki.init()
    wiki = Wiki(
        owner=User(name="Honza Kral"),
        _id="opensearch-py",
        ranked={"test1": 0.1, "topic2": 0.2},
    )
    wiki.save()

    outcome = wiki.update(owner=[{"name": "Honza"}, {"name": "Nick"}])
    assert "updated" == outcome
    # The in-memory instance reflects the update...
    assert wiki.owner[0].name == "Honza"
    assert wiki.owner[1].name == "Nick"

    # ...and so does a fresh copy from the index; ``ranked`` is unchanged.
    reloaded = Wiki.get(id="opensearch-py")
    assert reloaded.owner[0].name == "Honza"
    assert reloaded.owner[1].name == "Nick"
    assert reloaded.ranked == {"test1": 0.1, "topic2": 0.2}
def test_update_script(write_client):
    """A scripted update with a parameter modifies the stored document."""
    Wiki.init()
    original = Wiki(owner=User(name="Honza Kral"), _id="opensearch-py", views=42)
    original.save()

    original.update(script="ctx._source.views += params.inc", inc=5)

    reloaded = Wiki.get(id="opensearch-py")
    assert reloaded.views == 47
def test_update_retry_on_conflict(write_client):
    """With ``retry_on_conflict=1`` both updates from stale copies succeed."""
    Wiki.init()
    seed = Wiki(owner=User(name="Honza Kral"), _id="opensearch-py", views=42)
    seed.save()

    # Both copies are fetched before either update, so the second is stale.
    first_copy = Wiki.get(id="opensearch-py")
    second_copy = Wiki.get(id="opensearch-py")
    for copy in (first_copy, second_copy):
        copy.update(
            script="ctx._source.views += params.inc", inc=5, retry_on_conflict=1
        )

    final = Wiki.get(id="opensearch-py")
    assert final.views == 52
@pytest.mark.parametrize("retry_on_conflict", [None, 0])
def test_update_conflicting_version(write_client, retry_on_conflict):
    """With no retries, updating from a stale copy raises ``ConflictError``."""
    Wiki.init()
    seed = Wiki(owner=User(name="Honza Kral"), _id="opensearch-py", views=42)
    seed.save()

    fresh = Wiki.get(id="opensearch-py")
    stale = Wiki.get(id="opensearch-py")
    # This update succeeds and makes ``stale`` out of date.
    fresh.update(script="ctx._source.views += params.inc", inc=5)

    with raises(ConflictError):
        stale.update(
            script="ctx._source.views += params.inc",
            inc=5,
            retry_on_conflict=retry_on_conflict,
        )
def test_save_and_update_return_doc_meta(write_client):
    """``return_doc_meta=True`` makes ``save``/``update`` return the response metadata."""
    # Keys that must appear in the metadata of both the save and the update.
    meta_keys = ("_id", "_primary_term", "_seq_no", "_shards", "_version")

    Wiki.init()
    w = Wiki(owner=User(name="Honza Kral"), _id="opensearch-py", views=42)

    resp = w.save(return_doc_meta=True)
    assert resp["_index"] == "test-wiki"
    assert resp["result"] == "created"
    for key in meta_keys:
        # Plain membership test instead of calling ``__contains__`` directly.
        assert key in resp

    resp = w.update(
        script="ctx._source.views += params.inc", inc=5, return_doc_meta=True
    )
    assert resp["_index"] == "test-wiki"
    assert resp["result"] == "updated"
    for key in meta_keys:
        assert key in resp
def test_init(write_client):
    """``init`` with an explicit index name creates that index."""
    target_index = "test-git"
    Repository.init(index=target_index)
    assert write_client.indices.exists(index=target_index)
def test_get_raises_404_on_index_missing(data_client):
    """``get`` against a non-existent index raises ``NotFoundError``."""
    expect_not_found = raises(NotFoundError)
    with expect_not_found:
        Repository.get("opensearch-dsl-php", index="not-there")
def test_get_raises_404_on_non_existent_id(data_client):
    """``get`` for an unknown id raises ``NotFoundError``."""
    expect_not_found = raises(NotFoundError)
    with expect_not_found:
        Repository.get("opensearch-dsl-php")
def test_get_returns_none_if_404_ignored(data_client):
    """With ``ignore=404`` an unknown id yields ``None`` instead of raising."""
    missing = Repository.get("opensearch-dsl-php", ignore=404)
    assert None is missing
def test_get_returns_none_if_404_ignored_and_index_doesnt_exist(data_client):
    """With ``ignore=404`` a missing index also yields ``None``."""
    missing = Repository.get("42", index="not-there", ignore=404)
    assert None is missing
def test_get(data_client):
    """``get`` returns a ``Repository`` with nested owner and parsed date."""
    repo = Repository.get("opensearch-py")

    assert isinstance(repo, Repository)
    assert repo.owner.name == "opensearch"
    assert datetime(2014, 3, 3) == repo.created_at
def test_exists_return_true(data_client):
    """``exists`` is truthy for the ``opensearch-py`` document."""
    found = Repository.exists("opensearch-py")
    assert found
def test_exists_false(data_client):
    """``exists`` is falsy for an id that was never indexed."""
    found = Repository.exists("opensearch-dsl-php")
    assert not found
def test_get_with_tz_date(data_client):
    """A stored date with an offset loads as an equal timezone-aware datetime."""
    commit = Commit.get(
        id="3ca6e1e73a071a705b4babd2f581c91a2a3e5037", routing="opensearch-py"
    )

    prague = timezone("Europe/Prague")
    expected = prague.localize(datetime(2014, 5, 2, 13, 47, 19, 123000))
    assert expected == commit.authored_date
def test_save_with_tz_date(data_client):
    """A timezone-aware datetime keeps its microseconds across save and reload."""
    prague = timezone("Europe/Prague")
    commit_id = "3ca6e1e73a071a705b4babd2f581c91a2a3e5037"

    commit = Commit.get(id=commit_id, routing="opensearch-py")
    commit.committed_date = prague.localize(
        datetime(2014, 5, 2, 13, 47, 19, 123456)
    )
    commit.save()

    reloaded = Commit.get(id=commit_id, routing="opensearch-py")
    expected = prague.localize(datetime(2014, 5, 2, 13, 47, 19, 123456))
    assert expected == reloaded.committed_date
# Input for the ``Commit.mget`` tests below: existing and missing ids are
# interleaved so the tests can check both positions and ordering.
COMMIT_DOCS_WITH_MISSING = [
    {"_id": "0"},  # Missing
    {"_id": "3ca6e1e73a071a705b4babd2f581c91a2a3e5037"},  # Existing
    {"_id": "f"},  # Missing
    {"_id": "eb3e543323f189fd7b698e66295427204fff5755"},  # Existing
]
def test_mget(data_client):
    """By default ``mget`` keeps positions, putting ``None`` for missing documents."""
    results = Commit.mget(COMMIT_DOCS_WITH_MISSING)

    missing_first, found_first, missing_second, found_second = (
        results[0],
        results[1],
        results[2],
        results[3],
    )
    assert missing_first is None
    assert found_first.meta.id == "3ca6e1e73a071a705b4babd2f581c91a2a3e5037"
    assert missing_second is None
    assert found_second.meta.id == "eb3e543323f189fd7b698e66295427204fff5755"
def test_mget_raises_exception_when_missing_param_is_invalid(data_client):
    """An unrecognised ``missing`` value makes ``mget`` raise ``ValueError``."""
    expect_value_error = raises(ValueError)
    with expect_value_error:
        Commit.mget(COMMIT_DOCS_WITH_MISSING, missing="raj")
def test_mget_raises_404_when_missing_param_is_raise(data_client):
    """``missing="raise"`` turns absent documents into ``NotFoundError``."""
    expect_not_found = raises(NotFoundError)
    with expect_not_found:
        Commit.mget(COMMIT_DOCS_WITH_MISSING, missing="raise")
def test_mget_ignores_missing_docs_when_missing_param_is_skip(data_client):
    """``missing="skip"`` drops absent documents, leaving the existing ones in order."""
    found = Commit.mget(COMMIT_DOCS_WITH_MISSING, missing="skip")

    first, second = found[0], found[1]
    assert first.meta.id == "3ca6e1e73a071a705b4babd2f581c91a2a3e5037"
    assert second.meta.id == "eb3e543323f189fd7b698e66295427204fff5755"
def test_update_works_from_search_response(data_client):
    """A document obtained from a search hit can be partially updated."""
    hit = Repository.search().execute()[0]

    hit.update(owner={"other_name": "opensearchpy"})
    assert "opensearchpy" == hit.owner.other_name

    # The stored copy has the new key and keeps the existing owner name.
    stored = Repository.get("opensearch-py")
    assert "opensearchpy" == stored.owner.other_name
    assert "opensearch" == stored.owner.name
def test_update(data_client):
    """``update`` merges fields, bumps the version and changes the sequence number."""
    repo = Repository.get("opensearch-py")
    version_before = repo.meta.version
    seq_no_before = repo.meta.seq_no

    repo.update(owner={"new_name": "opensearchpy"}, new_field="testing-update")

    assert "opensearchpy" == repo.owner.new_name
    assert "testing-update" == repo.new_field
    # assert version has been updated
    assert repo.meta.version == version_before + 1

    stored = Repository.get("opensearch-py")
    assert "testing-update" == stored.new_field
    assert "opensearchpy" == stored.owner.new_name
    assert "opensearch" == stored.owner.name
    assert "seq_no" in stored.meta
    assert stored.meta.seq_no != seq_no_before
    assert "primary_term" in stored.meta
def test_save_updates_existing_doc(data_client):
    """Saving a loaded document updates it and refreshes its ``seq_no``."""
    repo = Repository.get("opensearch-py")
    repo.new_field = "testing-save"
    seq_no_before = repo.meta.seq_no

    assert "updated" == repo.save()

    raw = data_client.get(index="git", id="opensearch-py")
    assert "testing-save" == raw["_source"]["new_field"]
    assert raw["_seq_no"] != seq_no_before
    assert raw["_seq_no"] == repo.meta.seq_no
def test_save_automatically_uses_seq_no_and_primary_term(data_client):
    """``save`` raises ``ConflictError`` when the document's ``seq_no`` is altered."""
    repo = Repository.get("opensearch-py")
    # Alter the loaded sequence number so it no longer matches the stored one.
    repo.meta.seq_no += 1

    expect_conflict = raises(ConflictError)
    with expect_conflict:
        repo.save()
def test_delete_automatically_uses_seq_no_and_primary_term(data_client):
    """``delete`` raises ``ConflictError`` when the document's ``seq_no`` is altered."""
    repo = Repository.get("opensearch-py")
    # Alter the loaded sequence number so it no longer matches the stored one.
    repo.meta.seq_no += 1

    expect_conflict = raises(ConflictError)
    with expect_conflict:
        repo.delete()
def assert_doc_equals(expected, actual):
    """Assert every key of ``expected`` is in ``actual`` with an equal value.

    Keys present only in ``actual`` are ignored.
    """
    for key, wanted in expected.items():
        assert key in actual
        assert actual[key] == wanted
def test_can_save_to_different_index(write_client):
    """``save(index=...)`` writes to the given index instead of the class default."""
    repo = Repository(description="testing", meta={"id": 42})
    assert repo.save(index="test-document")

    stored = write_client.get(index="test-document", id=42)
    expected = {
        "found": True,
        "_index": "test-document",
        "_id": "42",
        "_source": {"description": "testing"},
    }
    assert_doc_equals(expected, stored)
def test_save_without_skip_empty_will_include_empty_fields(write_client):
    """With ``skip_empty=False`` empty lists, ``None`` and empty dicts are stored."""
    repo = Repository(field_1=[], field_2=None, field_3={}, meta={"id": 42})
    assert repo.save(index="test-document", skip_empty=False)

    stored = write_client.get(index="test-document", id=42)
    expected = {
        "found": True,
        "_index": "test-document",
        "_id": "42",
        "_source": {"field_1": [], "field_2": None, "field_3": {}},
    }
    assert_doc_equals(expected, stored)
def test_delete(write_client):
    """``delete`` removes the document addressed by ``meta.id`` and ``meta.index``."""
    source = {
        "organization": "opensearch",
        "created_at": "2014-03-03",
        "owner": {"name": "opensearch"},
    }
    write_client.create(index="test-document", id="opensearch-py", body=source)

    # Only id and index are set; no field data is loaded before deleting.
    repo = Repository(meta={"id": "opensearch-py"})
    repo.meta.index = "test-document"
    repo.delete()

    still_there = write_client.exists(
        index="test-document",
        id="opensearch-py",
    )
    assert not still_there
def test_search(data_client):
    """The filtered ``Repository.search`` matches exactly one document."""
    matches = Repository.search().count()
    assert matches == 1
def test_search_returns_proper_doc_classes(data_client):
    """Search hits are instances of the document class that ran the search."""
    response = Repository.search().execute()
    first_hit = response.hits[0]

    assert isinstance(first_hit, Repository)
    assert first_hit.owner.name == "opensearch"
def test_refresh_mapping(data_client):
    """``load_mappings`` fills the index mapping of a document with no declared fields."""

    # Local class that shadows the module-level Commit and declares no fields.
    class Commit(Document):
        class Index:
            name = "git"

    Commit._index.load_mappings()

    for field_name in ("stats", "committer", "description", "committed_date"):
        assert field_name in Commit._index._mapping
    assert isinstance(Commit._index._mapping["committed_date"], Date)
def test_highlight_in_meta(data_client):
    """Highlight fragments are exposed on ``hit.meta.highlight`` as ``AttrList``."""
    search = Commit.search().query("match", description="inverting")
    search = search.highlight("description")
    commit = search.execute()[0]

    assert isinstance(commit, Commit)
    assert "description" in commit.meta.highlight

    fragments = commit.meta.highlight["description"]
    assert isinstance(fragments, AttrList)
    assert len(fragments) > 0
@@ -0,0 +1,287 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from datetime import datetime
import pytest
from opensearchpy import A, Boolean, Date, Document, Keyword
from opensearchpy.helpers.faceted_search import (
DateHistogramFacet,
FacetedSearch,
NestedFacet,
RangeFacet,
TermsFacet,
)
from .test_document import PullRequest
# Document class with a boolean ``is_public`` field and a ``created_at`` date
# field; it is the doc type used by the RepoSearch faceted search in this module.
class Repos(Document):
is_public = Boolean()
created_at = Date()
# Index options: this document class is bound to the "git" index.
class Index:
name = "git"
# Document class with a keyword ``files`` field and a ``committed_date`` date
# field; it is the doc type used by MetricSearch in this module.
class Commit(Document):
files = Keyword()
committed_date = Date()
# Index options: this document class is bound to the "git" index.
class Index:
name = "git"
# Faceted search over the "git" index returning Commit documents. Its single
# "files" facet is a terms facet on ``files`` that uses a ``max`` aggregation
# on ``committed_date`` as the facet metric instead of the default.
class MetricSearch(FacetedSearch):
index = "git"
doc_types = [Commit]
facets = {
"files": TermsFacet(field="files", metric=A("max", field="committed_date")),
}
@pytest.fixture(scope="session")
def commit_search_cls(opensearch_version):
    """Session fixture returning a ``FacetedSearch`` class over ``flat-git``.

    The class searches the ``description`` and ``files`` fields and defines a
    terms facet on files, a daily date histogram on ``authored_date`` and a
    range facet on ``stats.deletions``.
    """

    class CommitSearch(FacetedSearch):
        index = "flat-git"
        fields = ("description", "files")

        facets = {
            "files": TermsFacet(field="files"),
            "frequency": DateHistogramFacet(
                field="authored_date", min_doc_count=1, fixed_interval="1d"
            ),
            "deletions": RangeFacet(
                field="stats.deletions",
                ranges=[
                    ("ok", (None, 1)),
                    ("good", (1, 5)),
                    ("better", (5, None)),
                ],
            ),
        }

    return CommitSearch
@pytest.fixture(scope="session")
def repo_search_cls(opensearch_version):
    """Session fixture returning a ``FacetedSearch`` class over ``Repos`` docs.

    The class facets on ``is_public`` and on a monthly calendar histogram of
    ``created_at``, and filters its base search on ``commit_repo == "repo"``.
    """

    class RepoSearch(FacetedSearch):
        index = "git"
        doc_types = [Repos]
        facets = {
            "public": TermsFacet(field="is_public"),
            "created": DateHistogramFacet(
                field="created_at", calendar_interval="month"
            ),
        }

        def search(self):
            # Narrow the default search before facets and filters are applied.
            base_search = super(RepoSearch, self).search()
            return base_search.filter("term", commit_repo="repo")

    return RepoSearch
@pytest.fixture(scope="session")
def pr_search_cls(opensearch_version):
    """Session fixture returning a ``FacetedSearch`` class over ``test-prs``.

    Its only facet is a nested facet on ``comments`` wrapping a monthly
    calendar histogram of ``comments.created_at``.
    """

    class PRSearch(FacetedSearch):
        index = "test-prs"
        doc_types = [PullRequest]
        facets = {
            "comments": NestedFacet(
                "comments",
                DateHistogramFacet(
                    field="comments.created_at", calendar_interval="month"
                ),
            )
        }

    return PRSearch
def test_facet_with_custom_metric(data_client):
    """Facet values of ``MetricSearch`` come back in descending metric order."""
    response = MetricSearch().execute()

    # The second element of each facet tuple is the metric value.
    dates = [facet[1] for facet in response.facets.files]
    assert dates == sorted(dates, reverse=True)
    assert dates[0] == 1399038439000
def test_nested_facet(pull_request, pr_search_cls):
    """The nested ``comments`` facet reports one unselected January 2018 bucket."""
    response = pr_search_cls().execute()

    assert response.hits.total.value == 1
    expected_comments = [(datetime(2018, 1, 1, 0, 0), 1, False)]
    assert expected_comments == response.facets.comments
def test_nested_facet_with_filter(pull_request, pr_search_cls):
    """Filtering on the nested facet marks the bucket selected and limits hits."""
    january = datetime(2018, 1, 1, 0, 0)
    february = datetime(2018, 2, 1, 0, 0)

    # Filtering on the month that has a comment keeps the single hit.
    response = pr_search_cls(filters={"comments": january}).execute()
    assert response.hits.total.value == 1
    assert [(january, 1, True)] == response.facets.comments

    # Filtering on the following month leaves no hits.
    response = pr_search_cls(filters={"comments": february}).execute()
    assert not response.hits
def test_datehistogram_facet(data_client, repo_search_cls):
    """The ``created`` facet reports a single March 2014 bucket."""
    response = repo_search_cls().execute()

    assert response.hits.total.value == 1
    expected_created = [(datetime(2014, 3, 1, 0, 0), 1, False)]
    assert expected_created == response.facets.created
def test_boolean_facet(data_client, repo_search_cls):
    """The ``public`` facet yields the real ``True`` object as its value."""
    response = repo_search_cls().execute()

    assert response.hits.total.value == 1
    assert [(True, 1, False)] == response.facets.public

    # Unpack all three members so a malformed facet tuple still fails here.
    value, _count, _selected = response.facets.public[0]
    assert value is True
def test_empty_search_finds_everything(
    data_client, opensearch_version, commit_search_cls
):
    """An unfiltered faceted search returns all 52 commits and full facets."""
    response = commit_search_cls().execute()
    assert response.hits.total.value == 52

    # (file path, document count) per terms bucket; nothing is selected.
    file_counts = [
        ("opensearchpy", 39),
        ("test_opensearchpy", 35),
        ("test_opensearchpy/test_dsl", 35),
        ("opensearchpy/query.py", 18),
        ("test_opensearchpy/test_dsl/test_search.py", 15),
        ("opensearchpy/utils.py", 14),
        ("test_opensearchpy/test_dsl/test_query.py", 13),
        ("opensearchpy/search.py", 12),
        ("opensearchpy/aggs.py", 11),
        ("test_opensearchpy/test_dsl/test_result.py", 5),
    ]
    expected_files = [(path, count, False) for path, count in file_counts]
    assert expected_files == response.facets.files

    # (month, day, document count) per daily bucket of 2014.
    daily_counts = [
        (3, 3, 2),
        (3, 4, 1),
        (3, 5, 3),
        (3, 6, 3),
        (3, 7, 9),
        (3, 10, 2),
        (3, 15, 4),
        (3, 21, 2),
        (3, 23, 2),
        (3, 24, 10),
        (4, 20, 2),
        (4, 22, 2),
        (4, 25, 3),
        (4, 26, 2),
        (4, 27, 2),
        (5, 1, 2),
        (5, 2, 1),
    ]
    expected_frequency = [
        (datetime(2014, month, day, 0, 0), count, False)
        for month, day, count in daily_counts
    ]
    assert expected_frequency == response.facets.frequency

    expected_deletions = [
        ("ok", 19, False),
        ("good", 14, False),
        ("better", 19, False),
    ]
    assert expected_deletions == response.facets.deletions
def test_term_filters_are_shown_as_selected_and_data_is_filtered(
    data_client, commit_search_cls
):
    """A ``files`` filter flags its bucket as selected and narrows other facets."""
    selected_path = "test_opensearchpy/test_dsl"
    response = commit_search_cls(filters={"files": selected_path}).execute()
    assert 35 == response.hits.total.value

    # The files facet keeps its unfiltered counts; only the selected flag changes.
    file_counts = [
        ("opensearchpy", 39),
        ("test_opensearchpy", 35),
        ("test_opensearchpy/test_dsl", 35),
        ("opensearchpy/query.py", 18),
        ("test_opensearchpy/test_dsl/test_search.py", 15),
        ("opensearchpy/utils.py", 14),
        ("test_opensearchpy/test_dsl/test_query.py", 13),
        ("opensearchpy/search.py", 12),
        ("opensearchpy/aggs.py", 11),
        ("test_opensearchpy/test_dsl/test_result.py", 5),
    ]
    expected_files = [
        (path, count, path == selected_path) for path, count in file_counts
    ]
    assert expected_files == response.facets.files

    # (month, day, document count) per daily bucket of 2014 after filtering.
    daily_counts = [
        (3, 3, 1),
        (3, 5, 2),
        (3, 6, 3),
        (3, 7, 6),
        (3, 10, 1),
        (3, 15, 3),
        (3, 21, 2),
        (3, 23, 1),
        (3, 24, 7),
        (4, 20, 1),
        (4, 25, 3),
        (4, 26, 2),
        (4, 27, 1),
        (5, 1, 1),
        (5, 2, 1),
    ]
    expected_frequency = [
        (datetime(2014, month, day, 0, 0), count, False)
        for month, day, count in daily_counts
    ]
    assert expected_frequency == response.facets.frequency

    expected_deletions = [
        ("ok", 12, False),
        ("good", 10, False),
        ("better", 13, False),
    ]
    assert expected_deletions == response.facets.deletions
def test_range_filters_are_shown_as_selected_and_data_is_filtered(
    data_client, commit_search_cls
):
    """Filtering on the ``better`` deletions range leaves 19 hits."""
    search = commit_search_cls(filters={"deletions": "better"})
    response = search.execute()

    assert 19 == response.hits.total.value
def test_pagination(data_client, commit_search_cls):
    """Slicing a faceted search limits returned hits but not ``count()``."""
    first_page = commit_search_cls()[0:20]

    assert 52 == first_page.count()
    assert 20 == len(first_page.execute())
@@ -0,0 +1,122 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from opensearchpy import Date, Document, Index, IndexTemplate, Text
from opensearchpy.helpers import analysis
# Document class used by the index tests in this module: a text ``title`` field
# analyzed with a custom "my_analyzer" (keyword tokenizer) and a
# ``published_from`` date field.
class Post(Document):
title = Text(analyzer=analysis.analyzer("my_analyzer", tokenizer="keyword"))
published_from = Date()
def test_index_template_works(write_client):
    """An index matching a saved template's pattern gets the template's mappings."""
    template = IndexTemplate("test-template", "test-*")
    template.document(Post)
    template.settings(number_of_replicas=0, number_of_shards=1)
    template.save()

    # "test-blog" matches the "test-*" pattern of the template above.
    blog_index = Index("test-blog")
    blog_index.create()

    expected_mapping = {
        "test-blog": {
            "mappings": {
                "properties": {
                    "title": {"type": "text", "analyzer": "my_analyzer"},
                    "published_from": {"type": "date"},
                }
            }
        }
    }
    assert expected_mapping == write_client.indices.get_mapping(index="test-blog")
def test_index_can_be_saved_even_with_settings(write_client):
    """Saving an index a second time applies the changed replica setting."""
    blog_index = Index("test-blog", using=write_client)
    blog_index.settings(number_of_shards=3, number_of_replicas=0)
    blog_index.save()

    blog_index.settings(number_of_replicas=1)
    blog_index.save()

    index_settings = blog_index.get_settings()["test-blog"]["settings"]["index"]
    assert "1" == index_settings["number_of_replicas"]
def test_index_exists(data_client):
    """``Index.exists()`` is truthy for "git" and falsy for "not-there"."""
    present = Index("git")
    assert present.exists()

    absent = Index("not-there")
    assert not absent.exists()
def test_index_can_be_created_with_settings_and_mappings(write_client):
    """Creating an index sends its document mappings, settings and analysis."""
    blog_index = Index("test-blog", using=write_client)
    blog_index.document(Post)
    blog_index.settings(number_of_replicas=0, number_of_shards=1)
    blog_index.create()

    expected_mapping = {
        "test-blog": {
            "mappings": {
                "properties": {
                    "title": {"type": "text", "analyzer": "my_analyzer"},
                    "published_from": {"type": "date"},
                }
            }
        }
    }
    assert expected_mapping == write_client.indices.get_mapping(index="test-blog")

    settings = write_client.indices.get_settings(index="test-blog")
    index_settings = settings["test-blog"]["settings"]["index"]
    assert index_settings["number_of_replicas"] == "0"
    assert index_settings["number_of_shards"] == "1"
    assert index_settings["analysis"] == {
        "analyzer": {"my_analyzer": {"type": "custom", "tokenizer": "keyword"}}
    }
def test_delete(write_client):
    """``Index.delete()`` removes an index created through the raw client."""
    creation_body = {"settings": {"number_of_replicas": 0, "number_of_shards": 1}}
    write_client.indices.create(index="test-index", body=creation_body)

    Index("test-index", using=write_client).delete()

    assert not write_client.indices.exists(index="test-index")
def test_multiple_indices_with_same_doc_type_work(write_client):
    """Two indices sharing the ``Post`` document both get its analysis settings."""
    index_names = ("test-index-1", "test-index-2")

    # Build both Index objects first, then register the document and create each.
    indices = [Index(name, using=write_client) for name in index_names]
    for index in indices:
        index.document(Post)
        index.create()

    expected_analysis = {
        "analyzer": {"my_analyzer": {"type": "custom", "tokenizer": "keyword"}}
    }
    for name in index_names:
        settings = write_client.indices.get_settings(index=name)
        assert settings[name]["settings"]["index"]["analysis"] == expected_analysis
@@ -0,0 +1,169 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from pytest import raises
from opensearchpy import exceptions
from opensearchpy.helpers import analysis, mapping
def test_mapping_saved_into_opensearch(write_client):
    """A mapping saved to a new index is returned unchanged by ``get_mapping``."""
    keyword_analyzer = analysis.analyzer("my_analyzer", tokenizer="keyword")

    m = mapping.Mapping()
    m.field("name", "text", analyzer=keyword_analyzer)
    m.field("tags", "keyword")
    m.save("test-mapping", using=write_client)

    expected = {
        "test-mapping": {
            "mappings": {
                "properties": {
                    "name": {"type": "text", "analyzer": "my_analyzer"},
                    "tags": {"type": "keyword"},
                }
            }
        }
    }
    assert expected == write_client.indices.get_mapping(index="test-mapping")
def test_mapping_saved_into_opensearch_when_index_already_exists_closed(write_client):
    """Saving raises ``IllegalOperation`` on the open index, works once closed."""
    keyword_analyzer = analysis.analyzer("my_analyzer", tokenizer="keyword")
    m = mapping.Mapping()
    m.field("name", "text", analyzer=keyword_analyzer)

    write_client.indices.create(index="test-mapping")

    # While the index is open the save is rejected.
    with raises(exceptions.IllegalOperation):
        m.save("test-mapping", using=write_client)

    # Wait for the index, close it, and retry the same save.
    write_client.cluster.health(index="test-mapping", wait_for_status="yellow")
    write_client.indices.close(index="test-mapping")
    m.save("test-mapping", using=write_client)

    expected = {
        "test-mapping": {
            "mappings": {
                "properties": {"name": {"type": "text", "analyzer": "my_analyzer"}}
            }
        }
    }
    assert expected == write_client.indices.get_mapping(index="test-mapping")
def test_mapping_saved_into_opensearch_when_index_already_exists_with_analysis(
    write_client,
):
    """A mapping saves into an index whose settings already define its analyzer."""
    analyzer = analysis.analyzer("my_analyzer", tokenizer="keyword")
    m = mapping.Mapping()
    m.field("name", "text", analyzer=analyzer)

    # Create the index with the mapping's analyzer plus one unrelated analyzer.
    index_analysis = analyzer.get_analysis_definition()
    index_analysis["analyzer"]["other_analyzer"] = {
        "type": "custom",
        "tokenizer": "whitespace",
    }
    creation_body = {"settings": {"analysis": index_analysis}}
    write_client.indices.create(index="test-mapping", body=creation_body)

    m.field("title", "text", analyzer=analyzer)
    m.save("test-mapping", using=write_client)

    expected = {
        "test-mapping": {
            "mappings": {
                "properties": {
                    "name": {"type": "text", "analyzer": "my_analyzer"},
                    "title": {"type": "text", "analyzer": "my_analyzer"},
                }
            }
        }
    }
    assert expected == write_client.indices.get_mapping(index="test-mapping")
def test_mapping_gets_updated_from_opensearch(write_client):
    """``Mapping.from_opensearch`` round-trips a mapping, also through an alias."""

    def snowball_text():
        # Fresh dict per call so no definition object is shared between fields.
        return {
            "type": "text",
            "analyzer": "snowball",
            "fields": {"raw": {"type": "keyword"}},
        }

    creation_body = {
        "settings": {"number_of_shards": 1, "number_of_replicas": 0},
        "mappings": {
            "date_detection": False,
            "properties": {
                "title": snowball_text(),
                "created_at": {"type": "date"},
                "comments": {
                    "type": "nested",
                    "properties": {
                        "created": {"type": "date"},
                        "author": snowball_text(),
                    },
                },
            },
        },
    }
    write_client.indices.create(index="test-mapping", body=creation_body)

    m = mapping.Mapping.from_opensearch("test-mapping", using=write_client)

    field_names = sorted(m.properties.properties._d_.keys())
    assert ["comments", "created_at", "title"] == field_names

    expected = {
        "date_detection": False,
        "properties": {
            "comments": {
                "type": "nested",
                "properties": {
                    "created": {"type": "date"},
                    "author": snowball_text(),
                },
            },
            "created_at": {"type": "date"},
            "title": snowball_text(),
        },
    }
    assert expected == m.to_dict()

    # test same with alias
    write_client.indices.put_alias(index="test-mapping", name="test-alias")
    from_alias = mapping.Mapping.from_opensearch("test-alias", using=write_client)
    assert from_alias.to_dict() == m.to_dict()
@@ -0,0 +1,186 @@
# -*- coding: utf-8 -*-
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import unicode_literals
from pytest import raises
from opensearchpy import (
Date,
Document,
Keyword,
MultiSearch,
Q,
Search,
Text,
TransportError,
)
from opensearchpy.helpers.response import aggs
from .test_data import FLAT_DATA
class Repository(Document):
    """Document in the ``git`` index whose default search is pre-filtered."""

    created_at = Date()
    description = Text(analyzer="snowball")
    tags = Keyword()

    @classmethod
    def search(cls):
        """Return the parent class's search with a ``commit_repo == "repo"`` term filter."""
        base_search = super(Repository, cls).search()
        return base_search.filter("term", commit_repo="repo")

    class Index:
        name = "git"
# Document class with no declared fields, bound to the "flat-git" index; the
# tests below use it to check that hits are wrapped in this class.
class Commit(Document):
class Index:
name = "flat-git"
def test_filters_aggregation_buckets_are_accessible(data_client):
    """Named buckets of a ``filters`` aggregation are reachable as attributes."""
    has_tests_query = Q("term", files="test_opensearchpy/test_dsl")

    # Empty slice: only the aggregations are of interest, not the hits.
    s = Commit.search()[0:0]
    top_authors = s.aggs.bucket("top_authors", "terms", field="author.name.raw")
    has_tests = top_authors.bucket(
        "has_tests", "filters", filters={"yes": has_tests_query, "no": ~has_tests_query}
    )
    has_tests.metric("lines", "stats", field="stats.lines")

    response = s.execute()
    yes_bucket = response.aggregations.top_authors.buckets[0].has_tests.buckets.yes

    assert isinstance(yes_bucket, aggs.Bucket)
    assert 35 == yes_bucket.doc_count
    assert 228 == yes_bucket.lines.max
def test_top_hits_are_wrapped_in_response(data_client):
    """A ``top_hits`` metric is returned as ``TopHitsData`` of ``Commit`` docs."""
    s = Commit.search()[0:0]
    top_authors = s.aggs.bucket("top_authors", "terms", field="author.name.raw")
    top_authors.metric("top_commits", "top_hits", size=5)

    response = s.execute()
    top_commits = response.aggregations.top_authors.buckets[0].top_commits

    assert isinstance(top_commits, aggs.TopHitsData)
    assert 5 == len(top_commits)

    # Iterating must yield the same number of hits that len() reports.
    hits = list(top_commits)
    assert 5 == len(hits)
    assert isinstance(hits[0], Commit)
def test_inner_hits_are_wrapped_in_response(data_client):
    """Inner hits on ``hit.meta`` use the same class as the outer response."""
    parent_query = Q("match_all")
    s = Search(index="git")[0:1].query(
        "has_parent", parent_type="repo", inner_hits={}, query=parent_query
    )

    response = s.execute()
    commit = response.hits[0]
    inner_repo = commit.meta.inner_hits.repo

    assert isinstance(inner_repo, response.__class__)
    assert repr(inner_repo[0]).startswith("<Hit(git/opensearch-py): ")
def test_scan_respects_doc_types(data_client):
    """``scan()`` on ``Repository.search()`` yields ``Repository`` instances."""
    repos = list(Repository.search().scan())

    assert 1 == len(repos)
    only_repo = repos[0]
    assert isinstance(only_repo, Repository)
    assert only_repo.organization == "opensearch"
def test_scan_iterates_through_all_docs(data_client):
    """``scan()`` over ``flat-git`` returns every document listed in ``FLAT_DATA``."""
    commits = list(Search(index="flat-git").scan())
    assert 52 == len(commits)

    expected_ids = {doc["_id"] for doc in FLAT_DATA}
    scanned_ids = {commit.meta.id for commit in commits}
    assert expected_ids == scanned_ids
def test_response_is_cached(data_client):
    """Iterating a search stores its response on the ``_response`` attribute."""
    search = Repository.search()
    repos = list(search)

    assert hasattr(search, "_response")
    assert search._response.hits == repos
def test_multi_search(data_client):
    """``MultiSearch`` returns one response per added search, in the same order."""
    repo_search = Repository.search()
    commit_search = Search(index="flat-git")

    ms = MultiSearch().add(repo_search).add(commit_search)
    repo_response, commit_response = ms.execute()

    assert 1 == len(repo_response)
    assert isinstance(repo_response[0], Repository)
    assert repo_response._search is repo_search

    assert 52 == commit_response.hits.total.value
    assert commit_response._search is commit_search
def test_multi_missing(data_client):
    """A search on a missing index raises unless ``raise_on_error=False``."""
    repo_search = Repository.search()
    commit_search = Search(index="flat-git")
    missing_search = Search(index="does_not_exist")

    ms = MultiSearch().add(repo_search).add(commit_search).add(missing_search)

    # Default behaviour: the failing sub-search raises.
    with raises(TransportError):
        ms.execute()

    # With raise_on_error=False the failing search comes back as None.
    repo_response, commit_response, missing_response = ms.execute(
        raise_on_error=False
    )

    assert 1 == len(repo_response)
    assert isinstance(repo_response[0], Repository)
    assert repo_response._search is repo_search

    assert 52 == commit_response.hits.total.value
    assert commit_response._search is commit_search

    assert missing_response is None
def test_raw_subfield_can_be_used_in_aggs(data_client):
    """A terms aggregation on the ``author.name.raw`` subfield returns a bucket."""
    search = Search(index="git")[0:0]
    search.aggs.bucket("authors", "terms", field="author.name.raw", size=1)

    authors = search.execute().aggregations.authors

    assert 1 == len(authors)
    assert {"key": "Honza Král", "doc_count": 52} == authors[0]
@@ -0,0 +1,82 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
#
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from opensearchpy.helpers.search import Q
from opensearchpy.helpers.update_by_query import UpdateByQuery
def test_update_by_query_no_script(write_client, setup_ubq_tests):
    """A script-less update-by-query touches all 52 documents lacking ``is_public``."""
    target_index = setup_ubq_tests
    missing_is_public = ~Q("exists", field="is_public")

    ubq = UpdateByQuery(using=write_client).index(target_index)
    ubq = ubq.filter(missing_is_public)
    response = ubq.execute()

    assert response.total == 52
    # "took" is checked through item access here and attribute access below.
    assert response["took"] > 0
    assert not response.timed_out
    assert response.updated == 52
    assert response.deleted == 0
    assert response.took > 0
    assert response.success()
def test_update_by_query_with_script(write_client, setup_ubq_tests):
    """A scripted update-by-query updates the 2 documents without ``parent_shas``."""
    target_index = setup_ubq_tests
    missing_parent_shas = ~Q("exists", field="parent_shas")

    ubq = UpdateByQuery(using=write_client).index(target_index)
    ubq = ubq.filter(missing_parent_shas)
    ubq = ubq.script(source="ctx._source.is_public = false")
    ubq = ubq.params(conflicts="proceed")

    response = ubq.execute()

    assert response.total == 2
    assert response.updated == 2
    assert response.version_conflicts == 0
def test_delete_by_query_with_script(write_client, setup_ubq_tests):
    """A script setting ``ctx.op`` to delete removes the single matching document."""
    target_index = setup_ubq_tests
    matching_parent = Q(
        "match", parent_shas="1dd19210b5be92b960f7db6f66ae526288edccc3"
    )

    ubq = UpdateByQuery(using=write_client).index(target_index)
    ubq = ubq.filter(matching_parent)
    ubq = ubq.script(source='ctx.op = "delete"')
    ubq = ubq.params(conflicts="proceed")

    response = ubq.execute()

    assert response.total == 1
    assert response.deleted == 1
    assert response.success()
+3 -2
View File
@@ -85,7 +85,7 @@ def test_dist(dist):
run(venv_python, "-m", "pip", "install", dist)
# Test the sync namespaces
run(venv_python, "-c", f"from {dist_name} import OpenSearch")
run(venv_python, "-c", f"from {dist_name} import OpenSearch, Q")
run(
venv_python,
"-c",
@@ -173,7 +173,7 @@ def test_dist(dist):
run(
venv_python,
"-c",
f"from {dist_name} import OpenSearch",
f"from {dist_name} import OpenSearch,Q",
expect_exit_code=256,
)
@@ -181,6 +181,7 @@ def test_dist(dist):
def main():
run("git", "checkout", "--", "setup.py", "opensearchpy/")
run("rm", "-rf", "build/", "dist/*", "*.egg-info", ".eggs")
run("python", "setup.py", "sdist", "bdist_wheel")
# Grab the major version to be used as a suffix.
version_path = os.path.join(base_dir, "opensearchpy/_version.py")