6e3f1a1194
* Upgrade syntax with pyupgrade --py38-plus Signed-off-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> * Convert to f-strings with flynt Signed-off-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> * Format with Black Signed-off-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> * Remove redundant mock backport dependency Signed-off-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> * isort imports Signed-off-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> * Add changelog entry Signed-off-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> --------- Signed-off-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com>
524 lines
19 KiB
Python
524 lines
19 KiB
Python
# SPDX-License-Identifier: Apache-2.0
|
|
#
|
|
# The OpenSearch Contributors require contributions made to
|
|
# this file be licensed under the Apache-2.0 license or a
|
|
# compatible open source license.
|
|
#
|
|
# Modifications Copyright OpenSearch Contributors. See
|
|
# GitHub history for details.
|
|
#
|
|
# Licensed to Elasticsearch B.V. under one or more contributor
|
|
# license agreements. See the NOTICE file distributed with
|
|
# this work for additional information regarding copyright
|
|
# ownership. Elasticsearch B.V. licenses this file to you under
|
|
# the Apache License, Version 2.0 (the "License"); you may
|
|
# not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing,
|
|
# software distributed under the License is distributed on an
|
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
# KIND, either express or implied. See the License for the
|
|
# specific language governing permissions and limitations
|
|
# under the License.
|
|
|
|
import collections.abc as collections_abc
|
|
from fnmatch import fnmatch
|
|
from typing import Any, Tuple, Type
|
|
|
|
from opensearchpy.connection.connections import get_connection
|
|
from opensearchpy.exceptions import NotFoundError, RequestError
|
|
|
|
from ..exceptions import IllegalOperation, ValidationException
|
|
from .field import Field
|
|
from .index import Index
|
|
from .mapping import Mapping
|
|
from .search import Search
|
|
from .utils import DOC_META_FIELDS, META_FIELDS, ObjectBase, merge
|
|
|
|
|
|
class MetaField:
|
|
def __init__(self, *args: Any, **kwargs: Any) -> None:
|
|
self.args, self.kwargs = args, kwargs
|
|
|
|
|
|
class DocumentMeta(type):
|
|
def __new__(
|
|
cls: Any,
|
|
name: str,
|
|
bases: Tuple[Type[ObjectBase]],
|
|
attrs: Any,
|
|
) -> Any:
|
|
# DocumentMeta filters attrs in place
|
|
attrs["_doc_type"] = DocumentOptions(name, bases, attrs)
|
|
return super().__new__(cls, name, bases, attrs)
|
|
|
|
|
|
class IndexMeta(DocumentMeta):
|
|
# global flag to guard us from associating an Index with the base Document
|
|
# class, only user defined subclasses should have an _index attr
|
|
_document_initialized = False
|
|
|
|
def __new__(
|
|
cls: Any,
|
|
name: str,
|
|
bases: Tuple[Type[ObjectBase]],
|
|
attrs: Any,
|
|
) -> Any:
|
|
new_cls = super().__new__(cls, name, bases, attrs)
|
|
if cls._document_initialized:
|
|
index_opts = attrs.pop("Index", None)
|
|
index = cls.construct_index(index_opts, bases)
|
|
new_cls._index = index
|
|
index.document(new_cls)
|
|
cls._document_initialized = True
|
|
return new_cls
|
|
|
|
@classmethod
|
|
def construct_index(cls, opts: Any, bases: Any) -> Any:
|
|
if opts is None:
|
|
for b in bases:
|
|
if hasattr(b, "_index"):
|
|
return b._index
|
|
|
|
# Set None as Index name so it will set _all while making the query
|
|
return Index(name=None)
|
|
|
|
i = Index(getattr(opts, "name", "*"), using=getattr(opts, "using", "default"))
|
|
i.settings(**getattr(opts, "settings", {}))
|
|
i.aliases(**getattr(opts, "aliases", {}))
|
|
for a in getattr(opts, "analyzers", ()):
|
|
i.analyzer(a)
|
|
return i
|
|
|
|
|
|
class DocumentOptions:
|
|
def __init__(
|
|
self,
|
|
name: str,
|
|
bases: Tuple[Type[ObjectBase]],
|
|
attrs: Any,
|
|
) -> None:
|
|
meta = attrs.pop("Meta", None)
|
|
|
|
# create the mapping instance
|
|
self.mapping = getattr(meta, "mapping", Mapping())
|
|
|
|
# register all declared fields into the mapping
|
|
for name, value in list(attrs.items()):
|
|
if isinstance(value, Field):
|
|
self.mapping.field(name, value)
|
|
del attrs[name]
|
|
|
|
# add all the mappings for meta fields
|
|
for name in dir(meta):
|
|
if isinstance(getattr(meta, name, None), MetaField):
|
|
params = getattr(meta, name)
|
|
self.mapping.meta(name, *params.args, **params.kwargs)
|
|
|
|
# document inheritance - include the fields from parents' mappings
|
|
for b in bases:
|
|
if hasattr(b, "_doc_type") and hasattr(b._doc_type, "mapping"):
|
|
self.mapping.update(b._doc_type.mapping, update_only=True)
|
|
|
|
@property
|
|
def name(self) -> Any:
|
|
return self.mapping.properties.name
|
|
|
|
|
|
class InnerDoc(ObjectBase, metaclass=DocumentMeta):
|
|
"""
|
|
Common class for inner documents like Object or Nested
|
|
"""
|
|
|
|
@classmethod
|
|
def from_opensearch(cls, data: Any, data_only: bool = False) -> Any:
|
|
if data_only:
|
|
data = {"_source": data}
|
|
return super().from_opensearch(data)
|
|
|
|
|
|
class Document(ObjectBase, metaclass=IndexMeta):
|
|
"""
|
|
Model-like class for persisting documents in opensearch.
|
|
"""
|
|
|
|
@classmethod
|
|
def _matches(cls: Any, hit: Any) -> Any:
|
|
if cls._index._name is None:
|
|
return True
|
|
return fnmatch(hit.get("_index", ""), cls._index._name)
|
|
|
|
@classmethod
|
|
def _get_using(cls: Any, using: Any = None) -> Any:
|
|
return using or cls._index._using
|
|
|
|
@classmethod
|
|
def _get_connection(cls, using: Any = None) -> Any:
|
|
return get_connection(cls._get_using(using))
|
|
|
|
@classmethod
|
|
def _default_index(cls: Any, index: Any = None) -> Any:
|
|
return index or cls._index._name
|
|
|
|
@classmethod
|
|
def init(cls: Any, index: Any = None, using: Any = None) -> None:
|
|
"""
|
|
Create the index and populate the mappings in opensearch.
|
|
"""
|
|
i = cls._index
|
|
if index:
|
|
i = i.clone(name=index)
|
|
i.save(using=using)
|
|
|
|
def _get_index(self, index: Any = None, required: bool = True) -> Any:
|
|
if index is None:
|
|
index = getattr(self.meta, "index", None)
|
|
if index is None:
|
|
index = getattr(self._index, "_name", None)
|
|
if index is None and required:
|
|
raise ValidationException("No index")
|
|
if index and "*" in index:
|
|
raise ValidationException("You cannot write to a wildcard index.")
|
|
return index
|
|
|
|
def __repr__(self) -> str:
|
|
return "{}({})".format(
|
|
self.__class__.__name__,
|
|
", ".join(
|
|
f"{key}={getattr(self.meta, key)!r}"
|
|
for key in ("index", "id")
|
|
if key in self.meta
|
|
),
|
|
)
|
|
|
|
@classmethod
|
|
def search(cls, using: Any = None, index: Any = None) -> Any:
|
|
"""
|
|
Create an :class:`~opensearchpy.Search` instance that will search
|
|
over this ``Document``.
|
|
"""
|
|
return Search(
|
|
using=cls._get_using(using), index=cls._default_index(index), doc_type=[cls]
|
|
)
|
|
|
|
@classmethod
|
|
def get(cls: Any, id: Any, using: Any = None, index: Any = None, **kwargs: Any) -> Any: # type: ignore
|
|
"""
|
|
Retrieve a single document from opensearch using its ``id``.
|
|
|
|
:arg id: ``id`` of the document to be retrieved
|
|
:arg index: opensearch index to use, if the ``Document`` is
|
|
associated with an index this can be omitted.
|
|
:arg using: connection alias to use, defaults to ``'default'``
|
|
|
|
Any additional keyword arguments will be passed to
|
|
``OpenSearch.get`` unchanged.
|
|
"""
|
|
opensearch = cls._get_connection(using)
|
|
doc = opensearch.get(index=cls._default_index(index), id=id, **kwargs)
|
|
if not doc.get("found", False):
|
|
return None
|
|
return cls.from_opensearch(doc)
|
|
|
|
@classmethod
|
|
def exists(
|
|
cls, id: Any, using: Any = None, index: Any = None, **kwargs: Any
|
|
) -> Any:
|
|
"""
|
|
check if exists a single document from opensearch using its ``id``.
|
|
|
|
:arg id: ``id`` of the document to check if exists
|
|
:arg index: opensearch index to use, if the ``Document`` is
|
|
associated with an index this can be omitted.
|
|
:arg using: connection alias to use, defaults to ``'default'``
|
|
|
|
Any additional keyword arguments will be passed to
|
|
``OpenSearch.exists`` unchanged.
|
|
"""
|
|
opensearch = cls._get_connection(using)
|
|
return opensearch.exists(index=cls._default_index(index), id=id, **kwargs)
|
|
|
|
@classmethod
|
|
def mget(
|
|
cls,
|
|
docs: Any,
|
|
using: Any = None,
|
|
index: Any = None,
|
|
raise_on_error: bool = True,
|
|
missing: str = "none",
|
|
**kwargs: Any,
|
|
) -> Any:
|
|
"""
|
|
Retrieve multiple document by their ``id``'s. Returns a list of instances
|
|
in the same order as requested.
|
|
|
|
:arg docs: list of ``id``'s of the documents to be retrieved or a list
|
|
of document specifications as per
|
|
https://opensearch.org/docs/latest/opensearch/rest-api/document-apis/multi-get/
|
|
:arg index: opensearch index to use, if the ``Document`` is
|
|
associated with an index this can be omitted.
|
|
:arg using: connection alias to use, defaults to ``'default'``
|
|
:arg missing: what to do when one of the documents requested is not
|
|
found. Valid options are ``'none'`` (use ``None``), ``'raise'`` (raise
|
|
``NotFoundError``) or ``'skip'`` (ignore the missing document).
|
|
|
|
Any additional keyword arguments will be passed to
|
|
``OpenSearch.mget`` unchanged.
|
|
"""
|
|
if missing not in ("raise", "skip", "none"):
|
|
raise ValueError("'missing' must be 'raise', 'skip', or 'none'.")
|
|
opensearch = cls._get_connection(using)
|
|
body = {
|
|
"docs": [
|
|
doc if isinstance(doc, collections_abc.Mapping) else {"_id": doc}
|
|
for doc in docs
|
|
]
|
|
}
|
|
results = opensearch.mget(body, index=cls._default_index(index), **kwargs)
|
|
|
|
objs: Any = []
|
|
error_docs: Any = []
|
|
missing_docs: Any = []
|
|
for doc in results["docs"]:
|
|
if doc.get("found"):
|
|
if error_docs or missing_docs:
|
|
# We're going to raise an exception anyway, so avoid an
|
|
# expensive call to cls.from_opensearch().
|
|
continue
|
|
|
|
objs.append(cls.from_opensearch(doc))
|
|
|
|
elif doc.get("error"):
|
|
if raise_on_error:
|
|
error_docs.append(doc)
|
|
if missing == "none":
|
|
objs.append(None)
|
|
|
|
# The doc didn't cause an error, but the doc also wasn't found.
|
|
elif missing == "raise":
|
|
missing_docs.append(doc)
|
|
elif missing == "none":
|
|
objs.append(None)
|
|
|
|
if error_docs:
|
|
error_ids = [doc["_id"] for doc in error_docs]
|
|
message = "Required routing not provided for documents %s."
|
|
message %= ", ".join(error_ids)
|
|
raise RequestError(400, message, error_docs)
|
|
if missing_docs:
|
|
missing_ids = [doc["_id"] for doc in missing_docs]
|
|
message = f"Documents {', '.join(missing_ids)} not found."
|
|
raise NotFoundError(404, message, {"docs": missing_docs})
|
|
return objs
|
|
|
|
def delete(self, using: Any = None, index: Any = None, **kwargs: Any) -> Any:
|
|
"""
|
|
Delete the instance in opensearch.
|
|
|
|
:arg index: opensearch index to use, if the ``Document`` is
|
|
associated with an index this can be omitted.
|
|
:arg using: connection alias to use, defaults to ``'default'``
|
|
|
|
Any additional keyword arguments will be passed to
|
|
``OpenSearch.delete`` unchanged.
|
|
"""
|
|
opensearch = self._get_connection(using)
|
|
# extract routing etc from meta
|
|
doc_meta = {k: self.meta[k] for k in DOC_META_FIELDS if k in self.meta}
|
|
|
|
# Optimistic concurrency control
|
|
if "seq_no" in self.meta and "primary_term" in self.meta:
|
|
doc_meta["if_seq_no"] = self.meta["seq_no"]
|
|
doc_meta["if_primary_term"] = self.meta["primary_term"]
|
|
|
|
doc_meta.update(kwargs)
|
|
opensearch.delete(index=self._get_index(index), **doc_meta)
|
|
|
|
def to_dict(self, include_meta: bool = False, skip_empty: bool = True) -> Any: # type: ignore
|
|
"""
|
|
Serialize the instance into a dictionary so that it can be saved in opensearch.
|
|
|
|
:arg include_meta: if set to ``True`` will include all the metadata
|
|
(``_index``, ``_id`` etc). Otherwise just the document's
|
|
data is serialized. This is useful when passing multiple instances into
|
|
``opensearchpy.helpers.bulk``.
|
|
:arg skip_empty: if set to ``False`` will cause empty values (``None``,
|
|
``[]``, ``{}``) to be left on the document. Those values will be
|
|
stripped out otherwise as they make no difference in opensearch.
|
|
"""
|
|
d = super().to_dict(skip_empty=skip_empty)
|
|
if not include_meta:
|
|
return d
|
|
|
|
meta = {"_" + k: self.meta[k] for k in DOC_META_FIELDS if k in self.meta}
|
|
|
|
# in case of to_dict include the index unlike save/update/delete
|
|
index = self._get_index(required=False)
|
|
if index is not None:
|
|
meta["_index"] = index
|
|
|
|
meta["_source"] = d
|
|
return meta
|
|
|
|
def update(
|
|
self,
|
|
using: Any = None,
|
|
index: Any = None,
|
|
detect_noop: bool = True,
|
|
doc_as_upsert: bool = False,
|
|
refresh: bool = False,
|
|
retry_on_conflict: Any = None,
|
|
script: Any = None,
|
|
script_id: Any = None,
|
|
scripted_upsert: bool = False,
|
|
upsert: Any = None,
|
|
return_doc_meta: bool = False,
|
|
**fields: Any,
|
|
) -> Any:
|
|
"""
|
|
Partial update of the document, specify fields you wish to update and
|
|
both the instance and the document in opensearch will be updated::
|
|
|
|
doc = MyDocument(title='Document Title!')
|
|
doc.save()
|
|
doc.update(title='New Document Title!')
|
|
|
|
:arg index: opensearch index to use, if the ``Document`` is
|
|
associated with an index this can be omitted.
|
|
:arg using: connection alias to use, defaults to ``'default'``
|
|
:arg detect_noop: Set to ``False`` to disable noop detection.
|
|
:arg refresh: Control when the changes made by this request are visible
|
|
to search. Set to ``True`` for immediate effect.
|
|
:arg retry_on_conflict: In between the get and indexing phases of the
|
|
update, it is possible that another process might have already
|
|
updated the same document. By default, the update will fail with a
|
|
version conflict exception. The retry_on_conflict parameter
|
|
controls how many times to retry the update before finally throwing
|
|
an exception.
|
|
:arg doc_as_upsert: Instead of sending a partial doc plus an upsert
|
|
doc, setting doc_as_upsert to true will use the contents of doc as
|
|
the upsert value
|
|
:arg return_doc_meta: set to ``True`` to return all metadata from the
|
|
index API call instead of only the operation result
|
|
|
|
:return operation result noop/updated
|
|
"""
|
|
body: Any = {
|
|
"doc_as_upsert": doc_as_upsert,
|
|
"detect_noop": detect_noop,
|
|
}
|
|
|
|
# scripted update
|
|
if script or script_id:
|
|
if upsert is not None:
|
|
body["upsert"] = upsert
|
|
|
|
if script:
|
|
script = {"source": script}
|
|
else:
|
|
script = {"id": script_id}
|
|
|
|
script["params"] = fields
|
|
|
|
body["script"] = script
|
|
body["scripted_upsert"] = scripted_upsert
|
|
|
|
# partial document update
|
|
else:
|
|
if not fields:
|
|
raise IllegalOperation(
|
|
"You cannot call update() without updating individual fields or a script. "
|
|
"If you wish to update the entire object use save()."
|
|
)
|
|
|
|
# update given fields locally
|
|
merge(self, fields)
|
|
|
|
# prepare data for OpenSearch
|
|
values = self.to_dict()
|
|
|
|
# if fields were given: partial update
|
|
body["doc"] = {k: values.get(k) for k in fields.keys()}
|
|
|
|
# extract routing etc from meta
|
|
doc_meta = {k: self.meta[k] for k in DOC_META_FIELDS if k in self.meta}
|
|
|
|
if retry_on_conflict is not None:
|
|
doc_meta["retry_on_conflict"] = retry_on_conflict
|
|
|
|
# Optimistic concurrency control
|
|
if (
|
|
retry_on_conflict in (None, 0)
|
|
and "seq_no" in self.meta
|
|
and "primary_term" in self.meta
|
|
):
|
|
doc_meta["if_seq_no"] = self.meta["seq_no"]
|
|
doc_meta["if_primary_term"] = self.meta["primary_term"]
|
|
|
|
meta = self._get_connection(using).update(
|
|
index=self._get_index(index), body=body, refresh=refresh, **doc_meta
|
|
)
|
|
# update meta information from OpenSearch
|
|
for k in META_FIELDS:
|
|
if "_" + k in meta:
|
|
setattr(self.meta, k, meta["_" + k])
|
|
|
|
return meta if return_doc_meta else meta["result"]
|
|
|
|
def save(
|
|
self,
|
|
using: Any = None,
|
|
index: Any = None,
|
|
validate: bool = True,
|
|
skip_empty: bool = True,
|
|
return_doc_meta: bool = False,
|
|
**kwargs: Any,
|
|
) -> Any:
|
|
"""
|
|
Save the document into opensearch. If the document doesn't exist it
|
|
is created, it is overwritten otherwise. Returns ``True`` if this
|
|
operations resulted in new document being created.
|
|
|
|
:arg index: opensearch index to use, if the ``Document`` is
|
|
associated with an index this can be omitted.
|
|
:arg using: connection alias to use, defaults to ``'default'``
|
|
:arg validate: set to ``False`` to skip validating the document
|
|
:arg skip_empty: if set to ``False`` will cause empty values (``None``,
|
|
``[]``, ``{}``) to be left on the document. Those values will be
|
|
stripped out otherwise as they make no difference in opensearch.
|
|
:arg return_doc_meta: set to ``True`` to return all metadata from the
|
|
update API call instead of only the operation result
|
|
|
|
Any additional keyword arguments will be passed to
|
|
``OpenSearch.index`` unchanged.
|
|
|
|
:return operation result created/updated
|
|
"""
|
|
if validate:
|
|
self.full_clean()
|
|
|
|
opensearch = self._get_connection(using)
|
|
# extract routing etc from meta
|
|
doc_meta = {k: self.meta[k] for k in DOC_META_FIELDS if k in self.meta}
|
|
|
|
# Optimistic concurrency control
|
|
if "seq_no" in self.meta and "primary_term" in self.meta:
|
|
doc_meta["if_seq_no"] = self.meta["seq_no"]
|
|
doc_meta["if_primary_term"] = self.meta["primary_term"]
|
|
|
|
doc_meta.update(kwargs)
|
|
meta = opensearch.index(
|
|
index=self._get_index(index),
|
|
body=self.to_dict(skip_empty=skip_empty),
|
|
**doc_meta,
|
|
)
|
|
# update meta information from OpenSearch
|
|
for k in META_FIELDS:
|
|
if "_" + k in meta:
|
|
setattr(self.meta, k, meta["_" + k])
|
|
|
|
return meta if return_doc_meta else meta["result"]
|