2021-08-06 12:59:39 +05:30
|
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
|
|
|
#
|
|
|
|
|
# The OpenSearch Contributors require contributions made to
|
|
|
|
|
# this file be licensed under the Apache-2.0 license or a
|
|
|
|
|
# compatible open source license.
|
|
|
|
|
#
|
|
|
|
|
# Modifications Copyright OpenSearch Contributors. See
|
|
|
|
|
# GitHub history for details.
|
|
|
|
|
#
|
2020-07-02 13:15:25 -05:00
|
|
|
# Licensed to Elasticsearch B.V. under one or more contributor
|
|
|
|
|
# license agreements. See the NOTICE file distributed with
|
|
|
|
|
# this work for additional information regarding copyright
|
|
|
|
|
# ownership. Elasticsearch B.V. licenses this file to you under
|
|
|
|
|
# the Apache License, Version 2.0 (the "License"); you may
|
|
|
|
|
# not use this file except in compliance with the License.
|
|
|
|
|
# You may obtain a copy of the License at
|
|
|
|
|
#
|
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
#
|
|
|
|
|
# Unless required by applicable law or agreed to in writing,
|
|
|
|
|
# software distributed under the License is distributed on an
|
|
|
|
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
|
|
|
# KIND, either express or implied. See the License for the
|
|
|
|
|
# specific language governing permissions and limitations
|
|
|
|
|
# under the License.
|
2020-04-23 11:22:08 -05:00
|
|
|
|
2022-10-04 00:15:18 +05:30
|
|
|
|
2015-10-11 04:47:02 +02:00
|
|
|
import threading
|
2021-01-13 14:21:04 -06:00
|
|
|
import time
|
2023-11-09 10:51:20 -05:00
|
|
|
from typing import Any
|
2024-07-20 23:19:20 +03:00
|
|
|
from unittest import mock
|
2023-12-04 06:26:25 -08:00
|
|
|
from unittest.mock import Mock
|
2021-01-13 14:21:04 -06:00
|
|
|
|
2020-05-14 16:09:24 -05:00
|
|
|
import pytest
|
2021-01-13 14:21:04 -06:00
|
|
|
|
2021-09-16 14:59:29 +05:30
|
|
|
from opensearchpy import OpenSearch, helpers
|
|
|
|
|
from opensearchpy.serializer import JSONSerializer
|
2015-10-11 04:47:02 +02:00
|
|
|
|
2023-02-14 15:03:56 -08:00
|
|
|
from ..test_cases import TestCase
|
2015-10-11 04:47:02 +02:00
|
|
|
|
2018-11-26 10:10:12 -07:00
|
|
|
# Guards the call counter below; the mocked function is invoked from worker threads.
lock_side_effect = threading.Lock()


def mock_process_bulk_chunk(*args: Any, **kwargs: Any) -> Any:
    """
    Threadsafe way of mocking process bulk chunk:
    https://stackoverflow.com/questions/39332139/thread-safe-version-of-mock-call-count

    Accepts and ignores any arguments, increments the ``call_count``
    attribute stored on this function while holding ``lock_side_effect``,
    sleeps briefly and returns an empty list.
    """

    with lock_side_effect:
        mock_process_bulk_chunk.call_count += 1  # type: ignore
    # Sleep outside the lock so concurrent callers are not serialised by it.
    time.sleep(0.1)
    return []


# Counter lives on the function object; it starts at zero at import time.
mock_process_bulk_chunk.call_count = 0  # type: ignore
|
2018-11-26 10:10:12 -07:00
|
|
|
|
|
|
|
|
|
2015-10-11 04:47:02 +02:00
|
|
|
class TestParallelBulk(TestCase):
    """Tests for how ``helpers.parallel_bulk`` splits and dispatches actions."""

    @mock.patch(
        "opensearchpy.helpers.actions._process_bulk_chunk",
        side_effect=mock_process_bulk_chunk,
    )
    def test_all_chunks_sent(self, _process_bulk_chunk: Any) -> None:
        """100 actions with ``chunk_size=2`` must result in 50 chunk calls."""
        # The counter is module-level state; reset it so the exact-count
        # assertion below does not depend on earlier runs in this process.
        mock_process_bulk_chunk.call_count = 0  # type: ignore

        actions = ({"x": i} for i in range(100))
        list(helpers.parallel_bulk(OpenSearch(), actions, chunk_size=2))

        self.assertEqual(50, mock_process_bulk_chunk.call_count)  # type: ignore

    @mock.patch("opensearchpy.OpenSearch.bulk")
    def test_with_all_options(self, _bulk: Any) -> None:
        """Every option is accepted and ``request_timeout`` reaches ``bulk``."""
        actions = ({"x": i} for i in range(100))
        list(
            helpers.parallel_bulk(
                OpenSearch(),
                actions=actions,
                chunk_size=2,
                raise_on_error=False,
                raise_on_exception=False,
                max_chunk_bytes=20 * 1024 * 1024,
                request_timeout=160,
                # NOTE: ``(123)`` is the plain int 123, not a one-element tuple.
                ignore_status=(123),
            )
        )

        self.assertEqual(50, _bulk.call_count)
        _bulk.assert_called_with(
            '{"index":{}}\n{"x":98}\n{"index":{}}\n{"x":99}\n', request_timeout=160
        )

    @mock.patch("opensearchpy.helpers.actions._process_bulk_chunk")
    def test_process_bulk_chunk_with_all_options(
        self, _process_bulk_chunk: Any
    ) -> None:
        """The options are forwarded to ``_process_bulk_chunk`` as asserted below."""
        actions = ({"x": i} for i in range(100))
        client = OpenSearch()
        list(
            helpers.parallel_bulk(
                client,
                actions=actions,
                chunk_size=2,
                raise_on_error=True,
                raise_on_exception=True,
                max_chunk_bytes=20 * 1024 * 1024,
                request_timeout=160,
                # NOTE: ``(123)`` is the plain int 123; the assertion below
                # expects that bare int.
                ignore_status=(123),
            )
        )

        self.assertEqual(50, _process_bulk_chunk.call_count)
        _process_bulk_chunk.assert_called_with(
            client,
            ['{"index":{}}', '{"x":98}', '{"index":{}}', '{"x":99}'],
            [({"index": {}}, {"x": 98}), ({"index": {}}, {"x": 99})],
            True,
            True,
            123,
            request_timeout=160,
        )

    @pytest.mark.skip  # type: ignore
    @mock.patch(
        "opensearchpy.helpers.actions._process_bulk_chunk",
        # make sure we spend some time in the thread
        side_effect=lambda *args, **kwargs: [
            (True, time.sleep(0.001) or threading.current_thread().ident)  # type: ignore
        ],
    )
    def test_chunk_sent_from_different_threads(self, _process_bulk_chunk: Any) -> None:
        """With ``thread_count=10`` chunks should come from more than one thread."""
        actions = ({"x": i} for i in range(100))
        results = list(
            helpers.parallel_bulk(OpenSearch(), actions, thread_count=10, chunk_size=2)
        )
        # Each result carries the ident of the thread that produced it.
        thread_idents = {r[1] for r in results}
        self.assertGreater(len(thread_idents), 1)
|
2015-10-11 04:47:02 +02:00
|
|
|
|
2019-03-29 09:25:23 -06:00
|
|
|
|
2015-10-11 04:50:31 +02:00
|
|
|
class TestChunkActions(TestCase):
    """Tests for ``helpers.expand_action`` and ``helpers._chunk_actions``."""

    def setup_method(self, _: Any) -> None:
        """
        creates some documents for testing
        """
        # The non-ASCII "á" makes character count and UTF-8 byte count differ.
        self.actions: Any = [
            ({"index": {}}, {"some": "datá", "i": i}) for i in range(100)
        ]

    def test_expand_action(self) -> None:
        """A plain document expands to an ``index`` action with no metadata."""
        self.assertEqual(helpers.expand_action({}), ({"index": {}}, {}))
        self.assertEqual(
            helpers.expand_action({"key": "val"}), ({"index": {}}, {"key": "val"})
        )

    def test_expand_action_actions(self) -> None:
        """``_op_type`` selects the action and metadata is split from the body."""
        self.assertEqual(
            helpers.expand_action(
                {"_op_type": "delete", "_id": "id", "_index": "index"}
            ),
            ({"delete": {"_id": "id", "_index": "index"}}, None),
        )
        self.assertEqual(
            helpers.expand_action(
                {"_op_type": "update", "_id": "id", "_index": "index", "key": "val"}
            ),
            ({"update": {"_id": "id", "_index": "index"}}, {"key": "val"}),
        )
        self.assertEqual(
            helpers.expand_action(
                {"_op_type": "create", "_id": "id", "_index": "index", "key": "val"}
            ),
            ({"create": {"_id": "id", "_index": "index"}}, {"key": "val"}),
        )
        self.assertEqual(
            helpers.expand_action(
                {
                    "_op_type": "create",
                    "_id": "id",
                    "_index": "index",
                    "_source": {"key": "val"},
                }
            ),
            ({"create": {"_id": "id", "_index": "index"}}, {"key": "val"}),
        )

    def test_expand_action_options(self) -> None:
        """Each option moves from the document into the action metadata.

        A bare string is expected under the same key; a pair is
        ``(key given in the document, key expected in the action)``.
        """
        for option in (
            "_id",
            "_index",
            "_percolate",
            "_timestamp",
            "if_seq_no",
            "if_primary_term",
            "parent",
            "pipeline",
            "retry_on_conflict",
            "routing",
            "version",
            "version_type",
            ("_parent", "parent"),
            ("_retry_on_conflict", "retry_on_conflict"),
            ("_routing", "routing"),
            ("_version", "version"),
            ("_version_type", "version_type"),
            ("_if_seq_no", "if_seq_no"),
            ("_if_primary_term", "if_primary_term"),
        ):
            if isinstance(option, str):
                source_key = action_key = option
            else:
                source_key, action_key = option
            self.assertEqual(
                helpers.expand_action({"key": "val", source_key: 0}),
                ({"index": {action_key: 0}}, {"key": "val"}),
            )

    def test__source_metadata_or_source(self) -> None:
        """``_source`` is the document body or update metadata depending on input."""
        self.assertEqual(
            helpers.expand_action({"_source": {"key": "val"}}),
            ({"index": {}}, {"key": "val"}),
        )

        self.assertEqual(
            helpers.expand_action(
                {"_source": ["key"], "key": "val", "_op_type": "update"}
            ),
            ({"update": {"_source": ["key"]}}, {"key": "val"}),
        )

        self.assertEqual(
            helpers.expand_action(
                {"_source": True, "key": "val", "_op_type": "update"}
            ),
            ({"update": {"_source": True}}, {"key": "val"}),
        )

        # This case is only to ensure backwards compatibility with old functionality.
        self.assertEqual(
            helpers.expand_action(
                {"_source": {"key2": "val2"}, "key": "val", "_op_type": "update"}
            ),
            ({"update": {}}, {"key2": "val2"}),
        )

    def test_chunks_are_chopped_by_byte_size(self) -> None:
        """A 1-byte limit forces every action into its own chunk."""
        self.assertEqual(
            100,
            len(
                list(helpers._chunk_actions(self.actions, 100000, 1, JSONSerializer()))
            ),
        )

    def test_chunks_are_chopped_by_chunk_size(self) -> None:
        """With a huge byte limit, 100 actions at 10 per chunk give 10 chunks."""
        self.assertEqual(
            10,
            len(
                list(
                    helpers._chunk_actions(self.actions, 10, 99999999, JSONSerializer())
                )
            ),
        )

    def test_chunks_are_chopped_by_byte_size_properly(self) -> None:
        """No chunk's serialized payload may exceed ``max_byte_size`` bytes."""
        max_byte_size = 170
        chunks = list(
            helpers._chunk_actions(
                self.actions, 100000, max_byte_size, JSONSerializer()
            )
        )
        self.assertEqual(25, len(chunks))
        for _, chunk_actions in chunks:
            chunk = "".join(chunk_actions)
            # Measure encoded bytes, not characters: the fixture contains a
            # multi-byte character, so ``len(chunk)`` would under-count.
            self.assertLessEqual(len(chunk.encode("utf-8")), max_byte_size)
|
|
|
|
|
|
2019-03-29 09:25:23 -06:00
|
|
|
|
2015-10-11 05:00:14 +02:00
|
|
|
class TestExpandActions(TestCase):
    """Tests for ``helpers.expand_action`` when given a raw string."""

    def test_string_actions_are_marked_as_simple_inserts(self) -> None:
        """A string is returned unchanged, paired with a serialized index action."""
        self.assertEqual(
            ('{"index":{}}', "whatever"), helpers.expand_action("whatever")
        )
|
2023-12-04 06:26:25 -08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestScanFunction(TestCase):
    """Tests for ``helpers.scan`` against mocked client responses."""

    @mock.patch("opensearchpy.OpenSearch.clear_scroll")
    @mock.patch("opensearchpy.OpenSearch.scroll")
    @mock.patch("opensearchpy.OpenSearch.search")
    def test_scan_with_missing_hits_key(
        self, mock_search: Mock, mock_scroll: Mock, mock_clear_scroll: Mock
    ) -> None:
        """
        Simulate a response where the 'hits' key is missing
        """
        # Patches are applied bottom-up, so the mocks arrive in the order
        # search, scroll, clear_scroll.
        mock_search.return_value = {"_scroll_id": "dummy_scroll_id", "_shards": {}}

        mock_scroll.side_effect = [{"_scroll_id": "dummy_scroll_id", "_shards": {}}]

        mock_clear_scroll.return_value = None

        client = OpenSearch()

        # The test should pass without raising a KeyError
        scan_result = list(helpers.scan(client, query={"query": {"match_all": {}}}))
        self.assertEqual(
            scan_result, [], "Expected empty results when 'hits' key is missing"
        )
|