0ddbf8cafa
* updated files with docstrings to pass pylint Signed-off-by: Mark Cohen <markcoh@amazon.com> * updated samples to prepare for enabling missing-docstring linter; will continue to work on this before committing setup.cfg Signed-off-by: Mark Cohen <markcoh@amazon.com> * removed missing-function-docstring from setup.cfg so the linter doesn't fail while work on docstrings continues Signed-off-by: Mark Cohen <markcoh@amazon.com> * corrected unnecessary return docstring values Signed-off-by: Mark Cohen <markcoh@amazon.com> * fixing failure in 'black' on reformatting Signed-off-by: Mark Cohen <markcoh@amazon.com> * updated utils to pass missing-function-docstring tests Signed-off-by: Mark Cohen <markcoh@amazon.com> * updated functions with missing docstrings or pylint ignore instructions; added a utility to automatically add these ignore instructions to most functions that should be self-describing; rolled back some automatically generated code mistakenly changed Signed-off-by: Mark Cohen <markcoh@amazon.com> * * ignoring opensearchpy for pylint and then added it back to noxfile.py * fixed some lints; created a feature flag for newer dynamic pylint so now lints can be fixed first in legacy code and then enabled by multiple people * extracted a method for per-folder linting * updated noxfile.lint_per_folder with type hints * enabled unspecified-encoding in pylint * added disable missing-function-docstring pragma to test_clients.py in test_async and test_server * added more encodings to pass unspecified-encoding pylint tests * updated changelog Signed-off-by: Mark Cohen <markcoh@amazon.com> * updated CHANGELOG.md entry removed the feature flag for pylint lint_per_folder fixed failures from mypy and pylint removed pylint MESSAGE CONTROL config from setup.cfg after relocating to lint_per_folder method Signed-off-by: Mark Cohen <markcoh@amazon.com> * removed pylint ignore missing-function-docstring Signed-off-by: Mark Cohen <markcoh@amazon.com> * added 
pylint.extensions.docparams plugin updated some docstrings to correct parameters removed pylint from setup.cfg Signed-off-by: Mark Cohen <markcoh@amazon.com> * added four lints for opensearchpy/ Signed-off-by: Mark Cohen <markcoh@amazon.com> * adding await back to client.info() call Signed-off-by: Mark Cohen <markcoh@amazon.com> * updated TODOs as requested renamed test_opensearchpy.test_async.test_server.test_helpers.conftest.setup_ubq_tests to setup_update_by_query_tests added OpenSearch-main/rest-api-spec/src/main/resources/rest-api-spec/test/indices/stats/50_noop_update[0] to skip tests list run_tests.py catches a CalledProcessError when the git repo already exists and the command to add the origin fails in fetch_opensearch_repo() Signed-off-by: Mark Cohen <markcoh@amazon.com> --------- Signed-off-by: Mark Cohen <markcoh@amazon.com>
114 lines
3.0 KiB
Python
Executable File
114 lines
3.0 KiB
Python
Executable File
#!/usr/bin/env python
|
|
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
#
|
|
# The OpenSearch Contributors require contributions made to
|
|
# this file be licensed under the Apache-2.0 license or a
|
|
# compatible open source license.
|
|
#
|
|
# Modifications Copyright OpenSearch Contributors. See
|
|
# GitHub history for details.
|
|
|
|
|
|
import os
|
|
from typing import Any
|
|
|
|
from opensearchpy import OpenSearch, helpers
|
|
|
|
|
|
def _report_bulk_results(results: Any, label: str) -> None:
    """
    consumes the ``(success, item)`` pairs produced by a bulk helper and prints
    the error of every failed item followed by the number of successful items

    :param results: iterable of ``(success, item)`` tuples, as yielded by
        ``helpers.parallel_bulk`` and ``helpers.streaming_bulk``
    :param label: name of the bulk helper, shown in the success message
    """
    succeeded = []
    failed = []
    for success, item in results:
        if success:
            succeeded.append(item)
        else:
            failed.append(item)

    if failed:
        print(f"There were {len(failed)} errors:")
        for item in failed:
            print(item["index"]["error"])

    if succeeded:
        print(f"Bulk-inserted {len(succeeded)} items ({label}).")


def main() -> None:
    """
    demonstrates how to bulk load data using opensearchpy.helpers
    including examples of serial, parallel, and streaming bulk load;
    the index used by the sample is deleted even when a bulk load fails
    """
    # connect to an instance of OpenSearch; every setting can be overridden
    # through the HOST, PORT, USERNAME and PASSWORD environment variables
    host = os.getenv("HOST", default="localhost")
    port = int(os.getenv("PORT", 9200))
    auth = (os.getenv("USERNAME", "admin"), os.getenv("PASSWORD", "admin"))

    # NOTE(review): TLS is on but certificate verification (and its warning)
    # is switched off - presumably for a local demo cluster; confirm before
    # reusing these settings elsewhere
    client = OpenSearch(
        hosts=[{"host": host, "port": port}],
        http_auth=auth,
        use_ssl=True,
        verify_certs=False,
        ssl_show_warn=False,
    )

    # create the index unless it already exists
    index_name = "my-index"

    if not client.indices.exists(index_name):
        client.indices.create(
            index_name,
            body={
                "mappings": {
                    "properties": {
                        "value": {"type": "float"},
                    }
                }
            },
        )

    # everything below runs against the index, so guarantee that it is
    # removed again even if one of the bulk helpers raises
    try:
        # index data
        data = [{"_index": index_name, "_id": i, "value": i} for i in range(100)]

        # serialized bulk raising an exception on error
        inserted, _ = helpers.bulk(client, data)
        print(f"Bulk-inserted {inserted} items (bulk).")

        # parallel bulk with explicit error checking: failures are reported
        # as unsuccessful items instead of being raised
        _report_bulk_results(
            helpers.parallel_bulk(
                client,
                actions=data,
                chunk_size=10,
                raise_on_error=False,
                raise_on_exception=False,
                max_chunk_bytes=20 * 1024 * 1024,
                request_timeout=60,
            ),
            "parallel_bulk",
        )

        # streaming bulk with a data generator
        def _generate_data() -> Any:
            """
            lazily yields the same 100 documents that were indexed above
            """
            for i in range(100):
                yield {"_index": index_name, "_id": i, "value": i}

        _report_bulk_results(
            helpers.streaming_bulk(client, actions=_generate_data()),
            "streaming_bulk",
        )
    finally:
        # delete index
        client.indices.delete(index=index_name)
|
|
|
|
|
|
# run the sample only when this file is executed as a script, not on import
if __name__ == "__main__":
    main()
|