Added bulk samples and explained error handling. (#448)
* Added bulk samples and explained error handling. Signed-off-by: dblock <dblock@amazon.com> * The client can serialize an array for you. Signed-off-by: dblock <dblock@amazon.com> --------- Signed-off-by: dblock <dblock@amazon.com>
This commit is contained in:
committed by
GitHub
parent
f54973e583
commit
54a517ada9
+39
-14
@@ -1,10 +1,15 @@
|
||||
- [Bulk Indexing](#bulk-indexing)
|
||||
- [Use a Helper](#use-a-helper)
|
||||
- [Line-Delimited JSON](#line-delimited-json)
|
||||
- [Bulk Helper](#bulk-helper)
|
||||
|
||||
# Bulk Indexing
|
||||
|
||||
The [Bulk API](https://opensearch.org/docs/latest/api-reference/document-apis/bulk/) lets you add, update, or delete multiple documents in a single request.
|
||||
|
||||
## Line-Delimited JSON
|
||||
|
||||
The `bulk` API accepts line-delimited JSON. This method requires the caller to evaluate the return value and parse errors in the case of a failure or partial success. See [samples/bulk/bulk-ld.py](../samples/bulk/bulk-ld.py) for a working sample.
|
||||
|
||||
```python
|
||||
from opensearchpy import OpenSearch
|
||||
|
||||
@@ -20,27 +25,47 @@ docs = '''
|
||||
'''
|
||||
|
||||
response = client.bulk(docs)
|
||||
print(response)
|
||||
if response["errors"]:
|
||||
print(f"There were errors!")
|
||||
else:
|
||||
print(f"Bulk-inserted {len(rc['items'])} items.")
|
||||
```
|
||||
|
||||
## Use a Helper
|
||||
The client can also serialize an array of data into bulk-delimited JSON for you. See [samples/bulk/bulk-array.py](../samples/bulk/bulk-array.py) for a working sample.
|
||||
|
||||
```python
|
||||
data = [
|
||||
{ "index": { "_index": "index-2022-06-08", "_id": 1 }}
|
||||
{ "name": "foo"}
|
||||
{ "index": { "_index": "index-2022-06-09", "_id": 2 }}
|
||||
{ "name": "bar"}
|
||||
{ "index": { "_index": "index-2022-06-10", "_id": 3 }}
|
||||
{ "name": "baz"}
|
||||
]
|
||||
|
||||
response = client.bulk(data)
|
||||
if response["errors"]:
|
||||
print(f"There were errors!")
|
||||
else:
|
||||
print(f"Bulk-inserted {len(rc['items'])} items.")
|
||||
```
|
||||
|
||||
## Bulk Helper
|
||||
|
||||
A helper can generate the line-delimited JSON for you from a Python array that contains `_index` and `_id` fields, and parse errors. The `helpers.bulk` implementation will raise `BulkIndexError` if any error occurs. This may indicate a partially successful result. See [samples/bulk/bulk-helpers.py](../samples/bulk/bulk-helpers.py) for a working sample.
|
||||
|
||||
```python
|
||||
from opensearchpy import OpenSearch, helpers
|
||||
|
||||
client = OpenSearch(...)
|
||||
|
||||
docs = []
|
||||
def generate_data():
|
||||
mywords = ['foo', 'bar', 'baz']
|
||||
for index, word in enumerate(mywords):
|
||||
docs.append({
|
||||
"_index": "mywords",
|
||||
"word": word,
|
||||
"_id": index
|
||||
})
|
||||
return docs
|
||||
docs = [
|
||||
{ "_index": "words", "_id": "word1", word: "foo" },
|
||||
{ "_index": "words", "_id": "word2", word: "bar" },
|
||||
{ "_index": "words", "_id": "word3", word: "baz" },
|
||||
]
|
||||
|
||||
response = helpers.bulk(client, generate_data(), max_retries=3)
|
||||
response = helpers.bulk(client, docs, max_retries=3)
|
||||
print(response)
|
||||
```
|
||||
|
||||
|
||||
Executable
+64
@@ -0,0 +1,64 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
# The OpenSearch Contributors require contributions made to
|
||||
# this file be licensed under the Apache-2.0 license or a
|
||||
# compatible open source license.
|
||||
|
||||
import os
|
||||
import json
|
||||
|
||||
from opensearchpy import OpenSearch
|
||||
|
||||
# connect to an instance of OpenSearch
|
||||
|
||||
host = os.getenv('HOST', default='localhost')
|
||||
port = int(os.getenv('PORT', 9200))
|
||||
auth = (
|
||||
os.getenv('USERNAME', 'admin'),
|
||||
os.getenv('PASSWORD', 'admin')
|
||||
)
|
||||
|
||||
client = OpenSearch(
|
||||
hosts = [{'host': host, 'port': port}],
|
||||
http_auth = auth,
|
||||
use_ssl = True,
|
||||
verify_certs = False,
|
||||
ssl_show_warn = False
|
||||
)
|
||||
|
||||
# check whether an index exists
|
||||
index_name = "my-index"
|
||||
|
||||
if not client.indices.exists(index_name):
|
||||
|
||||
client.indices.create(index_name,
|
||||
body={
|
||||
"mappings":{
|
||||
"properties": {
|
||||
"value": {
|
||||
"type": "float"
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
# index data
|
||||
data = []
|
||||
for i in range(100):
|
||||
data.append({ "index": {"_index": index_name, "_id": i }})
|
||||
data.append({ "value": i })
|
||||
|
||||
rc = client.bulk(data)
|
||||
if rc["errors"]:
|
||||
print(f"There were errors:")
|
||||
for item in rc["items"]:
|
||||
print(f"{item['index']['status']}: {item['index']['error']['type']}")
|
||||
else:
|
||||
print(f"Bulk-inserted {len(rc['items'])} items.")
|
||||
|
||||
# delete index
|
||||
client.indices.delete(index=index_name)
|
||||
|
||||
Executable
+58
@@ -0,0 +1,58 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
# The OpenSearch Contributors require contributions made to
|
||||
# this file be licensed under the Apache-2.0 license or a
|
||||
# compatible open source license.
|
||||
|
||||
import os
|
||||
import json
|
||||
|
||||
from opensearchpy import OpenSearch, helpers
|
||||
|
||||
# connect to an instance of OpenSearch
|
||||
|
||||
host = os.getenv('HOST', default='localhost')
|
||||
port = int(os.getenv('PORT', 9200))
|
||||
auth = (
|
||||
os.getenv('USERNAME', 'admin'),
|
||||
os.getenv('PASSWORD', 'admin')
|
||||
)
|
||||
|
||||
client = OpenSearch(
|
||||
hosts = [{'host': host, 'port': port}],
|
||||
http_auth = auth,
|
||||
use_ssl = True,
|
||||
verify_certs = False,
|
||||
ssl_show_warn = False
|
||||
)
|
||||
|
||||
# check whether an index exists
|
||||
index_name = "my-index"
|
||||
|
||||
if not client.indices.exists(index_name):
|
||||
|
||||
client.indices.create(index_name,
|
||||
body={
|
||||
"mappings":{
|
||||
"properties": {
|
||||
"value": {
|
||||
"type": "float"
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
# index data
|
||||
data = []
|
||||
for i in range(100):
|
||||
data.append({ "_index": index_name, "_id": i, "value": i })
|
||||
|
||||
rc = helpers.bulk(client, data)
|
||||
print(f"Bulk-inserted {rc[0]} items.")
|
||||
|
||||
# delete index
|
||||
client.indices.delete(index=index_name)
|
||||
|
||||
Executable
+64
@@ -0,0 +1,64 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
# The OpenSearch Contributors require contributions made to
|
||||
# this file be licensed under the Apache-2.0 license or a
|
||||
# compatible open source license.
|
||||
|
||||
import os
|
||||
import json
|
||||
|
||||
from opensearchpy import OpenSearch
|
||||
|
||||
# connect to an instance of OpenSearch
|
||||
|
||||
host = os.getenv('HOST', default='localhost')
|
||||
port = int(os.getenv('PORT', 9200))
|
||||
auth = (
|
||||
os.getenv('USERNAME', 'admin'),
|
||||
os.getenv('PASSWORD', 'admin')
|
||||
)
|
||||
|
||||
client = OpenSearch(
|
||||
hosts = [{'host': host, 'port': port}],
|
||||
http_auth = auth,
|
||||
use_ssl = True,
|
||||
verify_certs = False,
|
||||
ssl_show_warn = False
|
||||
)
|
||||
|
||||
# check whether an index exists
|
||||
index_name = "my-index"
|
||||
|
||||
if not client.indices.exists(index_name):
|
||||
|
||||
client.indices.create(index_name,
|
||||
body={
|
||||
"mappings":{
|
||||
"properties": {
|
||||
"value": {
|
||||
"type": "float"
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
# index data
|
||||
data = ''
|
||||
for i in range(100):
|
||||
data += json.dumps({ "index": {"_index": index_name, "_id": i }}) + "\n"
|
||||
data += json.dumps({ "value": i }) + "\n"
|
||||
|
||||
rc = client.bulk(data)
|
||||
if rc["errors"]:
|
||||
print(f"There were errors:")
|
||||
for item in rc["items"]:
|
||||
print(f"{item['index']['status']}: {item['index']['error']['type']}")
|
||||
else:
|
||||
print(f"Bulk-inserted {len(rc['items'])} items.")
|
||||
|
||||
# delete index
|
||||
client.indices.delete(index=index_name)
|
||||
|
||||
Reference in New Issue
Block a user