Added a support for search (search_after parameter) (#859)
* Added a sample that uses search_after parameter Signed-off-by: Nathalie Jonathan <nathhjo@amazon.com> * Moved search_after sample to samples/search folder, updated CHANGELOG and _sync sample, and added _async sample. Signed-off-by: Nathalie Jonathan <nathhjo@amazon.com> * Solved conflicts in CHANGELOG.md Signed-off-by: Nathalie Jonathan <nathhjo@amazon.com> --------- Signed-off-by: Nathalie Jonathan <nathhjo@amazon.com>
This commit is contained in:
@@ -4,6 +4,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
|
||||
## [Unreleased]
|
||||
### Added
|
||||
- Added option to pass custom headers to 'AWSV4SignerAsyncAuth' ([863](https://github.com/opensearch-project/opensearch-py/pull/863))
|
||||
- Added sync and async sample that uses `search_after` parameter ([859](https://github.com/opensearch-project/opensearch-py/pull/859))
|
||||
### Updated APIs
|
||||
### Changed
|
||||
### Deprecated
|
||||
|
||||
@@ -0,0 +1,112 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
# The OpenSearch Contributors require contributions made to
|
||||
# this file be licensed under the Apache-2.0 license or a
|
||||
# compatible open source license.
|
||||
#
|
||||
# Modifications Copyright OpenSearch Contributors. See
|
||||
# GitHub history for details.
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
from opensearchpy import AsyncOpenSearch
|
||||
|
||||
|
||||
async def main() -> None:
|
||||
"""
|
||||
This sample uses asyncio and AsyncOpenSearch to asynchronously
|
||||
connect to local OpenSearch cluster, performs a search query on an index,
|
||||
retrieves the first page of results, and fetches the next page of results
|
||||
using the search_after parameter.
|
||||
"""
|
||||
|
||||
# connect to OpenSearch
|
||||
host = "localhost"
|
||||
port = 9200
|
||||
auth = (
|
||||
"admin",
|
||||
os.getenv("OPENSEARCH_PASSWORD", "admin"),
|
||||
) # For testing only. Don't store credentials in code.
|
||||
|
||||
client = AsyncOpenSearch(
|
||||
hosts=[{"host": host, "port": port}],
|
||||
http_auth=auth,
|
||||
use_ssl=True,
|
||||
verify_certs=False,
|
||||
ssl_show_warn=False,
|
||||
)
|
||||
|
||||
# create an index
|
||||
await client.indices.create(index="movies")
|
||||
|
||||
try:
|
||||
# add a large dataset (100 movies)
|
||||
for i in range(15):
|
||||
await client.index(
|
||||
index="movies",
|
||||
id=i,
|
||||
body={
|
||||
"title": f"The Dark Knight {i}",
|
||||
"director": "Christopher Nolan",
|
||||
"year": 2008 + i,
|
||||
},
|
||||
)
|
||||
|
||||
for i in range(95):
|
||||
await client.index(
|
||||
index="movies",
|
||||
id=i + 15,
|
||||
body={
|
||||
"title": f"Movie Title {i + 15}",
|
||||
"director": f"Director {i + 15}",
|
||||
"year": 1950 + i + 15,
|
||||
},
|
||||
)
|
||||
|
||||
# refresh the index to make the documents searchable
|
||||
await client.indices.refresh(index="movies")
|
||||
|
||||
# define the search query with sorting and pagination options
|
||||
search_body = {
|
||||
"query": {"match": {"title": "dark knight"}},
|
||||
"sort": [{"year": {"order": "asc"}}],
|
||||
"size": 10,
|
||||
}
|
||||
|
||||
page = 1
|
||||
total_hits = 0
|
||||
while True:
|
||||
# execute the search
|
||||
response = await client.search(index="movies", body=search_body)
|
||||
hits = response["hits"]["hits"]
|
||||
|
||||
# break if no more results
|
||||
if not hits:
|
||||
break
|
||||
|
||||
print(f"\nPage {page}:")
|
||||
|
||||
for hit in hits:
|
||||
print(hit)
|
||||
total_hits += 1
|
||||
|
||||
# get the sort values of the last document for the next page
|
||||
last_sort = hits[-1]["sort"]
|
||||
search_body["search_after"] = last_sort
|
||||
page += 1
|
||||
|
||||
print("\nPagination Summary:")
|
||||
print(f"Total pages: {page - 1}")
|
||||
print(f"Total hits: {total_hits}")
|
||||
print(f"Results per page: {search_body['size']}")
|
||||
finally:
|
||||
# delete the index
|
||||
await client.indices.delete(index="movies")
|
||||
await client.close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
@@ -0,0 +1,109 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
# The OpenSearch Contributors require contributions made to
|
||||
# this file be licensed under the Apache-2.0 license or a
|
||||
# compatible open source license.
|
||||
#
|
||||
# Modifications Copyright OpenSearch Contributors. See
|
||||
# GitHub history for details.
|
||||
|
||||
import os
|
||||
|
||||
from opensearchpy import OpenSearch
|
||||
|
||||
|
||||
def main() -> None:
|
||||
"""
|
||||
This sample shows how to use search_after to paginate through the search results.
|
||||
It performs a search query on an index, retrieves the first page of results,
|
||||
and then fetches the next page of results using the search_after parameter.
|
||||
"""
|
||||
|
||||
# connect to OpenSearch
|
||||
host = "localhost"
|
||||
port = 9200
|
||||
auth = (
|
||||
"admin",
|
||||
os.getenv("OPENSEARCH_PASSWORD", "admin"),
|
||||
) # For testing only. Don't store credentials in code.
|
||||
|
||||
client = OpenSearch(
|
||||
hosts=[{"host": host, "port": port}],
|
||||
http_auth=auth,
|
||||
use_ssl=True,
|
||||
verify_certs=False,
|
||||
ssl_show_warn=False,
|
||||
)
|
||||
|
||||
# create an index
|
||||
client.indices.create(index="movies")
|
||||
|
||||
try:
|
||||
# add a large dataset (100 movies)
|
||||
for i in range(15):
|
||||
client.index(
|
||||
index="movies",
|
||||
id=i,
|
||||
body={
|
||||
"title": f"The Dark Knight {i}",
|
||||
"director": "Christopher Nolan",
|
||||
"year": 2008 + i,
|
||||
},
|
||||
)
|
||||
|
||||
for i in range(95):
|
||||
client.index(
|
||||
index="movies",
|
||||
id=i + 15,
|
||||
body={
|
||||
"title": f"Movie Title {i + 15}",
|
||||
"director": f"Director {i + 15}",
|
||||
"year": 1950 + i + 15,
|
||||
},
|
||||
)
|
||||
|
||||
# refresh the index to make the documents searchable
|
||||
client.indices.refresh(index="movies")
|
||||
|
||||
# define the search query with sorting and pagination options
|
||||
search_body = {
|
||||
"query": {"match": {"title": "dark knight"}},
|
||||
"sort": [{"year": {"order": "asc"}}],
|
||||
"size": 10,
|
||||
}
|
||||
|
||||
page = 1
|
||||
total_hits = 0
|
||||
while True:
|
||||
# execute the search
|
||||
response = client.search(index="movies", body=search_body)
|
||||
hits = response["hits"]["hits"]
|
||||
|
||||
# break if no more results
|
||||
if not hits:
|
||||
break
|
||||
|
||||
print(f"\nPage {page}:")
|
||||
|
||||
for hit in hits:
|
||||
print(hit)
|
||||
total_hits += 1
|
||||
|
||||
# get the sort values of the last document for the next page
|
||||
last_sort = hits[-1]["sort"]
|
||||
search_body["search_after"] = last_sort
|
||||
page += 1
|
||||
|
||||
print("\nPagination Summary:")
|
||||
print(f"Total pages: {page - 1}")
|
||||
print(f"Total hits: {total_hits}")
|
||||
print(f"Results per page: {search_body['size']}")
|
||||
finally:
|
||||
# delete the index
|
||||
client.indices.delete(index="movies")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user