Added a support for search (search_after parameter) (#859)
* Added a sample that uses search_after parameter Signed-off-by: Nathalie Jonathan <nathhjo@amazon.com> * Moved search_after sample to samples/search folder, updated CHANGELOG and _sync sample, and added _async sample. Signed-off-by: Nathalie Jonathan <nathhjo@amazon.com> * Solved conflicts in CHANGELOG.md Signed-off-by: Nathalie Jonathan <nathhjo@amazon.com> --------- Signed-off-by: Nathalie Jonathan <nathhjo@amazon.com>
This commit is contained in:
@@ -4,6 +4,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
|
|||||||
## [Unreleased]
|
## [Unreleased]
|
||||||
### Added
|
### Added
|
||||||
- Added option to pass custom headers to 'AWSV4SignerAsyncAuth' ([863](https://github.com/opensearch-project/opensearch-py/pull/863))
|
- Added option to pass custom headers to 'AWSV4SignerAsyncAuth' ([863](https://github.com/opensearch-project/opensearch-py/pull/863))
|
||||||
|
- Added sync and async samples that use the `search_after` parameter ([859](https://github.com/opensearch-project/opensearch-py/pull/859))
|
||||||
### Updated APIs
|
### Updated APIs
|
||||||
### Changed
|
### Changed
|
||||||
### Deprecated
|
### Deprecated
|
||||||
|
|||||||
@@ -0,0 +1,112 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
#
|
||||||
|
# The OpenSearch Contributors require contributions made to
|
||||||
|
# this file be licensed under the Apache-2.0 license or a
|
||||||
|
# compatible open source license.
|
||||||
|
#
|
||||||
|
# Modifications Copyright OpenSearch Contributors. See
|
||||||
|
# GitHub history for details.
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import os
|
||||||
|
|
||||||
|
from opensearchpy import AsyncOpenSearch
|
||||||
|
|
||||||
|
|
||||||
|
async def main() -> None:
    """
    Paginate through search results with `search_after` using AsyncOpenSearch.

    This sample uses asyncio and AsyncOpenSearch to connect to a local
    OpenSearch cluster, indexes a sample dataset, runs a sorted search query,
    and walks through every page of results by passing the sort values of the
    last hit of each page as the `search_after` parameter of the next request.

    Once the sample index has been created it is always deleted again, and
    the client is always closed, even if one of the steps raises.
    """

    # connect to OpenSearch
    host = "localhost"
    port = 9200
    auth = (
        "admin",
        os.getenv("OPENSEARCH_PASSWORD", "admin"),
    )  # For testing only. Don't store credentials in code.

    client = AsyncOpenSearch(
        hosts=[{"host": host, "port": port}],
        http_auth=auth,
        use_ssl=True,
        verify_certs=False,
        ssl_show_warn=False,
    )

    try:
        # create an index; this happens inside the outer try so that the
        # client is still closed when index creation fails
        await client.indices.create(index="movies")

        try:
            # add a sample dataset (110 movies): 15 "The Dark Knight" titles
            # followed by 95 generic titles
            for i in range(15):
                await client.index(
                    index="movies",
                    id=i,
                    body={
                        "title": f"The Dark Knight {i}",
                        "director": "Christopher Nolan",
                        "year": 2008 + i,
                    },
                )

            for i in range(95):
                await client.index(
                    index="movies",
                    id=i + 15,
                    body={
                        "title": f"Movie Title {i + 15}",
                        "director": f"Director {i + 15}",
                        "year": 1950 + i + 15,
                    },
                )

            # refresh the index to make the documents searchable
            await client.indices.refresh(index="movies")

            # define the search query with sorting and pagination options
            # NOTE: `year` is unique among the "The Dark Knight" documents
            # indexed above, so it is sufficient as the only sort key here.
            # With real data, add a unique tiebreaker field to the sort so
            # that documents sharing a sort value are not skipped between
            # pages.
            search_body = {
                "query": {"match": {"title": "dark knight"}},
                "sort": [{"year": {"order": "asc"}}],
                "size": 10,
            }

            page = 1
            total_hits = 0
            while True:
                # execute the search
                response = await client.search(index="movies", body=search_body)
                hits = response["hits"]["hits"]

                # break if no more results
                if not hits:
                    break

                print(f"\nPage {page}:")

                for hit in hits:
                    print(hit)
                    total_hits += 1

                # get the sort values of the last document for the next page
                last_sort = hits[-1]["sort"]
                search_body["search_after"] = last_sort
                page += 1

            print("\nPagination Summary:")
            # `page` was incremented once after the last non-empty page
            print(f"Total pages: {page - 1}")
            print(f"Total hits: {total_hits}")
            print(f"Results per page: {search_body['size']}")
        finally:
            # delete the index
            await client.indices.delete(index="movies")
    finally:
        # always release the client's connections, even when creating or
        # deleting the index fails
        await client.close()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
asyncio.run(main())
|
||||||
@@ -0,0 +1,109 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
#
|
||||||
|
# The OpenSearch Contributors require contributions made to
|
||||||
|
# this file be licensed under the Apache-2.0 license or a
|
||||||
|
# compatible open source license.
|
||||||
|
#
|
||||||
|
# Modifications Copyright OpenSearch Contributors. See
|
||||||
|
# GitHub history for details.
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
from opensearchpy import OpenSearch
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """
    Paginate through search results with the `search_after` parameter.

    This sample connects to a local OpenSearch cluster, indexes a sample
    dataset, runs a sorted search query, and walks through every page of
    results by passing the sort values of the last hit of each page as the
    `search_after` parameter of the next request.

    Once the sample index has been created it is always deleted again, and
    the client is always closed, even if one of the steps raises.
    """

    # connect to OpenSearch
    host = "localhost"
    port = 9200
    auth = (
        "admin",
        os.getenv("OPENSEARCH_PASSWORD", "admin"),
    )  # For testing only. Don't store credentials in code.

    client = OpenSearch(
        hosts=[{"host": host, "port": port}],
        http_auth=auth,
        use_ssl=True,
        verify_certs=False,
        ssl_show_warn=False,
    )

    try:
        # create an index; this happens inside the outer try so that the
        # client is still closed when index creation fails
        client.indices.create(index="movies")

        try:
            # add a sample dataset (110 movies): 15 "The Dark Knight" titles
            # followed by 95 generic titles
            for i in range(15):
                client.index(
                    index="movies",
                    id=i,
                    body={
                        "title": f"The Dark Knight {i}",
                        "director": "Christopher Nolan",
                        "year": 2008 + i,
                    },
                )

            for i in range(95):
                client.index(
                    index="movies",
                    id=i + 15,
                    body={
                        "title": f"Movie Title {i + 15}",
                        "director": f"Director {i + 15}",
                        "year": 1950 + i + 15,
                    },
                )

            # refresh the index to make the documents searchable
            client.indices.refresh(index="movies")

            # define the search query with sorting and pagination options
            # NOTE: `year` is unique among the "The Dark Knight" documents
            # indexed above, so it is sufficient as the only sort key here.
            # With real data, add a unique tiebreaker field to the sort so
            # that documents sharing a sort value are not skipped between
            # pages.
            search_body = {
                "query": {"match": {"title": "dark knight"}},
                "sort": [{"year": {"order": "asc"}}],
                "size": 10,
            }

            page = 1
            total_hits = 0
            while True:
                # execute the search
                response = client.search(index="movies", body=search_body)
                hits = response["hits"]["hits"]

                # break if no more results
                if not hits:
                    break

                print(f"\nPage {page}:")

                for hit in hits:
                    print(hit)
                    total_hits += 1

                # get the sort values of the last document for the next page
                last_sort = hits[-1]["sort"]
                search_body["search_after"] = last_sort
                page += 1

            print("\nPagination Summary:")
            # `page` was incremented once after the last non-empty page
            print(f"Total pages: {page - 1}")
            print(f"Total hits: {total_hits}")
            print(f"Results per page: {search_body['size']}")
        finally:
            # delete the index
            client.indices.delete(index="movies")
    finally:
        # release the client's connections, mirroring the async sample
        client.close()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
Reference in New Issue
Block a user