opensearch-pyd/samples/search/search_after_sync.py

#!/usr/bin/env python

# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.

import os

from opensearchpy import OpenSearch


def main() -> None:
    """
    Paginate through search results with the ``search_after`` parameter.

    The sample creates a temporary ``movies`` index, indexes 110 documents
    (15 "The Dark Knight" titles plus 95 filler titles), and then runs a
    sorted ``match`` query page by page. After each page, the sort values of
    the last hit are passed back as ``search_after`` so the next request
    resumes right after that document. The loop ends when a request returns
    no hits, and a short pagination summary is printed.

    Once the index has been created it is always deleted again, and the
    client is always closed, even if a step in between raises.
    """

    # connect to OpenSearch
    host = "localhost"
    port = 9200
    auth = (
        "admin",
        os.getenv("OPENSEARCH_PASSWORD", "admin"),
    )  # For testing only. Don't store credentials in code.

    client = OpenSearch(
        hosts=[{"host": host, "port": port}],
        http_auth=auth,
        use_ssl=True,
        verify_certs=False,
        ssl_show_warn=False,
    )

    try:
        # create an index; this stays outside the inner try so that a failed
        # create never triggers the delete in the inner finally
        client.indices.create(index="movies")

        try:
            # add a dataset of 110 movies: first the 15 titles that the
            # query below is looking for ...
            for i in range(15):
                client.index(
                    index="movies",
                    id=i,
                    body={
                        "title": f"The Dark Knight {i}",
                        "director": "Christopher Nolan",
                        "year": 2008 + i,
                    },
                )

            # ... then 95 filler titles (ids 15-109)
            for i in range(15, 110):
                client.index(
                    index="movies",
                    id=i,
                    body={
                        "title": f"Movie Title {i}",
                        "director": f"Director {i}",
                        "year": 1950 + i,
                    },
                )

            # refresh the index to make the documents searchable
            client.indices.refresh(index="movies")

            # define the search query with sorting and pagination options.
            # search_after resumes from the sort values of the previous hit,
            # so the sort should identify each hit uniquely. Sorting on
            # "year" alone is enough here because every "The Dark Knight"
            # document gets a distinct year (2008 + i); add a unique
            # tiebreaker field to the sort when values can repeat.
            search_body = {
                "query": {"match": {"title": "dark knight"}},
                "sort": [{"year": {"order": "asc"}}],
                "size": 10,
            }

            page = 1
            total_hits = 0
            while True:
                # execute the search
                response = client.search(index="movies", body=search_body)
                hits = response["hits"]["hits"]

                # break if no more results
                if not hits:
                    break

                print(f"\nPage {page}:")

                for hit in hits:
                    print(hit)
                    total_hits += 1

                # get the sort values of the last document for the next page
                last_sort = hits[-1]["sort"]
                search_body["search_after"] = last_sort
                page += 1

            # page was incremented after the last non-empty page, hence - 1
            print("\nPagination Summary:")
            print(f"Total pages: {page - 1}")
            print(f"Total hits: {total_hits}")
            print(f"Results per page: {search_body['size']}")
        finally:
            # delete the index
            client.indices.delete(index="movies")
    finally:
        # release the client's connections
        client.close()


# run the sample only when this file is executed as a script, not on import
if __name__ == "__main__":
    main()