Fix AuthorizationException with AWSV4SignerAsyncAuth when the doc ID has special characters. (#848)

* Lifecycle integration tests.

Signed-off-by: dblock <dblock@amazon.com>

* Added a test that makes sure the slash is properly encoded.

Signed-off-by: dblock <dblock@amazon.com>

* Added more tests for signer and _make_path.

Signed-off-by: Nathalie Jonathan <nathhjo@amazon.com>

* Prevent AIOHttpConnection from encoding the url a second time.

Signed-off-by: Nathalie Jonathan <nathhjo@amazon.com>

---------

Signed-off-by: dblock <dblock@amazon.com>
Signed-off-by: Nathalie Jonathan <nathhjo@amazon.com>
Co-authored-by: dblock <dblock@amazon.com>
This commit is contained in:
nathaliellenaa
2024-11-27 14:50:22 -08:00
committed by GitHub
parent bf9add4eed
commit b9e48dc847
13 changed files with 445 additions and 9 deletions
+1
View File
@@ -14,6 +14,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
### Fixed ### Fixed
- Fix `Transport.perform_request`'s arguments `timeout` and `ignore` variable usage ([810](https://github.com/opensearch-project/opensearch-py/pull/810)) - Fix `Transport.perform_request`'s arguments `timeout` and `ignore` variable usage ([810](https://github.com/opensearch-project/opensearch-py/pull/810))
- Fix `Index.save` not passing through aliases to the underlying index ([823](https://github.com/opensearch-project/opensearch-py/pull/823)) - Fix `Index.save` not passing through aliases to the underlying index ([823](https://github.com/opensearch-project/opensearch-py/pull/823))
- Fix `AuthorizationException` with AWS OpenSearch when the doc ID contains `:` ([848](https://github.com/opensearch-project/opensearch-py/pull/848))
### Security ### Security
### Dependencies ### Dependencies
+5 -1
View File
@@ -101,7 +101,11 @@ test_opensearchpy/test_connection.py::TestRequestsConnection::test_no_http_compr
test_opensearchpy/test_async/test_connection.py::TestAIOHttpConnection::test_no_http_compression PASSED [100%] test_opensearchpy/test_async/test_connection.py::TestAIOHttpConnection::test_no_http_compression PASSED [100%]
``` ```
Note that integration tests require docker to be installed and running, and downloads quite a bit of data from over the internet and hence take few minutes to complete. ```
./.ci/run-tests false 2.16.0 test_indices_lifecycle
```
Note that integration tests require docker to be installed and running, and downloads quite a bit of data from the internet and hence take few minutes to complete.
## Linter ## Linter
+3 -1
View File
@@ -15,6 +15,8 @@ import ssl
import warnings import warnings
from typing import Any, Collection, Mapping, Optional, Union from typing import Any, Collection, Mapping, Optional, Union
import yarl
from .._async._extra_imports import aiohttp, aiohttp_exceptions # type: ignore from .._async._extra_imports import aiohttp, aiohttp_exceptions # type: ignore
from .._async.compat import get_running_loop from .._async.compat import get_running_loop
from .._async.http_aiohttp import AIOHttpConnection from .._async.http_aiohttp import AIOHttpConnection
@@ -210,7 +212,7 @@ class AsyncHttpConnection(AIOHttpConnection):
try: try:
async with self.session.request( async with self.session.request(
method, method,
url, yarl.URL(url, encoded=True),
data=body, data=body,
auth=auth, auth=auth,
headers=req_headers, headers=req_headers,
+2 -2
View File
@@ -19,9 +19,9 @@ from opensearchpy import OpenSearch
def main() -> None: def main() -> None:
""" """
an example showing how to create an synchronous connection to An example showing how to create a synchronous connection to
OpenSearch, create an index, index a document and search to OpenSearch, create an index, index a document and search to
return the document return the document.
""" """
host = "localhost" host = "localhost"
port = 9200 port = 9200
+2 -2
View File
@@ -18,9 +18,9 @@ from opensearchpy import AsyncOpenSearch
async def main() -> None: async def main() -> None:
""" """
an example showing how to create an asynchronous connection An example showing how to create an asynchronous connection
to OpenSearch, create an index, index a document and to OpenSearch, create an index, index a document and
search to return the document search to return the document.
""" """
# connect to OpenSearch # connect to OpenSearch
host = "localhost" host = "localhost"
+63
View File
@@ -0,0 +1,63 @@
#!/usr/bin/env python
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
import os
from opensearchpy import OpenSearch
# connect to OpenSearch
def main() -> None:
"""
An example showing how to create a synchronous connection to
OpenSearch, create an index, index a document and search to
return the document.
"""
host = "localhost"
port = 9200
auth = (
"admin",
os.getenv("OPENSEARCH_PASSWORD", "admin"),
) # For testing only. Don't store credentials in code.
client = OpenSearch(
hosts=[{"host": host, "port": port}],
http_auth=auth,
use_ssl=True,
verify_certs=False,
ssl_show_warn=False,
)
info = client.info()
print(f"Welcome to {info['version']['distribution']} {info['version']['number']}!")
index_name = "кино"
index_create_result = client.indices.create(index=index_name)
print(index_create_result)
document = {"название": "Солярис", "автор": "Андрей Тарковский", "год": "2011"}
id = "соларис@2011"
doc_insert_result = client.index(
index=index_name, body=document, id=id, refresh=True
)
print(doc_insert_result)
doc_delete_result = client.delete(index=index_name, id=id)
print(doc_delete_result)
index_delete_result = client.indices.delete(index=index_name)
print(index_delete_result)
if __name__ == "__main__":
main()
+72
View File
@@ -0,0 +1,72 @@
#!/usr/bin/env python
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
import asyncio
import os
from opensearchpy import AsyncOpenSearch
async def main() -> None:
"""
An example showing how to create an asynchronous connection
to OpenSearch, create an index, index a document and
search to return the document.
"""
# connect to OpenSearch
host = "localhost"
port = 9200
auth = (
"admin",
os.getenv("OPENSEARCH_PASSWORD", "admin"),
) # For testing only. Don't store credentials in code.
client = AsyncOpenSearch(
hosts=[{"host": host, "port": port}],
http_auth=auth,
use_ssl=True,
verify_certs=False,
ssl_show_warn=False,
)
try:
info = await client.info()
print(
f"Welcome to {info['version']['distribution']} {info['version']['number']}!"
)
index_name = "кино"
index_create_result = await client.indices.create(index=index_name)
print(index_create_result)
document = {"название": "Солярис", "автор": "Андрей Тарковский", "год": "2011"}
id = "соларис@2011"
doc_insert_result = await client.index(
index=index_name, body=document, id=id, refresh=True
)
print(doc_insert_result)
doc_delete_result = await client.delete(index=index_name, id=id)
print(doc_delete_result)
index_delete_result = await client.indices.delete(index=index_name)
print(index_delete_result)
finally:
await client.close()
if __name__ == "__main__":
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
loop.run_until_complete(main())
loop.close()
@@ -29,6 +29,7 @@ from typing import Any
from unittest import mock from unittest import mock
import pytest import pytest
import yarl
from multidict import CIMultiDict from multidict import CIMultiDict
from opensearchpy._async._extra_imports import aiohttp # type: ignore from opensearchpy._async._extra_imports import aiohttp # type: ignore
@@ -91,7 +92,7 @@ class TestAsyncHttpConnection:
await c.perform_request("post", "/test") await c.perform_request("post", "/test")
mock_request.assert_called_with( mock_request.assert_called_with(
"post", "post",
"http://localhost:9200/test", yarl.URL("http://localhost:9200/test", encoded=True),
data=None, data=None,
auth=c._http_auth, auth=c._http_auth,
headers={}, headers={},
@@ -120,7 +121,7 @@ class TestAsyncHttpConnection:
mock_request.assert_called_with( mock_request.assert_called_with(
"post", "post",
"http://localhost:9200/test", yarl.URL("http://localhost:9200/test", encoded=True),
data=None, data=None,
auth=None, auth=None,
headers={ headers={
@@ -30,10 +30,70 @@ from typing import Any
import pytest import pytest
from _pytest.mark.structures import MarkDecorator from _pytest.mark.structures import MarkDecorator
from opensearchpy.exceptions import RequestError
pytestmark: MarkDecorator = pytest.mark.asyncio pytestmark: MarkDecorator = pytest.mark.asyncio
class TestSpecialCharacters:
async def test_index_with_slash(self, async_client: Any) -> None:
index_name = "movies/shmovies"
with pytest.raises(RequestError) as e:
await async_client.indices.create(index=index_name)
assert (
str(e.value)
== "RequestError(400, 'invalid_index_name_exception', 'Invalid index name [movies/shmovies], must not contain the following characters [ , \", *, \\\\, <, |, ,, >, /, ?]')"
)
class TestUnicode: class TestUnicode:
async def test_indices_lifecycle_english(self, async_client: Any) -> None:
index_name = "movies"
index_create_result = await async_client.indices.create(index=index_name)
assert index_create_result["acknowledged"] is True
assert index_name == index_create_result["index"]
document = {"name": "Solaris", "director": "Andrei Tartakovsky", "year": "2011"}
id = "solaris@2011"
doc_insert_result = await async_client.index(
index=index_name, body=document, id=id, refresh=True
)
assert "created" == doc_insert_result["result"]
assert index_name == doc_insert_result["_index"]
assert id == doc_insert_result["_id"]
doc_delete_result = await async_client.delete(index=index_name, id=id)
assert "deleted" == doc_delete_result["result"]
assert index_name == doc_delete_result["_index"]
assert id == doc_delete_result["_id"]
index_delete_result = await async_client.indices.delete(index=index_name)
assert index_delete_result["acknowledged"] is True
async def test_indices_lifecycle_russian(self, async_client: Any) -> None:
index_name = "кино"
index_create_result = await async_client.indices.create(index=index_name)
assert index_create_result["acknowledged"] is True
assert index_name == index_create_result["index"]
document = {"название": "Солярис", "автор": "Андрей Тарковский", "год": "2011"}
id = "соларис@2011"
doc_insert_result = await async_client.index(
index=index_name, body=document, id=id, refresh=True
)
assert "created" == doc_insert_result["result"]
assert index_name == doc_insert_result["_index"]
assert id == doc_insert_result["_id"]
doc_delete_result = await async_client.delete(index=index_name, id=id)
assert "deleted" == doc_delete_result["result"]
assert index_name == doc_delete_result["_index"]
assert id == doc_delete_result["_id"]
index_delete_result = await async_client.indices.delete(index=index_name)
assert index_delete_result["acknowledged"] is True
async def test_indices_analyze(self, async_client: Any) -> None: async def test_indices_analyze(self, async_client: Any) -> None:
await async_client.indices.analyze(body='{"text": "привет"}') await async_client.indices.analyze(body='{"text": "привет"}')
@@ -8,6 +8,7 @@
# GitHub history for details. # GitHub history for details.
import uuid import uuid
from typing import Any, Collection, Dict, Mapping, Optional, Tuple, Union
from unittest.mock import Mock from unittest.mock import Mock
import pytest import pytest
@@ -103,3 +104,75 @@ class TestAsyncSignerWithFrozenCredentials(TestAsyncSigner):
assert "X-Amz-Date" in headers assert "X-Amz-Date" in headers
assert "X-Amz-Security-Token" in headers assert "X-Amz-Security-Token" in headers
assert len(mock_session.get_frozen_credentials.mock_calls) == 1 assert len(mock_session.get_frozen_credentials.mock_calls) == 1
class TestAsyncSignerWithSpecialCharacters:
def mock_session(self) -> Mock:
access_key = uuid.uuid4().hex
secret_key = uuid.uuid4().hex
token = uuid.uuid4().hex
dummy_session = Mock()
dummy_session.access_key = access_key
dummy_session.secret_key = secret_key
dummy_session.token = token
del dummy_session.get_frozen_credentials
return dummy_session
async def test_aws_signer_async_consitent_url(self) -> None:
region = "us-west-2"
from opensearchpy import AsyncOpenSearch
from opensearchpy.connection.http_async import AsyncHttpConnection
from opensearchpy.helpers.asyncsigner import AWSV4SignerAsyncAuth
# Store URLs for comparison
signed_url = None
sent_url = None
doc_id = "doc_id:with!special*chars%3A"
quoted_doc_id = "doc_id%3Awith%21special*chars%253A"
url = f"https://search-domain.region.es.amazonaws.com:9200/index/_doc/{quoted_doc_id}"
# Create a mock signer class to capture the signed URL
class MockSigner(AWSV4SignerAsyncAuth):
def _sign_request(
self,
method: str,
url: str,
query_string: Optional[str] = None,
body: Optional[Union[str, bytes]] = None,
) -> Dict[str, str]:
nonlocal signed_url
signed_url = url
return {}
# Create a mock connection class to capture the sent URL
class MockConnection(AsyncHttpConnection):
async def perform_request(
self: "MockConnection",
method: str,
url: str,
params: Optional[Mapping[str, Any]] = None,
body: Optional[Any] = None,
timeout: Optional[Union[int, float]] = None,
ignore: Collection[int] = (),
headers: Optional[Mapping[str, str]] = None,
) -> Tuple[int, Mapping[str, str], str]:
nonlocal sent_url
sent_url = f"{self.host}{url}"
return 200, {}, "{}"
auth = MockSigner(self.mock_session(), region)
auth("GET", url)
client = AsyncOpenSearch(
hosts=[{"host": "search-domain.region.es.amazonaws.com"}],
http_auth=auth,
use_ssl=True,
verify_certs=True,
connection_class=MockConnection,
)
await client.index("index", {"test": "data"}, id=doc_id)
assert signed_url == sent_url, "URLs don't match"
+40 -1
View File
@@ -154,9 +154,48 @@ class TestQueryParams(TestCase):
class TestMakePath(TestCase): class TestMakePath(TestCase):
def test_handles_unicode(self) -> None: def test_handles_unicode(self) -> None:
from urllib.parse import quote
id = "中文" id = "中文"
self.assertEqual( self.assertEqual(
"/some-index/type/%E4%B8%AD%E6%96%87", _make_path("some-index", "type", id) _make_path("some-index", "type", quote(id)),
"/some-index/type/%25E4%25B8%25AD%25E6%2596%2587",
)
def test_handles_single_arg(self) -> None:
from urllib.parse import quote
id = "idwith!char"
self.assertEqual(
_make_path("some-index", "type", quote(id)),
"/some-index/type/idwith%2521char",
)
def test_handles_multiple_args(self) -> None:
from urllib.parse import quote
ids = ["id!with@char", "another#id$here"]
quoted_ids = [quote(id) for id in ids]
self.assertEqual(
_make_path("some-index", "type", quoted_ids),
"/some-index/type/id%2521with%2540char,another%2523id%2524here",
)
def test_handles_arrays_of_args(self) -> None:
self.assertEqual(
"/index1,index2/type1,type2/doc1,doc2",
_make_path(
("index1", "index2"), ["type1", "type2"], tuple(["doc1", "doc2"])
),
)
from urllib.parse import quote
ids = [quote("$id!1"), quote("id*@2"), quote("#id3#")]
self.assertEqual(
_make_path("some-index", ids, "type"),
"/some-index/%2524id%25211,id%252A%25402,%2523id3%2523/type",
) )
@@ -513,6 +513,65 @@ class TestRequestsHttpConnection(TestCase):
("GET", "http://localhost/?key1=value1&key2=value2", None), ("GET", "http://localhost/?key1=value1&key2=value2", None),
) )
def test_aws_signer_consitent_url(self) -> None:
region = "us-west-2"
from typing import Any, Collection, Mapping, Optional, Union
from opensearchpy import OpenSearch
from opensearchpy.helpers.signer import RequestsAWSV4SignerAuth
# Store URLs for comparison
signed_url = None
sent_url = None
doc_id = "doc_id:with!special*chars%3A"
quoted_doc_id = "doc_id%3Awith%21special*chars%253A"
url = f"https://search-domain.region.es.amazonaws.com:9200/index/_doc/{quoted_doc_id}"
# Create a mock signer class to capture the signed URL
class MockSigner(RequestsAWSV4SignerAuth):
def __call__(self, prepared_request): # type: ignore
nonlocal signed_url
if isinstance(prepared_request, str):
signed_url = prepared_request
else:
signed_url = prepared_request.url
return prepared_request
# Create a mock connection class to capture the sent URL
class MockConnection(RequestsHttpConnection):
def perform_request( # type: ignore
self,
method: str,
url: str,
params: Optional[Mapping[str, Any]] = None,
body: Optional[bytes] = None,
timeout: Optional[Union[int, float]] = None,
allow_redirects: Optional[bool] = True,
ignore: Collection[int] = (),
headers: Optional[Mapping[str, str]] = None,
) -> Any:
nonlocal sent_url
sent_url = f"{self.host}{url}"
return 200, {}, "{}"
auth = MockSigner(self.mock_session(), region)
client = OpenSearch(
hosts=[{"host": "search-domain.region.es.amazonaws.com"}],
http_auth=auth(url),
use_ssl=True,
verify_certs=True,
connection_class=MockConnection,
)
client.index("index", {"test": "data"}, id=doc_id)
self.assertEqual(
signed_url,
sent_url,
"URLs don't match",
)
class TestRequestsConnectionRedirect(TestCase): class TestRequestsConnectionRedirect(TestCase):
server1: TestHTTPServer server1: TestHTTPServer
@@ -25,10 +25,72 @@
# under the License. # under the License.
import pytest
from opensearchpy.exceptions import RequestError
from . import OpenSearchTestCase from . import OpenSearchTestCase
class TestSpecialCharacters(OpenSearchTestCase):
def test_index_with_slash(self) -> None:
index_name = "movies/shmovies"
with pytest.raises(RequestError) as e:
self.client.indices.create(index=index_name)
self.assertEqual(
str(e.value),
"RequestError(400, 'invalid_index_name_exception', 'Invalid index name [movies/shmovies], must not contain the following characters [ , \", *, \\\\, <, |, ,, >, /, ?]')",
)
class TestUnicode(OpenSearchTestCase): class TestUnicode(OpenSearchTestCase):
def test_indices_lifecycle_english(self) -> None:
index_name = "movies"
index_create_result = self.client.indices.create(index=index_name)
self.assertTrue(index_create_result["acknowledged"])
self.assertEqual(index_name, index_create_result["index"])
document = {"name": "Solaris", "director": "Andrei Tartakovsky", "year": "2011"}
id = "solaris@2011"
doc_insert_result = self.client.index(
index=index_name, body=document, id=id, refresh=True
)
self.assertEqual("created", doc_insert_result["result"])
self.assertEqual(index_name, doc_insert_result["_index"])
self.assertEqual(id, doc_insert_result["_id"])
doc_delete_result = self.client.delete(index=index_name, id=id)
self.assertEqual("deleted", doc_delete_result["result"])
self.assertEqual(index_name, doc_delete_result["_index"])
self.assertEqual(id, doc_delete_result["_id"])
index_delete_result = self.client.indices.delete(index=index_name)
self.assertTrue(index_delete_result["acknowledged"])
def test_indices_lifecycle_russian(self) -> None:
index_name = "кино"
index_create_result = self.client.indices.create(index=index_name)
self.assertTrue(index_create_result["acknowledged"])
self.assertEqual(index_name, index_create_result["index"])
document = {"название": "Солярис", "автор": "Андрей Тарковский", "год": "2011"}
id = "соларис@2011"
doc_insert_result = self.client.index(
index=index_name, body=document, id=id, refresh=True
)
self.assertEqual("created", doc_insert_result["result"])
self.assertEqual(index_name, doc_insert_result["_index"])
self.assertEqual(id, doc_insert_result["_id"])
doc_delete_result = self.client.delete(index=index_name, id=id)
self.assertEqual("deleted", doc_delete_result["result"])
self.assertEqual(index_name, doc_delete_result["_index"])
self.assertEqual(id, doc_delete_result["_id"])
index_delete_result = self.client.indices.delete(index=index_name)
self.assertTrue(index_delete_result["acknowledged"])
def test_indices_analyze(self) -> None: def test_indices_analyze(self) -> None:
self.client.indices.analyze(body='{"text": "привет"}') self.client.indices.analyze(body='{"text": "привет"}')