diff --git a/sdk/cosmos/azure-cosmos-ai/CHANGELOG.md b/sdk/cosmos/azure-cosmos-ai/CHANGELOG.md new file mode 100644 index 000000000000..938f5025b033 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-ai/CHANGELOG.md @@ -0,0 +1,14 @@ +# Release History + +## 1.0.0b1 (Unreleased) + +### Features Added + +- Initial preview release of the `azure-cosmos-ai` package, a companion to `azure-cosmos`. +- Added `azure.cosmos.ai.AzureOpenAIEmbeddingProvider` (sync) and `azure.cosmos.ai.aio.AzureOpenAIEmbeddingProvider` (async): the default Azure OpenAI implementation of the `EmbeddingProvider` Protocol introduced in `azure-cosmos`. + +### Breaking Changes + +### Bugs Fixed + +### Other Changes diff --git a/sdk/cosmos/azure-cosmos-ai/LICENSE b/sdk/cosmos/azure-cosmos-ai/LICENSE new file mode 100644 index 000000000000..63447fd8bbbf --- /dev/null +++ b/sdk/cosmos/azure-cosmos-ai/LICENSE @@ -0,0 +1,21 @@ +Copyright (c) Microsoft Corporation. + +MIT License + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
\ No newline at end of file diff --git a/sdk/cosmos/azure-cosmos-ai/MANIFEST.in b/sdk/cosmos/azure-cosmos-ai/MANIFEST.in new file mode 100644 index 000000000000..dfb0956250b6 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-ai/MANIFEST.in @@ -0,0 +1,4 @@ +recursive-include tests *.py +include *.md +include LICENSE +include azure/cosmos/ai/py.typed diff --git a/sdk/cosmos/azure-cosmos-ai/README.md b/sdk/cosmos/azure-cosmos-ai/README.md new file mode 100644 index 000000000000..c854583dab3d --- /dev/null +++ b/sdk/cosmos/azure-cosmos-ai/README.md @@ -0,0 +1,83 @@ +# Azure Cosmos DB AI extensions for Python + +`azure-cosmos-ai` is a companion package to [`azure-cosmos`](https://pypi.org/project/azure-cosmos/) that provides AI-related extensions for the Azure Cosmos DB SDK. + +It ships the default Azure OpenAI implementation of the `EmbeddingProvider` Protocol introduced in `azure-cosmos` 4.16.0b3, used by the SDK to generate vector embeddings for `GenerateEmbeddings(...)` query expressions. + +## Getting started + +### Install the package + +```bash +pip install azure-cosmos-ai +``` + +### Prerequisites + +- Python 3.9 or later +- An Azure subscription +- An existing Azure Cosmos DB for NoSQL account +- An Azure OpenAI resource with an embeddings deployment (e.g. `text-embedding-3-small`) + +## Key concepts + +The provider stores **only the credential**. Endpoint and deployment name are read from the container's `vectorEmbeddingPolicy.embeddingSource`, dimensions from `vectorEmbeddingPolicy.dimensions`, and all three are forwarded to the provider by the Cosmos SDK at query time. This keeps the policy as the single source of truth. 
+ +## Examples + +### API key (sync) + +```python +from azure.cosmos import CosmosClient +from azure.cosmos.ai import AzureOpenAIEmbeddingProvider + +provider = AzureOpenAIEmbeddingProvider(credential="") + +client = CosmosClient( + url="https://my-cosmos.documents.azure.com:443/", + credential="", + embedding_provider=provider, +) +``` + +### Entra — shared credential (recommended) + +Pass the same `TokenCredential` to `CosmosClient` (for Cosmos RBAC) and to the +provider (for Azure OpenAI). One identity covers both services. + +```python +from azure.cosmos.aio import CosmosClient +from azure.cosmos.ai.aio import AzureOpenAIEmbeddingProvider +from azure.identity.aio import DefaultAzureCredential + +async with DefaultAzureCredential() as cred: + async with AzureOpenAIEmbeddingProvider(credential=cred) as provider: + async with CosmosClient( + url="https://my-cosmos.documents.azure.com:443/", + credential=cred, + embedding_provider=provider, + ) as client: + ... +``` + +### Supported credential types + +| Type | Auth mode | +|------------------------------------------------|-----------| +| `str` | Azure OpenAI API key | +| `azure.core.credentials.AzureKeyCredential` | Azure OpenAI API key | +| `azure.core.credentials.TokenCredential` (sync) / `azure.core.credentials_async.AsyncTokenCredential` (async) | Entra (RBAC) | + +## Troubleshooting + +The provider deliberately does not wrap exceptions thrown by the underlying +[`openai`](https://pypi.org/project/openai/) client (e.g. `openai.BadRequestError`, +`openai.AuthenticationError`, `openai.RateLimitError`, `openai.APIConnectionError`). +Inputs that exceed the model's context length surface as `openai.BadRequestError` +with code `context_length_exceeded`. + +Retries are handled by the `openai` SDK; this provider adds no extra retry policy. + +## Contributing + +This project welcomes contributions and suggestions. 
See the [Azure SDK for Python contributing guide](https://github.com/Azure/azure-sdk-for-python/blob/main/CONTRIBUTING.md) for details. diff --git a/sdk/cosmos/azure-cosmos-ai/azure/__init__.py b/sdk/cosmos/azure-cosmos-ai/azure/__init__.py new file mode 100644 index 000000000000..d55ccad1f573 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-ai/azure/__init__.py @@ -0,0 +1 @@ +__path__ = __import__("pkgutil").extend_path(__path__, __name__) # type: ignore diff --git a/sdk/cosmos/azure-cosmos-ai/azure/cosmos/__init__.py b/sdk/cosmos/azure-cosmos-ai/azure/cosmos/__init__.py new file mode 100644 index 000000000000..d55ccad1f573 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-ai/azure/cosmos/__init__.py @@ -0,0 +1 @@ +__path__ = __import__("pkgutil").extend_path(__path__, __name__) # type: ignore diff --git a/sdk/cosmos/azure-cosmos-ai/azure/cosmos/ai/__init__.py b/sdk/cosmos/azure-cosmos-ai/azure/cosmos/ai/__init__.py new file mode 100644 index 000000000000..d4612a0e579c --- /dev/null +++ b/sdk/cosmos/azure-cosmos-ai/azure/cosmos/ai/__init__.py @@ -0,0 +1,27 @@ +# The MIT License (MIT) +# Copyright (c) 2023 Microsoft Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from ._version import VERSION +from ._azure_openai_provider import AzureOpenAIEmbeddingProvider + + +__version__ = VERSION +__all__ = ["AzureOpenAIEmbeddingProvider"] diff --git a/sdk/cosmos/azure-cosmos-ai/azure/cosmos/ai/_azure_openai_provider.py b/sdk/cosmos/azure-cosmos-ai/azure/cosmos/ai/_azure_openai_provider.py new file mode 100644 index 000000000000..270f7ded5311 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-ai/azure/cosmos/ai/_azure_openai_provider.py @@ -0,0 +1,198 @@ +# The MIT License (MIT) +# Copyright (c) 2023 Microsoft Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +"""Synchronous Azure OpenAI implementation of the EmbeddingProvider Protocol.""" + +import inspect +import time +from typing import Any, Dict, Mapping, Optional, Sequence, Union + +from azure.core.credentials import AzureKeyCredential, TokenCredential +from azure.cosmos import EmbeddingResult +from azure.identity import get_bearer_token_provider +from openai import AzureOpenAI + +_AZURE_OPENAI_API_VERSION = "2024-10-21" +_COGNITIVE_SERVICES_SCOPE = "https://cognitiveservices.azure.com/.default" + + +class AzureOpenAIEmbeddingProvider: + """Default Azure OpenAI implementation of the + :class:`azure.cosmos.EmbeddingProvider` Protocol. + + The provider only stores the credential. Endpoint, deployment name, and + dimensions are read from the container's ``vectorEmbeddingPolicy`` and + forwarded to :meth:`generate_embeddings` by the Cosmos SDK at query time. + + :param credential: One of: + + * ``str`` – Azure OpenAI API key. + * :class:`~azure.core.credentials.AzureKeyCredential` – Azure OpenAI API key. + * :class:`~azure.core.credentials.TokenCredential` – Entra (RBAC). Pass the + same credential you use with :class:`~azure.cosmos.CosmosClient` to share + one identity across both services. + :type credential: str or + ~azure.core.credentials.AzureKeyCredential or + ~azure.core.credentials.TokenCredential + :keyword str api_version: Azure OpenAI REST API version. Defaults to + ``"2024-10-21"`` (the GA version when this package shipped). Override to + access newer model features without waiting for a new release of this + package. + :keyword openai_client_kwargs: Additional keyword arguments forwarded + verbatim to :class:`openai.AzureOpenAI` (e.g. ``timeout``, ``max_retries``, + ``http_client``, ``default_headers``, ``user``). Keys that this provider + controls (``azure_endpoint``, ``api_version``, ``api_key``, + ``azure_ad_token_provider``) are not overridable through this mapping. 
+ :paramtype openai_client_kwargs: ~typing.Mapping[str, ~typing.Any] or None + """ + + def __init__( + self, + credential: Union[str, AzureKeyCredential, TokenCredential], + *, + api_version: str = _AZURE_OPENAI_API_VERSION, + openai_client_kwargs: Optional[Mapping[str, Any]] = None, + ) -> None: + if isinstance(credential, (str, AzureKeyCredential)): + pass + elif _is_token_credential(credential): + pass + elif _is_async_token_credential(credential): + raise TypeError( + "Synchronous AzureOpenAIEmbeddingProvider received an async " + f"credential ({type(credential).__name__}). Either use " + "azure.cosmos.ai.aio.AzureOpenAIEmbeddingProvider instead, or " + "pass a synchronous TokenCredential such as " + "azure.identity.DefaultAzureCredential." + ) + else: + raise TypeError( + "credential must be a str, AzureKeyCredential, or synchronous " + f"TokenCredential; got {type(credential).__name__}" + ) + + self._credential = credential + self._api_version = api_version + self._openai_client_kwargs: Dict[str, Any] = dict(openai_client_kwargs or {}) + self._clients: Dict[str, AzureOpenAI] = {} + + def generate_embeddings( + self, + texts: Sequence[str], + *, + endpoint: str, + deployment_name: str, + dimensions: int, + **kwargs: Any, + ) -> EmbeddingResult: + """Generate embeddings for ``texts`` using Azure OpenAI. + + :param texts: Input strings. + :type texts: ~typing.Sequence[str] + :keyword str endpoint: Azure OpenAI endpoint + (from ``vectorEmbeddingPolicy.embeddingSource.endpoint``). + :keyword str deployment_name: Azure OpenAI deployment name + (from ``vectorEmbeddingPolicy.embeddingSource.deploymentName``). + :keyword int dimensions: Embedding dimensions + (from ``vectorEmbeddingPolicy.dimensions``). + :keyword Any kwargs: Reserved for forward compatibility with future + Cosmos SDK additions. Currently, no per-call kwargs are forwarded to + the underlying ``openai`` call; use ``openai_client_kwargs`` on the + constructor (e.g. 
``timeout``, ``max_retries``) to configure the + underlying client. + :returns: Vectors in the same order as ``texts``, plus token usage and + measured latency. + :rtype: ~azure.cosmos.EmbeddingResult + """ + if not texts: + return EmbeddingResult(vectors=[], total_tokens=0, latency=None) + + client = self._get_or_create_client(endpoint) + start = time.perf_counter() + response = client.embeddings.create( + input=list(texts), + model=deployment_name, + dimensions=dimensions, + ) + latency = time.perf_counter() - start + total_tokens: Optional[int] = response.usage.total_tokens if response.usage else None + return EmbeddingResult( + vectors=[item.embedding for item in response.data], + total_tokens=total_tokens, + latency=latency, + ) + + def close(self) -> None: + """Close every cached underlying Azure OpenAI client and clear the cache.""" + clients = list(self._clients.values()) + self._clients.clear() + for client in clients: + try: + client.close() + except Exception: # pylint: disable=broad-except + pass + + def __enter__(self) -> "AzureOpenAIEmbeddingProvider": + return self + + def __exit__(self, *args: Any) -> None: + self.close() + + def _get_or_create_client(self, endpoint: str) -> AzureOpenAI: + key = endpoint.rstrip("/") + client = self._clients.get(key) + if client is None: + client = self._build_client(key) + self._clients[key] = client + return client + + def _build_client(self, endpoint: str) -> AzureOpenAI: + # User-supplied kwargs go first so our explicit args win on collision. 
+ common: Dict[str, Any] = dict(self._openai_client_kwargs) + common.update(azure_endpoint=endpoint, api_version=self._api_version) + if isinstance(self._credential, str): + return AzureOpenAI(api_key=self._credential, **common) + if isinstance(self._credential, AzureKeyCredential): + return AzureOpenAI(api_key=self._credential.key, **common) + token_provider = get_bearer_token_provider(self._credential, _COGNITIVE_SERVICES_SCOPE) + return AzureOpenAI(azure_ad_token_provider=token_provider, **common) + + +def _is_token_credential(obj: Any) -> bool: + """Duck-type check for a *synchronous* TokenCredential. + + Accepts any object that exposes a non-coroutine, callable ``get_token``. + Async credentials (where ``get_token`` is a coroutine function) are rejected + so the mismatch is caught at ``__init__`` instead of failing deep inside + ``openai`` with a confusing ``coroutine`` error. + """ + get_token = getattr(obj, "get_token", None) + return callable(get_token) and not inspect.iscoroutinefunction(get_token) + + +def _is_async_token_credential(obj: Any) -> bool: + """Duck-type check for an *asynchronous* TokenCredential. + + Used only to produce an actionable error message when an async credential is + accidentally passed to the sync provider. 
+ """ + get_token = getattr(obj, "get_token", None) + return callable(get_token) and inspect.iscoroutinefunction(get_token) diff --git a/sdk/cosmos/azure-cosmos-ai/azure/cosmos/ai/_version.py b/sdk/cosmos/azure-cosmos-ai/azure/cosmos/ai/_version.py new file mode 100644 index 000000000000..36cd1059c7bb --- /dev/null +++ b/sdk/cosmos/azure-cosmos-ai/azure/cosmos/ai/_version.py @@ -0,0 +1,22 @@ +# The MIT License (MIT) +# Copyright (c) 2023 Microsoft Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +VERSION = "1.0.0b1" diff --git a/sdk/cosmos/azure-cosmos-ai/azure/cosmos/ai/aio/__init__.py b/sdk/cosmos/azure-cosmos-ai/azure/cosmos/ai/aio/__init__.py new file mode 100644 index 000000000000..db33676aeb88 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-ai/azure/cosmos/ai/aio/__init__.py @@ -0,0 +1,24 @@ +# The MIT License (MIT) +# Copyright (c) 2023 Microsoft Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +from ._azure_openai_provider import AzureOpenAIEmbeddingProvider + +__all__ = ["AzureOpenAIEmbeddingProvider"] diff --git a/sdk/cosmos/azure-cosmos-ai/azure/cosmos/ai/aio/_azure_openai_provider.py b/sdk/cosmos/azure-cosmos-ai/azure/cosmos/ai/aio/_azure_openai_provider.py new file mode 100644 index 000000000000..837f1824f8a4 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-ai/azure/cosmos/ai/aio/_azure_openai_provider.py @@ -0,0 +1,219 @@ +# The MIT License (MIT) +# Copyright (c) 2023 Microsoft Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +"""Asynchronous Azure OpenAI implementation of the EmbeddingProvider Protocol.""" + +import asyncio +import inspect +import time +from typing import Any, Dict, Mapping, Optional, Sequence, Union + +from azure.core.credentials import AzureKeyCredential +from azure.core.credentials_async import AsyncTokenCredential +from azure.cosmos import EmbeddingResult +from azure.identity.aio import get_bearer_token_provider +from openai import AsyncAzureOpenAI + +_AZURE_OPENAI_API_VERSION = "2024-10-21" +_COGNITIVE_SERVICES_SCOPE = "https://cognitiveservices.azure.com/.default" + + +class AzureOpenAIEmbeddingProvider: + """Async default Azure OpenAI implementation of the + :class:`azure.cosmos.aio.EmbeddingProvider` Protocol. + + The provider only stores the credential. Endpoint, deployment name, and + dimensions are read from the container's ``vectorEmbeddingPolicy`` and + forwarded to :meth:`generate_embeddings` by the Cosmos SDK at query time. + + :param credential: One of: + + * ``str`` – Azure OpenAI API key. + * :class:`~azure.core.credentials.AzureKeyCredential` – Azure OpenAI API key. + * :class:`~azure.core.credentials_async.AsyncTokenCredential` – Entra (RBAC). + Pass the same credential you use with + :class:`~azure.cosmos.aio.CosmosClient` to share one identity across both + services. + :type credential: str or + ~azure.core.credentials.AzureKeyCredential or + ~azure.core.credentials_async.AsyncTokenCredential + :keyword str api_version: Azure OpenAI REST API version. Defaults to + ``"2024-10-21"`` (the GA version when this package shipped). Override to + access newer model features without waiting for a new release of this + package. + :keyword openai_client_kwargs: Additional keyword arguments forwarded + verbatim to :class:`openai.AsyncAzureOpenAI` (e.g. ``timeout``, + ``max_retries``, ``http_client``, ``default_headers``, ``user``). 
Keys + that this provider controls (``azure_endpoint``, ``api_version``, + ``api_key``, ``azure_ad_token_provider``) are not overridable through + this mapping. + :paramtype openai_client_kwargs: ~typing.Mapping[str, ~typing.Any] or None + """ + + def __init__( + self, + credential: Union[str, AzureKeyCredential, AsyncTokenCredential], + *, + api_version: str = _AZURE_OPENAI_API_VERSION, + openai_client_kwargs: Optional[Mapping[str, Any]] = None, + ) -> None: + if isinstance(credential, (str, AzureKeyCredential)): + pass + elif _is_async_token_credential(credential): + pass + elif _is_sync_token_credential(credential): + raise TypeError( + "Asynchronous AzureOpenAIEmbeddingProvider received a sync " + f"credential ({type(credential).__name__}). Either use " + "azure.cosmos.ai.AzureOpenAIEmbeddingProvider instead, or " + "pass an asynchronous TokenCredential such as " + "azure.identity.aio.DefaultAzureCredential." + ) + else: + raise TypeError( + "credential must be a str, AzureKeyCredential, or asynchronous " + f"TokenCredential; got {type(credential).__name__}" + ) + + self._credential = credential + self._api_version = api_version + self._openai_client_kwargs: Dict[str, Any] = dict(openai_client_kwargs or {}) + self._clients: Dict[str, AsyncAzureOpenAI] = {} + # Lazily created on first use so we don't require a running event loop at __init__. + self._clients_lock: Optional[asyncio.Lock] = None + + async def generate_embeddings( + self, + texts: Sequence[str], + *, + endpoint: str, + deployment_name: str, + dimensions: int, + **kwargs: Any, + ) -> EmbeddingResult: + """Generate embeddings for ``texts`` using Azure OpenAI. + + Safe to call concurrently from multiple coroutines. + + :param texts: Input strings. + :type texts: ~typing.Sequence[str] + :keyword str endpoint: Azure OpenAI endpoint + (from ``vectorEmbeddingPolicy.embeddingSource.endpoint``). 
+ :keyword str deployment_name: Azure OpenAI deployment name + (from ``vectorEmbeddingPolicy.embeddingSource.deploymentName``). + :keyword int dimensions: Embedding dimensions + (from ``vectorEmbeddingPolicy.dimensions``). + :keyword Any kwargs: Reserved for forward compatibility with future + Cosmos SDK additions. Currently, no per-call kwargs are forwarded to + the underlying ``openai`` call; use ``openai_client_kwargs`` on the + constructor (e.g. ``timeout``, ``max_retries``) to configure the + underlying client. + :returns: Vectors in the same order as ``texts``, plus token usage and + measured latency. + :rtype: ~azure.cosmos.EmbeddingResult + """ + if not texts: + return EmbeddingResult(vectors=[], total_tokens=0, latency=None) + + client = await self._get_or_create_client(endpoint) + start = time.perf_counter() + response = await client.embeddings.create( + input=list(texts), + model=deployment_name, + dimensions=dimensions, + ) + latency = time.perf_counter() - start + total_tokens: Optional[int] = response.usage.total_tokens if response.usage else None + return EmbeddingResult( + vectors=[item.embedding for item in response.data], + total_tokens=total_tokens, + latency=latency, + ) + + async def close(self) -> None: + """Close every cached underlying Azure OpenAI client and clear the cache. + + Snapshots the cached clients and clears the dict *before* awaiting each + ``close()`` so that a concurrent :meth:`generate_embeddings` cannot + observe a half-closed client. 
+ """ + clients = list(self._clients.values()) + self._clients.clear() + for client in clients: + try: + await client.close() + except Exception: # pylint: disable=broad-except + pass + + async def __aenter__(self) -> "AzureOpenAIEmbeddingProvider": + return self + + async def __aexit__(self, *args: Any) -> None: + await self.close() + + def _ensure_lock(self) -> asyncio.Lock: + if self._clients_lock is None: + self._clients_lock = asyncio.Lock() + return self._clients_lock + + async def _get_or_create_client(self, endpoint: str) -> AsyncAzureOpenAI: + key = endpoint.rstrip("/") + client = self._clients.get(key) + if client is not None: + return client + async with self._ensure_lock(): + client = self._clients.get(key) + if client is None: + client = self._build_client(key) + self._clients[key] = client + return client + + def _build_client(self, endpoint: str) -> AsyncAzureOpenAI: + # User-supplied kwargs go first so our explicit args win on collision. + common: Dict[str, Any] = dict(self._openai_client_kwargs) + common.update(azure_endpoint=endpoint, api_version=self._api_version) + if isinstance(self._credential, str): + return AsyncAzureOpenAI(api_key=self._credential, **common) + if isinstance(self._credential, AzureKeyCredential): + return AsyncAzureOpenAI(api_key=self._credential.key, **common) + token_provider = get_bearer_token_provider(self._credential, _COGNITIVE_SERVICES_SCOPE) + return AsyncAzureOpenAI(azure_ad_token_provider=token_provider, **common) + + +def _is_async_token_credential(obj: Any) -> bool: + """Duck-type check for an *asynchronous* TokenCredential. + + Accepts any object that exposes a coroutine ``get_token`` method. Sync + credentials are rejected so the mismatch is caught at ``__init__`` instead + of failing deep inside ``openai`` with a confusing error. 
+ """ + get_token = getattr(obj, "get_token", None) + return callable(get_token) and inspect.iscoroutinefunction(get_token) + + +def _is_sync_token_credential(obj: Any) -> bool: + """Duck-type check for a *synchronous* TokenCredential. + + Used only to produce an actionable error message when a sync credential is + accidentally passed to the async provider. + """ + get_token = getattr(obj, "get_token", None) + return callable(get_token) and not inspect.iscoroutinefunction(get_token) diff --git a/sdk/cosmos/azure-cosmos-ai/azure/cosmos/ai/py.typed b/sdk/cosmos/azure-cosmos-ai/azure/cosmos/ai/py.typed new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/sdk/cosmos/azure-cosmos-ai/dev_requirements.txt b/sdk/cosmos/azure-cosmos-ai/dev_requirements.txt new file mode 100644 index 000000000000..fc31dc33fba7 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-ai/dev_requirements.txt @@ -0,0 +1,5 @@ +../azure-cosmos +../../core/azure-core +../../identity/azure-identity +openai>=1.0.0 +-e ../../../eng/tools/azure-sdk-tools diff --git a/sdk/cosmos/azure-cosmos-ai/pyproject.toml b/sdk/cosmos/azure-cosmos-ai/pyproject.toml new file mode 100644 index 000000000000..52c35fbc61c2 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-ai/pyproject.toml @@ -0,0 +1,7 @@ +[tool.azure-sdk-build] +mypy = true +pyright = false +pylint = true + +[tool.azure-sdk-conda] +in_bundle = false diff --git a/sdk/cosmos/azure-cosmos-ai/pytest.ini b/sdk/cosmos/azure-cosmos-ai/pytest.ini new file mode 100644 index 000000000000..c3dee16689a6 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-ai/pytest.ini @@ -0,0 +1,9 @@ +[pytest] +# All tests under sdk/cosmos/ run through the shared cosmos-sdk-client pipeline +# template, which filters via `-m cosmosEmulator`. 
azure-cosmos-ai's unit tests +# are fully mocked and do not require the Cosmos emulator, but they still need +# to be collected by that filter — so we register the marker here (to silence +# pytest's unknown-marker warning) and apply it module-wide in each test file. +markers = + cosmosEmulator: marks tests as part of the cosmos service test umbrella (azure-cosmos-ai tests do not actually require the emulator). +asyncio_mode = auto diff --git a/sdk/cosmos/azure-cosmos-ai/sdk_packaging.toml b/sdk/cosmos/azure-cosmos-ai/sdk_packaging.toml new file mode 100644 index 000000000000..901bc8ccbfa6 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-ai/sdk_packaging.toml @@ -0,0 +1,2 @@ +[packaging] +auto_update = false diff --git a/sdk/cosmos/azure-cosmos-ai/setup.py b/sdk/cosmos/azure-cosmos-ai/setup.py new file mode 100644 index 000000000000..704495d9ff58 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-ai/setup.py @@ -0,0 +1,98 @@ +#!/usr/bin/env python + +# The MIT License (MIT) +# Copyright (c) 2023 Microsoft Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +import re +import os +from io import open +from setuptools import find_packages, setup + +# Change the PACKAGE_NAME only to change folder and different name +PACKAGE_NAME = "azure-cosmos-ai" +PACKAGE_PPRINT_NAME = "Cosmos AI" + +# a-b-c => a/b/c +PACKAGE_FOLDER_PATH = PACKAGE_NAME.replace("-", "/") +# a-b-c => a.b.c +NAMESPACE_NAME = PACKAGE_NAME.replace("-", ".") + +# Version extraction inspired from 'requests' +with open(os.path.join(PACKAGE_FOLDER_PATH, '_version.py'), 'r') as fd: + version = re.search(r'^VERSION\s*=\s*[\'"]([^\'"]*)[\'"]', + fd.read(), re.MULTILINE).group(1) + +if not version: + raise RuntimeError("Cannot find version information") + +with open("README.md", encoding="utf-8") as f: + readme = f.read() +with open("CHANGELOG.md", encoding="utf-8") as f: + changelog = f.read() + +exclude_packages = [ + "tests", + "azure", + "azure.cosmos", +] + +setup( + name=PACKAGE_NAME, + version=version, + include_package_data=True, + description="Microsoft Azure {} Extensions for Python".format(PACKAGE_PPRINT_NAME), + long_description=readme + "\n\n" + changelog, + long_description_content_type="text/markdown", + license="MIT License", + author="Microsoft Corporation", + author_email="askdocdb@microsoft.com", + maintainer="Microsoft", + maintainer_email="askdocdb@microsoft.com", + url="https://github.com/Azure/azure-sdk-for-python", + keywords="azure, azure sdk", + classifiers=[ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "Natural Language :: English", + "Operating System :: OS Independent", + "Programming Language :: Python", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.9", + 
"Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "License :: OSI Approved :: MIT License", + ], + zip_safe=False, + packages=find_packages(exclude=exclude_packages), + package_data={ + "azure.cosmos.ai": ["py.typed"], + }, + python_requires=">=3.9", + install_requires=[ + "azure-cosmos>=4.16.0b3", + "azure-core>=1.30.0", + "azure-identity>=1.19.0", + "openai>=1.0.0", + ], +) diff --git a/sdk/cosmos/azure-cosmos-ai/tests/conftest.py b/sdk/cosmos/azure-cosmos-ai/tests/conftest.py new file mode 100644 index 000000000000..6f24c99ce5f7 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-ai/tests/conftest.py @@ -0,0 +1,44 @@ +# The MIT License (MIT) +# Copyright (c) 2023 Microsoft Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +"""Test fixtures for azure-cosmos-ai. 
+ +Until ``azure-cosmos`` 4.16.0b3 (PR #46902) is released, ``EmbeddingResult`` +isn't available from ``azure.cosmos`` in our checkout. Inject a minimal stub +on import so the provider modules — which ``from azure.cosmos import +EmbeddingResult`` at module load — work in CI/local dev. Once the dependency +ships, this stub becomes a no-op (the real class wins). +""" + +from dataclasses import dataclass +from typing import List, Optional + +import azure.cosmos as _cosmos + +if not hasattr(_cosmos, "EmbeddingResult"): + + @dataclass + class EmbeddingResult: # pylint: disable=too-few-public-methods + vectors: List[List[float]] + total_tokens: Optional[int] = None + latency: Optional[float] = None + + _cosmos.EmbeddingResult = EmbeddingResult # type: ignore[attr-defined] diff --git a/sdk/cosmos/azure-cosmos-ai/tests/test_azure_openai_provider.py b/sdk/cosmos/azure-cosmos-ai/tests/test_azure_openai_provider.py new file mode 100644 index 000000000000..254d62f7e159 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-ai/tests/test_azure_openai_provider.py @@ -0,0 +1,365 @@ +# The MIT License (MIT) +# Copyright (c) 2023 Microsoft Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +"""Tests for AzureOpenAIEmbeddingProvider (sync). + +This module exposes two test classes: + +* ``TestAzureOpenAIProvider`` runs fully mocked unit tests and is always + collected. +* ``TestAzureOpenAIProviderLive`` runs opt-in live tests against a real + Azure OpenAI resource. Set ``COSMOS_AI_LIVE_TESTS=1`` and provide + connection settings via environment variables to enable it: + + * ``AZURE_OPENAI_ENDPOINT`` required (e.g. ``https://.openai.azure.com/``) + * ``AZURE_OPENAI_EMBEDDING_DEPLOYMENT`` required + * ``AZURE_OPENAI_EMBEDDING_DIMENSIONS`` required (int) + * ``AZURE_OPENAI_API_KEY`` required for the API key tests +""" + +import os +from types import SimpleNamespace +from unittest.mock import MagicMock, patch + +import pytest +from azure.core.credentials import AccessToken, AzureKeyCredential +from azure.identity import DefaultAzureCredential + +from azure.cosmos.ai import AzureOpenAIEmbeddingProvider + + +ENDPOINT = "https://example.com/" +ENDPOINT_KEY = "https://example.com" +DEPLOYMENT = "text-embedding-3-small" +DIMENSIONS = 1536 + + +# Apply the shared cosmos CI test marker at module scope. The cosmos-sdk-client +# pipeline template filters via ``-m cosmosEmulator``; without this declaration +# every test in this module would be silently deselected in CI. 
+pytestmark = pytest.mark.cosmosEmulator + + +class _FakeTokenCredential: + def __init__(self): + self.calls = [] + + def get_token(self, *scopes, **kwargs): # pylint: disable=unused-argument + self.calls.append(scopes) + return AccessToken("fake-token", 9999999999) + + +class _FakeAsyncTokenCredential: + """Stand-in for an azure.identity.aio credential without the import cost.""" + + def __init__(self): + self.calls = [] + + async def get_token(self, *scopes, **kwargs): # pylint: disable=unused-argument + self.calls.append(scopes) + return AccessToken("fake-token", 9999999999) + + +def _fake_response(vectors, total_tokens=42): + return SimpleNamespace( + data=[SimpleNamespace(embedding=v) for v in vectors], + usage=SimpleNamespace(total_tokens=total_tokens) if total_tokens is not None else None, + ) + + +@pytest.fixture +def mock_aoai(): + """Patches AzureOpenAI inside the provider module.""" + with patch("azure.cosmos.ai._azure_openai_provider.AzureOpenAI") as cls: + instance = MagicMock(name="AzureOpenAIInstance") + cls.return_value = instance + instance.embeddings.create.return_value = _fake_response([[0.1, 0.2], [0.3, 0.4]]) + yield cls, instance + + +class TestAzureOpenAIProvider: + """Unit tests with a mocked underlying ``AzureOpenAI`` client.""" + + # ----- constructor / credential dispatch ----- + + def test_init_accepts_str(self, mock_aoai): # pylint: disable=unused-argument + AzureOpenAIEmbeddingProvider(credential="my-key") + + def test_init_accepts_azure_key_credential(self, mock_aoai): # pylint: disable=unused-argument + AzureOpenAIEmbeddingProvider(credential=AzureKeyCredential("my-key")) + + def test_init_accepts_token_credential(self, mock_aoai): # pylint: disable=unused-argument + AzureOpenAIEmbeddingProvider(credential=_FakeTokenCredential()) + + def test_init_rejects_unknown_credential(self): + with pytest.raises(TypeError): + AzureOpenAIEmbeddingProvider(credential=12345) # type: ignore[arg-type] + + def 
test_init_rejects_async_credential_with_actionable_message(self): + with pytest.raises(TypeError, match=r"(?i)async"): + AzureOpenAIEmbeddingProvider(credential=_FakeAsyncTokenCredential()) # type: ignore[arg-type] + + def test_init_accepts_api_version_override(self, mock_aoai): + cls, _ = mock_aoai + provider = AzureOpenAIEmbeddingProvider(credential="key", api_version="2024-12-01-preview") + provider.generate_embeddings( + ["x"], endpoint=ENDPOINT, deployment_name=DEPLOYMENT, dimensions=DIMENSIONS + ) + _, ctor_kwargs = cls.call_args + assert ctor_kwargs["api_version"] == "2024-12-01-preview" + + def test_init_accepts_openai_client_kwargs(self, mock_aoai): + cls, _ = mock_aoai + provider = AzureOpenAIEmbeddingProvider( + credential="key", + openai_client_kwargs={"timeout": 30.0, "default_headers": {"x-test": "1"}}, + ) + provider.generate_embeddings( + ["x"], endpoint=ENDPOINT, deployment_name=DEPLOYMENT, dimensions=DIMENSIONS + ) + _, ctor_kwargs = cls.call_args + assert ctor_kwargs["timeout"] == 30.0 + assert ctor_kwargs["default_headers"] == {"x-test": "1"} + # Explicit provider-controlled kwargs still win. 
+ assert ctor_kwargs["azure_endpoint"] == ENDPOINT_KEY + assert ctor_kwargs["api_version"] == "2024-10-21" + + # ----- generate_embeddings ----- + + def test_generate_embeddings_forwards_params_and_returns_result(self, mock_aoai): + cls, instance = mock_aoai + instance.embeddings.create.return_value = _fake_response( + [[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], total_tokens=99 + ) + + provider = AzureOpenAIEmbeddingProvider(credential="key") + result = provider.generate_embeddings( + ["a", "b", "c"], + endpoint=ENDPOINT, + deployment_name=DEPLOYMENT, + dimensions=DIMENSIONS, + ) + + cls.assert_called_once() + _, ctor_kwargs = cls.call_args + assert ctor_kwargs["azure_endpoint"] == ENDPOINT_KEY + assert ctor_kwargs["api_key"] == "key" + assert ctor_kwargs["api_version"] == "2024-10-21" + + instance.embeddings.create.assert_called_once_with( + input=["a", "b", "c"], + model=DEPLOYMENT, + dimensions=DIMENSIONS, + ) + + assert result.vectors == [[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]] + assert result.total_tokens == 99 + assert isinstance(result.latency, float) + assert result.latency >= 0.0 + + def test_generate_embeddings_missing_usage_returns_none(self, mock_aoai): + _, instance = mock_aoai + instance.embeddings.create.return_value = _fake_response([[1.0]], total_tokens=None) + + provider = AzureOpenAIEmbeddingProvider(credential="key") + result = provider.generate_embeddings( + ["a"], endpoint=ENDPOINT, deployment_name=DEPLOYMENT, dimensions=DIMENSIONS + ) + assert result.total_tokens is None + + def test_empty_texts_short_circuits(self, mock_aoai): + cls, instance = mock_aoai + provider = AzureOpenAIEmbeddingProvider(credential="key") + result = provider.generate_embeddings( + [], endpoint=ENDPOINT, deployment_name=DEPLOYMENT, dimensions=DIMENSIONS + ) + assert result.vectors == [] + assert result.total_tokens == 0 + assert result.latency is None + cls.assert_not_called() + instance.embeddings.create.assert_not_called() + + def test_exceptions_propagate(self, mock_aoai): + 
_, instance = mock_aoai + instance.embeddings.create.side_effect = RuntimeError("boom") + + provider = AzureOpenAIEmbeddingProvider(credential="key") + with pytest.raises(RuntimeError, match="boom"): + provider.generate_embeddings( + ["a"], endpoint=ENDPOINT, deployment_name=DEPLOYMENT, dimensions=DIMENSIONS + ) + + # ----- credential plumbing into AzureOpenAI ----- + + def test_azure_key_credential_passed_as_api_key(self, mock_aoai): + cls, _ = mock_aoai + provider = AzureOpenAIEmbeddingProvider(credential=AzureKeyCredential("aaa")) + provider.generate_embeddings( + ["x"], endpoint=ENDPOINT, deployment_name=DEPLOYMENT, dimensions=DIMENSIONS + ) + _, ctor_kwargs = cls.call_args + assert ctor_kwargs["api_key"] == "aaa" + + def test_token_credential_uses_bearer_token_provider(self, mock_aoai): + cls, _ = mock_aoai + cred = _FakeTokenCredential() + provider = AzureOpenAIEmbeddingProvider(credential=cred) + provider.generate_embeddings( + ["x"], endpoint=ENDPOINT, deployment_name=DEPLOYMENT, dimensions=DIMENSIONS + ) + _, ctor_kwargs = cls.call_args + assert "api_key" not in ctor_kwargs + token_provider = ctor_kwargs["azure_ad_token_provider"] + token = token_provider() + assert token == "fake-token" + assert cred.calls and cred.calls[0][0] == "https://cognitiveservices.azure.com/.default" + + # ----- close / context manager ----- + + def test_close_clears_cache(self, mock_aoai): + cls, instance = mock_aoai + provider = AzureOpenAIEmbeddingProvider(credential="key") + provider.generate_embeddings( + ["x"], endpoint=ENDPOINT, deployment_name=DEPLOYMENT, dimensions=DIMENSIONS + ) + provider.close() + instance.close.assert_called_once() + provider.generate_embeddings( + ["x"], endpoint=ENDPOINT, deployment_name=DEPLOYMENT, dimensions=DIMENSIONS + ) + assert cls.call_count == 2 + + def test_context_manager_closes(self, mock_aoai): + _, instance = mock_aoai + with AzureOpenAIEmbeddingProvider(credential="key") as provider: + provider.generate_embeddings( + ["x"], 
endpoint=ENDPOINT, deployment_name=DEPLOYMENT, dimensions=DIMENSIONS + ) + instance.close.assert_called_once() + + +# ----- live test config ----- + +_LIVE_ENABLED = os.getenv("COSMOS_AI_LIVE_TESTS") == "1" +_LIVE_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT", "") +_LIVE_DEPLOYMENT = os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT", "") +_LIVE_DIMENSIONS = int(os.getenv("AZURE_OPENAI_EMBEDDING_DIMENSIONS") or "0") +_LIVE_API_KEY = os.getenv("AZURE_OPENAI_API_KEY", "") + +_LIVE_SKIP_REASON = ( + "Set COSMOS_AI_LIVE_TESTS=1, AZURE_OPENAI_ENDPOINT, " + "AZURE_OPENAI_EMBEDDING_DEPLOYMENT and AZURE_OPENAI_EMBEDDING_DIMENSIONS " + "to run live tests." +) + +_LIVE_TEXTS = ["healthcare research papers", "azure cosmos vector search"] + + +def _assert_valid_live_result(result, expected_count): + assert len(result.vectors) == expected_count + for vec in result.vectors: + assert isinstance(vec, list) + assert len(vec) == _LIVE_DIMENSIONS + assert all(isinstance(v, float) for v in vec) + assert result.total_tokens is None or result.total_tokens > 0 + assert isinstance(result.latency, float) + assert result.latency > 0.0 + + +@pytest.mark.skipif( + not (_LIVE_ENABLED and _LIVE_ENDPOINT and _LIVE_DEPLOYMENT and _LIVE_DIMENSIONS), + reason=_LIVE_SKIP_REASON, +) +class TestAzureOpenAIProviderLive: + """Live tests against a real Azure OpenAI resource. 
Opt-in.""" + + @pytest.mark.skipif(not _LIVE_API_KEY, reason="AZURE_OPENAI_API_KEY not set.") + def test_live_generate_embeddings_with_string_key(self): + with AzureOpenAIEmbeddingProvider(credential=_LIVE_API_KEY) as provider: + result = provider.generate_embeddings( + _LIVE_TEXTS, + endpoint=_LIVE_ENDPOINT, + deployment_name=_LIVE_DEPLOYMENT, + dimensions=_LIVE_DIMENSIONS, + ) + _assert_valid_live_result(result, len(_LIVE_TEXTS)) + + @pytest.mark.skipif(not _LIVE_API_KEY, reason="AZURE_OPENAI_API_KEY not set.") + def test_live_generate_embeddings_with_azure_key_credential(self): + with AzureOpenAIEmbeddingProvider(credential=AzureKeyCredential(_LIVE_API_KEY)) as provider: + result = provider.generate_embeddings( + _LIVE_TEXTS, + endpoint=_LIVE_ENDPOINT, + deployment_name=_LIVE_DEPLOYMENT, + dimensions=_LIVE_DIMENSIONS, + ) + _assert_valid_live_result(result, len(_LIVE_TEXTS)) + + def test_live_generate_embeddings_with_default_azure_credential(self): + try: + credential = DefaultAzureCredential() + except Exception as exc: # pylint: disable=broad-except + pytest.skip(f"DefaultAzureCredential unavailable: {exc}") + with AzureOpenAIEmbeddingProvider(credential=credential) as provider: + try: + result = provider.generate_embeddings( + _LIVE_TEXTS, + endpoint=_LIVE_ENDPOINT, + deployment_name=_LIVE_DEPLOYMENT, + dimensions=_LIVE_DIMENSIONS, + ) + except Exception as exc: # pylint: disable=broad-except + pytest.skip(f"Entra auth to Azure OpenAI failed (RBAC not granted?): {exc}") + _assert_valid_live_result(result, len(_LIVE_TEXTS)) + + def test_live_empty_texts_short_circuits_no_network(self): + with AzureOpenAIEmbeddingProvider(credential=_LIVE_API_KEY or "unused") as provider: + result = provider.generate_embeddings( + [], + endpoint=_LIVE_ENDPOINT, + deployment_name=_LIVE_DEPLOYMENT, + dimensions=_LIVE_DIMENSIONS, + ) + assert result.vectors == [] + assert result.total_tokens == 0 + assert result.latency is None + + @pytest.mark.skipif(not _LIVE_API_KEY, 
reason="AZURE_OPENAI_API_KEY not set.") + def test_live_underlying_client_is_cached_across_calls(self): + with AzureOpenAIEmbeddingProvider(credential=_LIVE_API_KEY) as provider: + provider.generate_embeddings( + _LIVE_TEXTS[:1], + endpoint=_LIVE_ENDPOINT, + deployment_name=_LIVE_DEPLOYMENT, + dimensions=_LIVE_DIMENSIONS, + ) + first_clients = dict(provider._clients) # pylint: disable=protected-access + assert len(first_clients) == 1 + provider.generate_embeddings( + _LIVE_TEXTS[:1], + endpoint=_LIVE_ENDPOINT, + deployment_name=_LIVE_DEPLOYMENT, + dimensions=_LIVE_DIMENSIONS, + ) + second_clients = dict(provider._clients) # pylint: disable=protected-access + assert first_clients.keys() == second_clients.keys() + for key in first_clients: + assert first_clients[key] is second_clients[key] diff --git a/sdk/cosmos/azure-cosmos-ai/tests/test_azure_openai_provider_async.py b/sdk/cosmos/azure-cosmos-ai/tests/test_azure_openai_provider_async.py new file mode 100644 index 000000000000..743550feef03 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-ai/tests/test_azure_openai_provider_async.py @@ -0,0 +1,390 @@ +# The MIT License (MIT) +# Copyright (c) 2023 Microsoft Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +"""Tests for AzureOpenAIEmbeddingProvider (async). + +This module exposes two test classes: + +* ``TestAzureOpenAIProviderAsync`` runs fully mocked unit tests and is always + collected. +* ``TestAzureOpenAIProviderLiveAsync`` runs opt-in live tests against a real + Azure OpenAI resource. Set ``COSMOS_AI_LIVE_TESTS=1`` and provide + connection settings via environment variables to enable it: + + * ``AZURE_OPENAI_ENDPOINT`` required (e.g. ``https://.openai.azure.com/``) + * ``AZURE_OPENAI_EMBEDDING_DEPLOYMENT`` required + * ``AZURE_OPENAI_EMBEDDING_DIMENSIONS`` required (int) + * ``AZURE_OPENAI_API_KEY`` required for the API key tests +""" + +import os +from types import SimpleNamespace +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest +from azure.core.credentials import AccessToken, AzureKeyCredential +from azure.identity.aio import DefaultAzureCredential + +from azure.cosmos.ai.aio import AzureOpenAIEmbeddingProvider + + +ENDPOINT = "https://example.com/" +ENDPOINT_KEY = "https://example.com" +DEPLOYMENT = "text-embedding-3-small" +DIMENSIONS = 1536 + + +# Apply the shared cosmos CI test marker at module scope. The cosmos-sdk-client +# pipeline template filters via ``-m cosmosEmulator``; without this declaration +# every test in this module would be silently deselected in CI. +pytestmark = pytest.mark.cosmosEmulator + + +class _FakeAsyncTokenCredential: + def __init__(self): + self.calls = [] + + async def get_token(self, *scopes, **kwargs): # pylint: disable=unused-argument + self.calls.append(scopes) + return AccessToken("fake-token", 9999999999) + + async def close(self): + pass + + +class _FakeSyncTokenCredential: + """Stand-in for an azure.identity (sync) credential. 
Used to verify the + async provider rejects sync credentials at __init__.""" + + def __init__(self): + self.calls = [] + + def get_token(self, *scopes, **kwargs): # pylint: disable=unused-argument + self.calls.append(scopes) + return AccessToken("fake-token", 9999999999) + + +def _fake_response(vectors, total_tokens=42): + return SimpleNamespace( + data=[SimpleNamespace(embedding=v) for v in vectors], + usage=SimpleNamespace(total_tokens=total_tokens) if total_tokens is not None else None, + ) + + +@pytest.fixture +def mock_aoai(): + """Patches AsyncAzureOpenAI inside the async provider module.""" + with patch("azure.cosmos.ai.aio._azure_openai_provider.AsyncAzureOpenAI") as cls: + instance = MagicMock(name="AsyncAzureOpenAIInstance") + instance.embeddings.create = AsyncMock(return_value=_fake_response([[0.1, 0.2]])) + instance.close = AsyncMock() + cls.return_value = instance + yield cls, instance + + +class TestAzureOpenAIProviderAsync: + """Unit tests with a mocked underlying ``AsyncAzureOpenAI`` client.""" + + # ----- constructor / credential dispatch ----- + + def test_init_accepts_str(self, mock_aoai): # pylint: disable=unused-argument + AzureOpenAIEmbeddingProvider(credential="my-key") + + def test_init_accepts_azure_key_credential(self, mock_aoai): # pylint: disable=unused-argument + AzureOpenAIEmbeddingProvider(credential=AzureKeyCredential("my-key")) + + def test_init_accepts_async_token_credential(self, mock_aoai): # pylint: disable=unused-argument + AzureOpenAIEmbeddingProvider(credential=_FakeAsyncTokenCredential()) + + def test_init_rejects_unknown_credential(self): + with pytest.raises(TypeError): + AzureOpenAIEmbeddingProvider(credential=12345) # type: ignore[arg-type] + + def test_init_rejects_sync_credential_with_actionable_message(self): + with pytest.raises(TypeError, match=r"(?i)sync"): + AzureOpenAIEmbeddingProvider(credential=_FakeSyncTokenCredential()) # type: ignore[arg-type] + + @pytest.mark.asyncio + async def 
test_init_accepts_api_version_override(self, mock_aoai): + cls, _ = mock_aoai + provider = AzureOpenAIEmbeddingProvider(credential="key", api_version="2024-12-01-preview") + await provider.generate_embeddings( + ["x"], endpoint=ENDPOINT, deployment_name=DEPLOYMENT, dimensions=DIMENSIONS + ) + _, ctor_kwargs = cls.call_args + assert ctor_kwargs["api_version"] == "2024-12-01-preview" + + @pytest.mark.asyncio + async def test_init_accepts_openai_client_kwargs(self, mock_aoai): + cls, _ = mock_aoai + provider = AzureOpenAIEmbeddingProvider( + credential="key", + openai_client_kwargs={"timeout": 30.0, "default_headers": {"x-test": "1"}}, + ) + await provider.generate_embeddings( + ["x"], endpoint=ENDPOINT, deployment_name=DEPLOYMENT, dimensions=DIMENSIONS + ) + _, ctor_kwargs = cls.call_args + assert ctor_kwargs["timeout"] == 30.0 + assert ctor_kwargs["default_headers"] == {"x-test": "1"} + # Explicit provider-controlled kwargs still win. + assert ctor_kwargs["azure_endpoint"] == ENDPOINT_KEY + assert ctor_kwargs["api_version"] == "2024-10-21" + + # ----- generate_embeddings ----- + + @pytest.mark.asyncio + async def test_generate_embeddings_forwards_params_and_returns_result(self, mock_aoai): + cls, instance = mock_aoai + instance.embeddings.create.return_value = _fake_response( + [[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], total_tokens=99 + ) + + provider = AzureOpenAIEmbeddingProvider(credential="key") + result = await provider.generate_embeddings( + ["a", "b", "c"], + endpoint=ENDPOINT, + deployment_name=DEPLOYMENT, + dimensions=DIMENSIONS, + ) + + cls.assert_called_once() + _, ctor_kwargs = cls.call_args + assert ctor_kwargs["azure_endpoint"] == ENDPOINT_KEY + assert ctor_kwargs["api_key"] == "key" + assert ctor_kwargs["api_version"] == "2024-10-21" + + instance.embeddings.create.assert_awaited_once_with( + input=["a", "b", "c"], + model=DEPLOYMENT, + dimensions=DIMENSIONS, + ) + + assert result.vectors == [[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]] + assert result.total_tokens 
== 99 + assert isinstance(result.latency, float) + assert result.latency >= 0.0 + + @pytest.mark.asyncio + async def test_generate_embeddings_missing_usage_returns_none(self, mock_aoai): + _, instance = mock_aoai + instance.embeddings.create.return_value = _fake_response([[1.0]], total_tokens=None) + + provider = AzureOpenAIEmbeddingProvider(credential="key") + result = await provider.generate_embeddings( + ["a"], endpoint=ENDPOINT, deployment_name=DEPLOYMENT, dimensions=DIMENSIONS + ) + assert result.total_tokens is None + + @pytest.mark.asyncio + async def test_empty_texts_short_circuits(self, mock_aoai): + cls, instance = mock_aoai + provider = AzureOpenAIEmbeddingProvider(credential="key") + result = await provider.generate_embeddings( + [], endpoint=ENDPOINT, deployment_name=DEPLOYMENT, dimensions=DIMENSIONS + ) + assert result.vectors == [] + assert result.total_tokens == 0 + assert result.latency is None + cls.assert_not_called() + instance.embeddings.create.assert_not_called() + + @pytest.mark.asyncio + async def test_exceptions_propagate(self, mock_aoai): + _, instance = mock_aoai + instance.embeddings.create.side_effect = RuntimeError("boom") + + provider = AzureOpenAIEmbeddingProvider(credential="key") + with pytest.raises(RuntimeError, match="boom"): + await provider.generate_embeddings( + ["a"], endpoint=ENDPOINT, deployment_name=DEPLOYMENT, dimensions=DIMENSIONS + ) + + # ----- credential plumbing ----- + + @pytest.mark.asyncio + async def test_azure_key_credential_passed_as_api_key(self, mock_aoai): + cls, _ = mock_aoai + provider = AzureOpenAIEmbeddingProvider(credential=AzureKeyCredential("aaa")) + await provider.generate_embeddings( + ["x"], endpoint=ENDPOINT, deployment_name=DEPLOYMENT, dimensions=DIMENSIONS + ) + _, ctor_kwargs = cls.call_args + assert ctor_kwargs["api_key"] == "aaa" + + @pytest.mark.asyncio + async def test_async_token_credential_uses_bearer_token_provider(self, mock_aoai): + cls, _ = mock_aoai + cred = 
_FakeAsyncTokenCredential() + provider = AzureOpenAIEmbeddingProvider(credential=cred) + await provider.generate_embeddings( + ["x"], endpoint=ENDPOINT, deployment_name=DEPLOYMENT, dimensions=DIMENSIONS + ) + _, ctor_kwargs = cls.call_args + assert "api_key" not in ctor_kwargs + token_provider = ctor_kwargs["azure_ad_token_provider"] + token = await token_provider() + assert token == "fake-token" + assert cred.calls and cred.calls[0][0] == "https://cognitiveservices.azure.com/.default" + + # ----- close / async context manager ----- + + @pytest.mark.asyncio + async def test_close_clears_cache(self, mock_aoai): + cls, instance = mock_aoai + provider = AzureOpenAIEmbeddingProvider(credential="key") + await provider.generate_embeddings( + ["x"], endpoint=ENDPOINT, deployment_name=DEPLOYMENT, dimensions=DIMENSIONS + ) + await provider.close() + instance.close.assert_awaited_once() + await provider.generate_embeddings( + ["x"], endpoint=ENDPOINT, deployment_name=DEPLOYMENT, dimensions=DIMENSIONS + ) + assert cls.call_count == 2 + + @pytest.mark.asyncio + async def test_async_context_manager_closes(self, mock_aoai): + _, instance = mock_aoai + async with AzureOpenAIEmbeddingProvider(credential="key") as provider: + await provider.generate_embeddings( + ["x"], endpoint=ENDPOINT, deployment_name=DEPLOYMENT, dimensions=DIMENSIONS + ) + instance.close.assert_awaited_once() + + +# ----- live test config ----- + +_LIVE_ENABLED = os.getenv("COSMOS_AI_LIVE_TESTS") == "1" +_LIVE_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT", "") +_LIVE_DEPLOYMENT = os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT", "") +_LIVE_DIMENSIONS = int(os.getenv("AZURE_OPENAI_EMBEDDING_DIMENSIONS") or "0") +_LIVE_API_KEY = os.getenv("AZURE_OPENAI_API_KEY", "") + +_LIVE_SKIP_REASON = ( + "Set COSMOS_AI_LIVE_TESTS=1, AZURE_OPENAI_ENDPOINT, " + "AZURE_OPENAI_EMBEDDING_DEPLOYMENT and AZURE_OPENAI_EMBEDDING_DIMENSIONS " + "to run live tests." 
+) + +_LIVE_TEXTS = ["healthcare research papers", "azure cosmos vector search"] + + +def _assert_valid_live_result(result, expected_count): + assert len(result.vectors) == expected_count + for vec in result.vectors: + assert isinstance(vec, list) + assert len(vec) == _LIVE_DIMENSIONS + assert all(isinstance(v, float) for v in vec) + assert result.total_tokens is None or result.total_tokens > 0 + assert isinstance(result.latency, float) + assert result.latency > 0.0 + + +@pytest.mark.skipif( + not (_LIVE_ENABLED and _LIVE_ENDPOINT and _LIVE_DEPLOYMENT and _LIVE_DIMENSIONS), + reason=_LIVE_SKIP_REASON, +) +class TestAzureOpenAIProviderLiveAsync: + """Live tests against a real Azure OpenAI resource. Opt-in.""" + + @pytest.mark.asyncio + @pytest.mark.skipif(not _LIVE_API_KEY, reason="AZURE_OPENAI_API_KEY not set.") + async def test_live_generate_embeddings_with_string_key(self): + async with AzureOpenAIEmbeddingProvider(credential=_LIVE_API_KEY) as provider: + result = await provider.generate_embeddings( + _LIVE_TEXTS, + endpoint=_LIVE_ENDPOINT, + deployment_name=_LIVE_DEPLOYMENT, + dimensions=_LIVE_DIMENSIONS, + ) + _assert_valid_live_result(result, len(_LIVE_TEXTS)) + + @pytest.mark.asyncio + @pytest.mark.skipif(not _LIVE_API_KEY, reason="AZURE_OPENAI_API_KEY not set.") + async def test_live_generate_embeddings_with_azure_key_credential(self): + async with AzureOpenAIEmbeddingProvider( + credential=AzureKeyCredential(_LIVE_API_KEY) + ) as provider: + result = await provider.generate_embeddings( + _LIVE_TEXTS, + endpoint=_LIVE_ENDPOINT, + deployment_name=_LIVE_DEPLOYMENT, + dimensions=_LIVE_DIMENSIONS, + ) + _assert_valid_live_result(result, len(_LIVE_TEXTS)) + + @pytest.mark.asyncio + async def test_live_generate_embeddings_with_default_azure_credential(self): + try: + credential = DefaultAzureCredential() + except Exception as exc: # pylint: disable=broad-except + pytest.skip(f"Async DefaultAzureCredential unavailable: {exc}") + try: + async with 
AzureOpenAIEmbeddingProvider(credential=credential) as provider: + try: + result = await provider.generate_embeddings( + _LIVE_TEXTS, + endpoint=_LIVE_ENDPOINT, + deployment_name=_LIVE_DEPLOYMENT, + dimensions=_LIVE_DIMENSIONS, + ) + except Exception as exc: # pylint: disable=broad-except + pytest.skip(f"Entra auth to Azure OpenAI failed (RBAC not granted?): {exc}") + _assert_valid_live_result(result, len(_LIVE_TEXTS)) + finally: + await credential.close() + + @pytest.mark.asyncio + async def test_live_empty_texts_short_circuits_no_network(self): + async with AzureOpenAIEmbeddingProvider(credential=_LIVE_API_KEY or "unused") as provider: + result = await provider.generate_embeddings( + [], + endpoint=_LIVE_ENDPOINT, + deployment_name=_LIVE_DEPLOYMENT, + dimensions=_LIVE_DIMENSIONS, + ) + assert result.vectors == [] + assert result.total_tokens == 0 + assert result.latency is None + + @pytest.mark.asyncio + @pytest.mark.skipif(not _LIVE_API_KEY, reason="AZURE_OPENAI_API_KEY not set.") + async def test_live_underlying_client_is_cached_across_calls(self): + async with AzureOpenAIEmbeddingProvider(credential=_LIVE_API_KEY) as provider: + await provider.generate_embeddings( + _LIVE_TEXTS[:1], + endpoint=_LIVE_ENDPOINT, + deployment_name=_LIVE_DEPLOYMENT, + dimensions=_LIVE_DIMENSIONS, + ) + first_clients = dict(provider._clients) # pylint: disable=protected-access + assert len(first_clients) == 1 + await provider.generate_embeddings( + _LIVE_TEXTS[:1], + endpoint=_LIVE_ENDPOINT, + deployment_name=_LIVE_DEPLOYMENT, + dimensions=_LIVE_DIMENSIONS, + ) + second_clients = dict(provider._clients) # pylint: disable=protected-access + assert first_clients.keys() == second_clients.keys() + for key in first_clients: + assert first_clients[key] is second_clients[key] diff --git a/sdk/cosmos/ci.yml b/sdk/cosmos/ci.yml index 9565a7aeb76e..6104ead306c7 100644 --- a/sdk/cosmos/ci.yml +++ b/sdk/cosmos/ci.yml @@ -31,5 +31,7 @@ extends: Artifacts: - name: azure-cosmos safeName: 
azurecosmos + - name: azure-cosmos-ai + safeName: azurecosmosai - name: azure-mgmt-cosmosdb safeName: azuremgmtcosmosdb