diff --git a/tidb-cloud/vector-search-auto-embedding-jina-ai.md b/tidb-cloud/vector-search-auto-embedding-jina-ai.md index 51d93a9e41bb4..c607dd445489f 100644 --- a/tidb-cloud/vector-search-auto-embedding-jina-ai.md +++ b/tidb-cloud/vector-search-auto-embedding-jina-ai.md @@ -51,7 +51,7 @@ SET @@GLOBAL.TIDB_EXP_EMBED_JINA_AI_API_KEY = 'your-jina-ai-api-key-here'; CREATE TABLE sample ( `id` INT, `content` TEXT, - `embedding` VECTOR(1024) GENERATED ALWAYS AS (EMBED_TEXT( + `embedding` VECTOR(2048) GENERATED ALWAYS AS (EMBED_TEXT( "jina_ai/jina-embeddings-v4", `content` )) STORED diff --git a/tidb-cloud/vector-search-auto-embedding-openai.md b/tidb-cloud/vector-search-auto-embedding-openai.md index 17afe2ab683f4..f545d4a38ff58 100644 --- a/tidb-cloud/vector-search-auto-embedding-openai.md +++ b/tidb-cloud/vector-search-auto-embedding-openai.md @@ -49,8 +49,8 @@ SET @@GLOBAL.TIDB_EXP_EMBED_OPENAI_API_KEY = 'your-openai-api-key-here'; CREATE TABLE sample ( `id` INT, `content` TEXT, - `embedding` VECTOR(1536) GENERATED ALWAYS AS (EMBED_TEXT( - "openai/text-embedding-3-small", + `embedding` VECTOR(3072) GENERATED ALWAYS AS (EMBED_TEXT( + "openai/text-embedding-3-large", `content` )) STORED ); @@ -85,6 +85,54 @@ Result: +------+----------------------------------------------------------------+ ``` +## Use Azure OpenAI + +To use OpenAI embedding models on Azure, set the global variable `TIDB_EXP_EMBED_OPENAI_API_BASE` to the URL of your Azure resource. 
For example: + +```sql +SET @@GLOBAL.TIDB_EXP_EMBED_OPENAI_API_KEY = 'your-openai-api-key-here'; +SET @@GLOBAL.TIDB_EXP_EMBED_OPENAI_API_BASE = 'https://<your-resource-name>.openai.azure.com/openai/v1'; + +CREATE TABLE sample ( + `id` INT, + `content` TEXT, + `embedding` VECTOR(3072) GENERATED ALWAYS AS (EMBED_TEXT( + "openai/text-embedding-3-large", + `content` + )) STORED +); + +INSERT INTO sample + (`id`, `content`) +VALUES + (1, "Java: Object-oriented language for cross-platform development."), + (2, "Java coffee: Bold Indonesian beans with low acidity."), + (3, "Java island: Densely populated, home to Jakarta."), + (4, "Java's syntax is used in Android apps."), + (5, "Dark roast Java beans enhance espresso blends."); + +SELECT `id`, `content` FROM sample +ORDER BY + VEC_EMBED_COSINE_DISTANCE( + embedding, + "How to start learning Java programming?" + ) +LIMIT 2; +``` + +Note that even if your resource URL appears as `https://<your-resource-name>.cognitiveservices.azure.com/`, you must use `https://<your-resource-name>.openai.azure.com/openai/v1` as the API base to ensure OpenAI-compatible request and response formats. + +To switch from Azure OpenAI to OpenAI directly, set `TIDB_EXP_EMBED_OPENAI_API_BASE` to an empty string: + +```sql +SET @@GLOBAL.TIDB_EXP_EMBED_OPENAI_API_BASE = ''; +``` + +> **Note:** +> +> - For security reasons, you can only set the API base to an Azure OpenAI URL or the OpenAI URL. Arbitrary base URLs are not allowed. +> - To use another OpenAI-compatible embedding service, contact [TiDB Cloud Support](/tidb-cloud/tidb-cloud-support.md). + ## Options All [OpenAI embedding options](https://platform.openai.com/docs/api-reference/embeddings/create) are supported via the `additional_json_options` parameter of the `EMBED_TEXT()` function.