From 93e78c69b6e96f44ebde5cfd03f9cc004c798bab Mon Sep 17 00:00:00 2001 From: Wish Date: Tue, 13 Jan 2026 22:16:03 +0800 Subject: [PATCH 1/5] Add OpenAI API Base support Signed-off-by: Wish --- .../vector-search-auto-embedding-jina-ai.md | 2 +- .../vector-search-auto-embedding-openai.md | 54 ++++++++++++++++++- 2 files changed, 53 insertions(+), 3 deletions(-) diff --git a/tidb-cloud/vector-search-auto-embedding-jina-ai.md b/tidb-cloud/vector-search-auto-embedding-jina-ai.md index 51d93a9e41bb4..c607dd445489f 100644 --- a/tidb-cloud/vector-search-auto-embedding-jina-ai.md +++ b/tidb-cloud/vector-search-auto-embedding-jina-ai.md @@ -51,7 +51,7 @@ SET @@GLOBAL.TIDB_EXP_EMBED_JINA_AI_API_KEY = 'your-jina-ai-api-key-here'; CREATE TABLE sample ( `id` INT, `content` TEXT, - `embedding` VECTOR(1024) GENERATED ALWAYS AS (EMBED_TEXT( + `embedding` VECTOR(2048) GENERATED ALWAYS AS (EMBED_TEXT( "jina_ai/jina-embeddings-v4", `content` )) STORED diff --git a/tidb-cloud/vector-search-auto-embedding-openai.md b/tidb-cloud/vector-search-auto-embedding-openai.md index 17afe2ab683f4..9f51610756144 100644 --- a/tidb-cloud/vector-search-auto-embedding-openai.md +++ b/tidb-cloud/vector-search-auto-embedding-openai.md @@ -49,8 +49,8 @@ SET @@GLOBAL.TIDB_EXP_EMBED_OPENAI_API_KEY = 'your-openai-api-key-here'; CREATE TABLE sample ( `id` INT, `content` TEXT, - `embedding` VECTOR(1536) GENERATED ALWAYS AS (EMBED_TEXT( - "openai/text-embedding-3-small", + `embedding` VECTOR(3072) GENERATED ALWAYS AS (EMBED_TEXT( + "openai/text-embedding-3-large", `content` )) STORED ); @@ -85,6 +85,56 @@ Result: +------+----------------------------------------------------------------+ ``` +## Azure OpenAI + +To use OpenAI embedding models on Azure, set global varable `TIDB_EXP_EMBED_OPENAI_API_BASE` to the URL of your Azure resource. 
Example: + +```sql +SET @@GLOBAL.TIDB_EXP_EMBED_OPENAI_API_KEY = 'your-openai-api-key-here'; +SET @@GLOBAL.TIDB_EXP_EMBED_OPENAI_API_BASE = 'https://your-resource-name.openai.azure.com/openai/v1'; + +CREATE TABLE sample ( + `id` INT, + `content` TEXT, + `embedding` VECTOR(3072) GENERATED ALWAYS AS (EMBED_TEXT( + "openai/text-embedding-3-large", + `content` + )) STORED +); + +INSERT INTO sample + (`id`, `content`) +VALUES + (1, "Java: Object-oriented language for cross-platform development."), + (2, "Java coffee: Bold Indonesian beans with low acidity."), + (3, "Java island: Densely populated, home to Jakarta."), + (4, "Java's syntax is used in Android apps."), + (5, "Dark roast Java beans enhance espresso blends."); + + +SELECT `id`, `content` FROM sample +ORDER BY + VEC_EMBED_COSINE_DISTANCE( + embedding, + "How to start learning Java programming?" + ) +LIMIT 2; +``` + +Note that, even if your resource URL looks like `https://<your-resource-name>.cognitiveservices.azure.com/`, you still still use `https://<your-resource-name>.openai.azure.com/openai/v1` as the API base, which provides OpenAI format compatibility. + +To switch from using Azure OpenAI to use OpenAI directly, set `TIDB_EXP_EMBED_OPENAI_API_BASE` to empty value: + +```sql +SET @@GLOBAL.TIDB_EXP_EMBED_OPENAI_API_BASE = ''; +``` + +> **Note:** +> +> For security reasons, currently we only allow setting API base to either Azure OpenAI URL or OpenAI URL. **Arbitrary base URL is forbidden.** +> +> If you want to use other OpenAI compatible embedding service, contact our support. We will evaluate and add the service provider to the allow list. + ## Options All [OpenAI embedding options](https://platform.openai.com/docs/api-reference/embeddings/create) are supported via the `additional_json_options` parameter of the `EMBED_TEXT()` function. 
From ac4c0639b27acead080521f1033f2bd6096f590c Mon Sep 17 00:00:00 2001 From: Wish Date: Tue, 13 Jan 2026 22:23:10 +0800 Subject: [PATCH 2/5] Apply suggestions from code review Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- tidb-cloud/vector-search-auto-embedding-openai.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tidb-cloud/vector-search-auto-embedding-openai.md b/tidb-cloud/vector-search-auto-embedding-openai.md index 9f51610756144..b9896458702d8 100644 --- a/tidb-cloud/vector-search-auto-embedding-openai.md +++ b/tidb-cloud/vector-search-auto-embedding-openai.md @@ -87,7 +87,7 @@ Result: ## Azure OpenAI -To use OpenAI embedding models on Azure, set global varable `TIDB_EXP_EMBED_OPENAI_API_BASE` to the URL of your Azure resource. Example: +To use OpenAI embedding models on Azure, set the global variable `TIDB_EXP_EMBED_OPENAI_API_BASE` to the URL of your Azure resource. Example: ```sql SET @@GLOBAL.TIDB_EXP_EMBED_OPENAI_API_KEY = 'your-openai-api-key-here'; @@ -121,9 +121,9 @@ ORDER BY LIMIT 2; ``` -Note that, even if your resource URL looks like `https://<your-resource-name>.cognitiveservices.azure.com/`, you still still use `https://<your-resource-name>.openai.azure.com/openai/v1` as the API base, which provides OpenAI format compatibility. +Note that, even if your resource URL looks like `https://<your-resource-name>.cognitiveservices.azure.com/`, you should still use `https://<your-resource-name>.openai.azure.com/openai/v1` as the API base, which provides OpenAI format compatibility. 
-To switch from using Azure OpenAI to use OpenAI directly, set `TIDB_EXP_EMBED_OPENAI_API_BASE` to empty value: +To switch from using Azure OpenAI to using OpenAI directly, set `TIDB_EXP_EMBED_OPENAI_API_BASE` to an empty string: ```sql SET @@GLOBAL.TIDB_EXP_EMBED_OPENAI_API_BASE = ''; From 0cfcadd95b587ae29cf529ef76ecf2e8dd728dc1 Mon Sep 17 00:00:00 2001 From: xixirangrang Date: Thu, 15 Jan 2026 10:03:37 +0800 Subject: [PATCH 3/5] Apply suggestions from code review --- tidb-cloud/vector-search-auto-embedding-openai.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tidb-cloud/vector-search-auto-embedding-openai.md b/tidb-cloud/vector-search-auto-embedding-openai.md index b9896458702d8..8589c8f2b971d 100644 --- a/tidb-cloud/vector-search-auto-embedding-openai.md +++ b/tidb-cloud/vector-search-auto-embedding-openai.md @@ -121,9 +121,9 @@ ORDER BY LIMIT 2; ``` -Note that, even if your resource URL looks like `https://<your-resource-name>.cognitiveservices.azure.com/`, you should still use `https://<your-resource-name>.openai.azure.com/openai/v1` as the API base, which provides OpenAI format compatibility. +Note that even if your resource URL appears as `https://<your-resource-name>.cognitiveservices.azure.com/`, you must use `https://<your-resource-name>.openai.azure.com/openai/v1` as the API base to ensure OpenAI-compatible formatting. -To switch from using Azure OpenAI to using OpenAI directly, set `TIDB_EXP_EMBED_OPENAI_API_BASE` to an empty string: +To switch from Azure OpenAI to OpenAI directly, set `TIDB_EXP_EMBED_OPENAI_API_BASE` to an empty string: ```sql SET @@GLOBAL.TIDB_EXP_EMBED_OPENAI_API_BASE = ''; @@ -131,9 +131,8 @@ SET @@GLOBAL.TIDB_EXP_EMBED_OPENAI_API_BASE = ''; > **Note:** > -> For security reasons, currently we only allow setting API base to either Azure OpenAI URL or OpenAI URL. **Arbitrary base URL is forbidden.** -> -> If you want to use other OpenAI compatible embedding service, contact our support. We will evaluate and add the service provider to the allow list. 
+> - For security reasons, currently the API base can only be set to an Azure OpenAI URL or the OpenAI URL. Arbitrary base URLs are not allowed. +> - To use another OpenAI-compatible embedding service, contact the [PingCAP support team](/tidb-cloud/tidb-cloud-support.md). ## Options From 248b99146c24e43413cb51532fd6bedb8f933f78 Mon Sep 17 00:00:00 2001 From: houfaxin Date: Thu, 15 Jan 2026 10:10:11 +0800 Subject: [PATCH 4/5] Update vector-search-auto-embedding-openai.md --- tidb-cloud/vector-search-auto-embedding-openai.md | 1 - 1 file changed, 1 deletion(-) diff --git a/tidb-cloud/vector-search-auto-embedding-openai.md b/tidb-cloud/vector-search-auto-embedding-openai.md index 8589c8f2b971d..a0b15c963e251 100644 --- a/tidb-cloud/vector-search-auto-embedding-openai.md +++ b/tidb-cloud/vector-search-auto-embedding-openai.md @@ -111,7 +111,6 @@ VALUES (4, "Java's syntax is used in Android apps."), (5, "Dark roast Java beans enhance espresso blends."); - SELECT `id`, `content` FROM sample ORDER BY VEC_EMBED_COSINE_DISTANCE( From 17696a4510f623ae35e3924888a1e84a57b3c062 Mon Sep 17 00:00:00 2001 From: Wish Date: Thu, 15 Jan 2026 17:10:22 +0800 Subject: [PATCH 5/5] Apply suggestions from code review Co-authored-by: Aolin --- tidb-cloud/vector-search-auto-embedding-openai.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tidb-cloud/vector-search-auto-embedding-openai.md b/tidb-cloud/vector-search-auto-embedding-openai.md index a0b15c963e251..f545d4a38ff58 100644 --- a/tidb-cloud/vector-search-auto-embedding-openai.md +++ b/tidb-cloud/vector-search-auto-embedding-openai.md @@ -85,13 +85,13 @@ Result: +------+----------------------------------------------------------------+ ``` -## Azure OpenAI +## Use Azure OpenAI -To use OpenAI embedding models on Azure, set the global variable `TIDB_EXP_EMBED_OPENAI_API_BASE` to the URL of your Azure resource. 
Example: +To use OpenAI embedding models on Azure, set the global variable `TIDB_EXP_EMBED_OPENAI_API_BASE` to the URL of your Azure resource. For example: ```sql SET @@GLOBAL.TIDB_EXP_EMBED_OPENAI_API_KEY = 'your-openai-api-key-here'; -SET @@GLOBAL.TIDB_EXP_EMBED_OPENAI_API_BASE = 'https://your-resource-name.openai.azure.com/openai/v1'; +SET @@GLOBAL.TIDB_EXP_EMBED_OPENAI_API_BASE = 'https://<your-resource-name>.openai.azure.com/openai/v1'; CREATE TABLE sample ( `id` INT, @@ -120,7 +120,7 @@ ORDER BY LIMIT 2; ``` -Note that even if your resource URL appears as `https://<your-resource-name>.cognitiveservices.azure.com/`, you must use `https://<your-resource-name>.openai.azure.com/openai/v1` as the API base to ensure OpenAI-compatible formatting. +Note that even if your resource URL appears as `https://<your-resource-name>.cognitiveservices.azure.com/`, you must use `https://<your-resource-name>.openai.azure.com/openai/v1` as the API base to ensure OpenAI-compatible request and response formats. To switch from Azure OpenAI to OpenAI directly, set `TIDB_EXP_EMBED_OPENAI_API_BASE` to an empty string: @@ -130,8 +130,8 @@ SET @@GLOBAL.TIDB_EXP_EMBED_OPENAI_API_BASE = ''; > **Note:** > -> - For security reasons, currently the API base can only be set to an Azure OpenAI URL or the OpenAI URL. Arbitrary base URLs are not allowed. -> - To use another OpenAI-compatible embedding service, contact the [PingCAP support team](/tidb-cloud/tidb-cloud-support.md). +> - For security reasons, you can only set the API base to an Azure OpenAI URL or the OpenAI URL. Arbitrary base URLs are not allowed. +> - To use another OpenAI-compatible embedding service, contact [TiDB Cloud Support](/tidb-cloud/tidb-cloud-support.md). ## Options