From c2e08e8eafdf5785e524208dc56c1a94fb71ca8e Mon Sep 17 00:00:00 2001 From: Gary Date: Wed, 4 Feb 2026 23:29:05 +0800 Subject: [PATCH 1/9] variant schema template auto cast --- .../sql-data-types/semi-structured/VARIANT.md | 63 +++++++++++++++++++ .../sql-data-types/semi-structured/VARIANT.md | 63 +++++++++++++++++++ 2 files changed, 126 insertions(+) diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md b/i18n/zh-CN/docusaurus-plugin-content-docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md index eb65b641eb983..4bcaac8318220 100644 --- a/i18n/zh-CN/docusaurus-plugin-content-docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md @@ -388,6 +388,69 @@ SELECT * FROM tbl WHERE v['str'] MATCH 'Doris'; | `VARCHAR` | ✔ | ✔ | | `JSON` | ✔ | ✔ | +### 基于 Schema Template 自动 CAST + +当 VARIANT 列定义了 schema template 时,且 `enable_variant_schema_auto_cast` 设为 true 时,语义分析阶段会为命中 schema template 的子列自动插入对应类型的 CAST,无需自行手写。 + +- 覆盖 SELECT、WHERE、ORDER BY、GROUP BY、HAVING、JOIN KEY 或聚合参数等场景。 +- 若需关闭此行为,将 `enable_variant_schema_auto_cast` 设为 false。 + +示例: +```sql +CREATE TABLE t ( + id BIGINT, + data VARIANT<'num_*': BIGINT, 'str_*': STRING> +); + +-- 1) 过滤 + 排序 +SELECT id +FROM t +WHERE data['num_a'] > 10 +ORDER BY data['num_a']; + +-- 2) 分组 + 聚合 + Alias +SELECT data['str_name'] AS username, SUM(data['num_a']) AS total +FROM t +GROUP BY username +HAVING data['num_a'] > 100; + +-- 3) JOIN ON +SELECT * +FROM t1 JOIN t2 +ON t1.data['num_id'] = t2.data['num_id']; +``` + +**注意**:自动 CAST 功能无法感知给定的 Path 是否为叶子,它只是对所有符合 schema template 规则的 Path 都加对应的 CAST。 + +因此,对于下述这种情况需要额外注意,为保证结果正确,请设置 `enable_variant_schema_auto_cast` 设为 false,并手动添加 CAST。 + +```sql +-- Schema Template:所有 int_* 视为 INT +CREATE TABLE t ( + id INT, + data 
VARIANT<'int_*': INT> +); + +INSERT INTO t VALUES +(1, '{"int_1": 1, "int_nested": {"level1_num_1": 1011111, "level1_num_2": 102}}'); + +-- 自动 CAST 开启 +SET enable_variant_schema_auto_cast = true; + +-- int_nested 匹配 int_*,错误自动 CAST 为 INT,查询结果返回 NULL +SELECT + data['int_nested'] +FROM t; + +-- 自动 CAST 关闭 +SET enable_variant_schema_auto_cast = false; + +-- int_nested 匹配 int_*,查询结果正确返回 +SELECT + data['int_nested'] +FROM t; +``` + ## 限制 - `variant_max_subcolumns_count`:默认 0(不限制 Path 物化列数)。建议在生产设置为 2048(Tablet 级别)以控制列数。超过阈值后,低频/稀疏路径会被收敛到共享数据结构,从该结构查询可能带来性能下降(详见“配置”)。 diff --git a/versioned_docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md b/versioned_docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md index b5116669ea318..920b03fd8f9ac 100644 --- a/versioned_docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md +++ b/versioned_docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md @@ -388,6 +388,69 @@ SELECT * FROM tbl WHERE v['str'] MATCH 'Doris'; | `VARCHAR` | ✔ | ✔ | | `JSON` | ✔ | ✔ | +### Schema Template based auto CAST + +When a VARIANT column defines a Schema Template and `enable_variant_schema_auto_cast` is set to true, the analyzer automatically inserts CASTs to the declared types for subpaths that match the Schema Template, so you do not need to write CASTs manually. + +- Applies to SELECT, WHERE, ORDER BY, GROUP BY, HAVING, JOIN keys, and aggregate arguments. +- To disable this behavior, set `enable_variant_schema_auto_cast` to false. 
+ +Example: +```sql +CREATE TABLE t ( + id BIGINT, + data VARIANT<'num_*': BIGINT, 'str_*': STRING> +); + +-- 1) Filter + order +SELECT id +FROM t +WHERE data['num_a'] > 10 +ORDER BY data['num_a']; + +-- 2) Group + aggregate + alias +SELECT data['str_name'] AS username, SUM(data['num_a']) AS total +FROM t +GROUP BY username +HAVING data['num_a'] > 100; + +-- 3) JOIN ON +SELECT * +FROM t1 JOIN t2 +ON t1.data['num_id'] = t2.data['num_id']; +``` + +**Note**: Auto CAST cannot determine whether a path is a leaf; it simply casts all paths that match the Schema Template. + +Therefore, in cases like the following, to ensure correct results, set `enable_variant_schema_auto_cast` to false and add CASTs manually. + +```sql +-- Schema Template: treat all int_* as INT +CREATE TABLE t ( + id INT, + data VARIANT<'int_*': INT> +); + +INSERT INTO t VALUES +(1, '{"int_1": 1, "int_nested": {"level1_num_1": 1011111, "level1_num_2": 102}}'); + +-- Auto CAST enabled +SET enable_variant_schema_auto_cast = true; + +-- int_nested matches int_*, is incorrectly CAST to INT, and the query returns NULL +SELECT + data['int_nested'] +FROM t; + +-- Auto CAST disabled +SET enable_variant_schema_auto_cast = false; + +-- int_nested matches int_*, and the query returns the correct result +SELECT + data['int_nested'] +FROM t; +``` + ## Limitations - `variant_max_subcolumns_count`: default 0 (no limit). In production, set to 2048 (tablet level) to control the number of materialized paths. Above the threshold, low-frequency/sparse paths are moved to a shared data structure; reading from it may be slower (see “Configuration”). 
From 8e242ceda6834fd9168e4c7eb3548f5f4a44be6a Mon Sep 17 00:00:00 2001 From: Gary Date: Thu, 5 Feb 2026 15:17:51 +0800 Subject: [PATCH 2/9] json path forms --- .../sql-data-types/semi-structured/VARIANT.md | 6 ++++++ .../sql-data-types/semi-structured/VARIANT.md | 8 ++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md b/i18n/zh-CN/docusaurus-plugin-content-docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md index 4bcaac8318220..e026246ee7f2f 100644 --- a/i18n/zh-CN/docusaurus-plugin-content-docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md @@ -57,6 +57,12 @@ FROM ${table_name} WHERE ARRAY_CONTAINS(CAST(v['tags'] AS ARRAY), 'Doris'); ``` +VARIANT 查询中,JSON Path 的表示有如下几种类型,除此之外的表示均为未定义行为: + +1. `v['properties']['title']` +2. `v['properties.title']` +3. `v.properties.title` + ## 基本类型 VARIANT 自动推断的子列基础类型包括: diff --git a/versioned_docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md b/versioned_docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md index 920b03fd8f9ac..a66ac2dcb2bfe 100644 --- a/versioned_docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md +++ b/versioned_docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md @@ -57,6 +57,12 @@ FROM ${table_name} WHERE ARRAY_CONTAINS(CAST(v['tags'] AS ARRAY), 'Doris'); ``` +In VARIANT queries, a JSON Path can be written in any of the following forms; any other form results in undefined behavior: + +1. `v['properties']['title']` +2. `v['properties.title']` +3. `v.properties.title` + ## Primitive types VARIANT infers subcolumn types automatically. 
Supported types include: @@ -565,5 +571,3 @@ ClickBench (43 queries): - No. They are equivalent. 2. Why doesn’t my query/index work? - Check whether you CAST paths to the correct types; whether the type was promoted to JSONB due to conflicts; or whether you mistakenly expect an index on the whole VARIANT instead of on subpaths. - - From 02a9e104890a5e02607f33b21442a88e6ac045fc Mon Sep 17 00:00:00 2001 From: Gary Date: Thu, 5 Feb 2026 17:25:57 +0800 Subject: [PATCH 3/9] Wildcard syntax --- .../sql-data-types/semi-structured/VARIANT.md | 68 ++++++++++++++++++- .../sql-data-types/semi-structured/VARIANT.md | 66 +++++++++++++++++- 2 files changed, 131 insertions(+), 3 deletions(-) diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md b/i18n/zh-CN/docusaurus-plugin-content-docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md index e026246ee7f2f..c606d25d280c9 100644 --- a/i18n/zh-CN/docusaurus-plugin-content-docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md @@ -171,7 +171,7 @@ Schema 仅指导“存储层”的持久化类型,计算逻辑仍以实际数 SELECT variant_type(CAST('{"a" : "12345"}' AS VARIANT<'a' : INT>)['a']); ``` -通配符与匹配顺序: +### 通配符与匹配顺序 ```sql CREATE TABLE test_var_schema ( @@ -197,6 +197,70 @@ v1 VARIANT< 匹配成功的子路径默认会展开为独立列。若匹配子列过多导致列数暴增,建议开启 `variant_enable_typed_paths_to_sparse`(见“配置”)。 +### 通配符语法 + +Schema Template 模式匹配算法**只支持受限 glob 语法子集**。 + +#### 支持的 glob 语法 + +以下示例均为可匹配示例。 + +| 语法 | 含义 | 示例(模式 → JSON Path) | +|------|------|-------------------| +| `*` | 任意长度字符串 | `num_*` → `num_latency` | +| `?` | 任意单字符 | `a?b` → `acb` | +| `[abc]` | 字符类 | `a[bc]d` → `abd` | +| `[a-z]` | 字符范围 | `int_[0-9]` → `int_3` | +| `[!abc]` | 取反字符类 | `int_[!0-9]` → `int_a` | +| `[^abc]` | 取反字符类 | `int_[^0-9]` → `int_a` | +| `\` | 转义下一个字符 | `a\*b` → 
`a*b` | + +#### 转义规则 + +- `\*` 表示字面量 `*` +- `\?` 表示字面量 `?` +- `\[` 表示字面量 `[` +- 末尾孤立 `\` 会被视为字面量 `\` + +#### 不支持的语法 + +以下语法会被当成普通字符处理,或导致匹配失败,请尽可能避免: + +| 语法 | 在某些 glob 实现中的语义 | 当前行为 | +|------|--------------------------|----------| +| `{a,b}` | 花括号展开 | **不支持**(当作字面量 `{` `}`) | +| `**` | 递归目录匹配 | **不支持特殊语义**(等价于 `*` `*` 连用) | + +- 类似于 `[]`、`[!]`、`[^]`、`a[]b` 的空字符模式无效,不匹配任何 JSON Path +- 类似于 `int_[0-9` 的未闭合字符模式无效,不匹配任何 JSON Path + +#### 典型示例 + +1. 正常匹配 +- 模式:`num_*` + - √ `num_a` + - √ `num_1` + - × `number_a` + +- 模式:`a\*b` + - √ `a*b` + - × `axxb` + +- 模式:`int_[0-9]` + - √ `int_1` + - × `int_a` + +2. 全量匹配(不是“包含”的语义) +- 模式:`a*b` + - √ `ab` + - √ `axxxb` + - × `xxaxxxbxx` + +3. `.` 与 `/` 不特殊,为普通字符 +- 模式:`int_*` + - √ `int_nested.level1` + - √ `int_nested/level1` + ## 类型冲突与提升规则 当同一路径出现不兼容类型(如同一字段既出现整数又出现字符串)时,将提升为 JSONB 类型以避免信息丢失: @@ -570,4 +634,4 @@ DESCRIBE ${table_name} PARTITION ($partition_name); 1. VARIANT 中的 `null` 与 SQL `NULL` 有区别吗? - 没有区别,两者等价。 2. 为什么我的查询/索引没有生效? - - 请检查是否对路径做了正确的 CAST、是否因为类型冲突被提升为 JSONB、或是否误以为给 VARIANT“整体”建的索引可用于子列。 \ No newline at end of file + - 请检查是否对路径做了正确的 CAST、是否因为类型冲突被提升为 JSONB、或是否误以为给 VARIANT“整体”建的索引可用于子列。 diff --git a/versioned_docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md b/versioned_docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md index a66ac2dcb2bfe..9fffc1be2ffb9 100644 --- a/versioned_docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md +++ b/versioned_docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md @@ -171,7 +171,7 @@ Schema only guides the persisted storage type. During query execution, the effec SELECT variant_type(CAST('{"a" : "12345"}' AS VARIANT<'a' : INT>)['a']); ``` -Wildcard matching and order: +### Wildcard matching and order ```sql CREATE TABLE test_var_schema ( @@ -197,6 +197,70 @@ v1 VARIANT< Matched subpaths are materialized as columns by default. 
If too many paths match and generate excessive columns, consider enabling `variant_enable_typed_paths_to_sparse` (see “Configuration”). +### Wildcard syntax + +The Schema Template pattern-matching algorithm supports **only a restricted subset of glob syntax**. + +#### Supported glob syntax + +All examples below are matching examples. + +| Syntax | Meaning | Example (pattern → JSON Path) | +|------|---------|------------------------------| +| `*` | Any-length string | `num_*` → `num_latency` | +| `?` | Any single character | `a?b` → `acb` | +| `[abc]` | Character class | `a[bc]d` → `abd` | +| `[a-z]` | Character range | `int_[0-9]` → `int_3` | +| `[!abc]` | Negated character class | `int_[!0-9]` → `int_a` | +| `[^abc]` | Negated character class | `int_[^0-9]` → `int_a` | +| `\` | Escape the next character | `a\*b` → `a*b` | + +#### Escaping rules + +- `\*` is a literal `*` +- `\?` is a literal `?` +- `\[` is a literal `[` +- A trailing standalone `\` is treated as a literal `\` + +#### Unsupported syntax + +The following are treated as ordinary characters or cause matching to fail; avoid them whenever possible: + +| Syntax | Semantics in some glob implementations | Current behavior | +|------|----------------------------------------|------------------| +| `{a,b}` | Brace expansion | **Not supported** (treated as literal `{` `}`) | +| `**` | Recursive directory match | **No special semantics** (equivalent to `*` `*`) | + +- Empty character patterns like `[]`, `[!]`, `[^]`, and `a[]b` are invalid and match no JSON Path. +- Unterminated character patterns like `int_[0-9` are invalid and match no JSON Path. + +#### Typical examples + +1. Normal match +- Pattern: `num_*` + - √ `num_a` + - √ `num_1` + - × `number_a` + +- Pattern: `a\*b` + - √ `a*b` + - × `axxb` + +- Pattern: `int_[0-9]` + - √ `int_1` + - × `int_a` + +2. Full match (not “contains” semantics) +- Pattern: `a*b` + - √ `ab` + - √ `axxxb` + - × `xxaxxxbxx` + +3. 
`.` and `/` are not special; they are ordinary characters +- Pattern: `int_*` + - √ `int_nested.level1` + - √ `int_nested/level1` + ## Type conflicts and promotion rules When incompatible types appear on the same path (e.g., the same field shows up as both integer and string), the type is promoted to JSONB to avoid information loss: From 27e0ab135f3484f3f033db9ebf14d180ebc74540 Mon Sep 17 00:00:00 2001 From: Gary Date: Fri, 6 Feb 2026 10:36:03 +0800 Subject: [PATCH 4/9] fix --- .../basic-element/sql-data-types/semi-structured/VARIANT.md | 2 +- .../basic-element/sql-data-types/semi-structured/VARIANT.md | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md b/i18n/zh-CN/docusaurus-plugin-content-docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md index c606d25d280c9..ba4d11dd12000 100644 --- a/i18n/zh-CN/docusaurus-plugin-content-docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md @@ -515,7 +515,7 @@ FROM t; -- 自动 CAST 关闭 SET enable_variant_schema_auto_cast = false; --- int_nested 匹配 int_*,查询结果正确返回 +-- 查询结果正确返回 SELECT data['int_nested'] FROM t; diff --git a/versioned_docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md b/versioned_docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md index 9fffc1be2ffb9..d070fb705a17b 100644 --- a/versioned_docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md +++ b/versioned_docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md @@ -472,13 +472,13 @@ CREATE TABLE t ( data VARIANT<'num_*': BIGINT, 'str_*': STRING> ); --- 1) Filter + order +-- 1) FILTER + ORDER SELECT id FROM t WHERE 
data['num_a'] > 10 ORDER BY data['num_a']; --- 2) Group + aggregate + alias +-- 2) GROUP + AGGREGATE + ALIAS SELECT data['str_name'] AS username, SUM(data['num_a']) AS total FROM t GROUP BY username @@ -515,7 +515,7 @@ FROM t; -- Auto CAST disabled SET enable_variant_schema_auto_cast = false; --- int_nested matches int_*, and the query returns the correct result +-- The query returns the correct result SELECT data['int_nested'] FROM t; From ddb18950cddd1069ea01b6defbfddb7050c68ad6 Mon Sep 17 00:00:00 2001 From: Gary Date: Fri, 6 Feb 2026 10:43:42 +0800 Subject: [PATCH 5/9] add dev doc --- .../sql-data-types/semi-structured/VARIANT.md | 135 +++++++++++++++++- .../sql-data-types/semi-structured/VARIANT.md | 135 +++++++++++++++++- 2 files changed, 268 insertions(+), 2 deletions(-) diff --git a/docs/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md b/docs/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md index 43752c3cdad9c..b7674d2947535 100644 --- a/docs/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md +++ b/docs/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md @@ -57,6 +57,12 @@ FROM ${table_name} WHERE ARRAY_CONTAINS(CAST(v['tags'] AS ARRAY), 'Doris'); ``` +In VARIANT queries, JSON Path can be expressed in the following forms; any other form is undefined: + +1. `v['properties']['title']` +2. `v['properties.title']` +3. `v.properties.title` + ## Primitive types VARIANT infers subcolumn types automatically. Supported types include: @@ -168,7 +174,7 @@ Schema only guides the persisted storage type. During query execution, the effec SELECT variant_type(CAST('{"a" : "12345"}' AS VARIANT<'a' : INT>)['a']); ``` -Wildcard matching and order: +### Wildcard matching and order ```sql CREATE TABLE test_var_schema ( @@ -194,6 +200,70 @@ v1 VARIANT< Matched subpaths are materialized as columns by default. 
If too many paths match and generate excessive columns, consider enabling `variant_enable_typed_paths_to_sparse` (see “Configuration”). +### Wildcard syntax + +The Schema Template pattern-matching algorithm supports **only a restricted subset of glob syntax**. + +#### Supported glob syntax + +All examples below are matching examples. + +| Syntax | Meaning | Example (pattern → JSON Path) | +|------|---------|------------------------------| +| `*` | Any-length string | `num_*` → `num_latency` | +| `?` | Any single character | `a?b` → `acb` | +| `[abc]` | Character class | `a[bc]d` → `abd` | +| `[a-z]` | Character range | `int_[0-9]` → `int_3` | +| `[!abc]` | Negated character class | `int_[!0-9]` → `int_a` | +| `[^abc]` | Negated character class | `int_[^0-9]` → `int_a` | +| `\` | Escape the next character | `a\*b` → `a*b` | + +#### Escaping rules + +- `\*` is a literal `*` +- `\?` is a literal `?` +- `\[` is a literal `[` +- A trailing standalone `\` is treated as a literal `\` + +#### Unsupported syntax + +The following are treated as ordinary characters or cause matching to fail; avoid them whenever possible: + +| Syntax | Semantics in some glob implementations | Current behavior | +|------|----------------------------------------|------------------| +| `{a,b}` | Brace expansion | **Not supported** (treated as literal `{` `}`) | +| `**` | Recursive directory match | **No special semantics** (equivalent to `*` `*`) | + +- Empty character patterns like `[]`, `[!]`, `[^]`, and `a[]b` are invalid and match no JSON Path. +- Unterminated character patterns like `int_[0-9` are invalid and match no JSON Path. + +#### Typical examples + +1. Normal match +- Pattern: `num_*` + - √ `num_a` + - √ `num_1` + - × `number_a` + +- Pattern: `a\*b` + - √ `a*b` + - × `axxb` + +- Pattern: `int_[0-9]` + - √ `int_1` + - × `int_a` + +2. Full match (not “contains” semantics) +- Pattern: `a*b` + - √ `ab` + - √ `axxxb` + - × `xxaxxxbxx` + +3. 
`.` and `/` are not special; they are ordinary characters +- Pattern: `int_*` + - √ `int_nested.level1` + - √ `int_nested/level1` + ## Type conflicts and promotion rules When incompatible types appear on the same path (e.g., the same field shows up as both integer and string), the type is promoted to JSONB to avoid information loss: @@ -391,6 +461,69 @@ SELECT * FROM tbl WHERE v['str'] MATCH 'Doris'; | `VARCHAR` | ✔ | ✔ | | `JSON` | ✔ | ✔ | +### Schema Template based auto CAST + +When a VARIANT column defines a Schema Template and `enable_variant_schema_auto_cast` is set to true, the analyzer automatically inserts CASTs to the declared types for subpaths that match the Schema Template, so you do not need to write CASTs manually. + +- Applies to SELECT, WHERE, ORDER BY, GROUP BY, HAVING, JOIN keys, and aggregate arguments. +- To disable this behavior, set `enable_variant_schema_auto_cast` to false. + +Example: +```sql +CREATE TABLE t ( + id BIGINT, + data VARIANT<'num_*': BIGINT, 'str_*': STRING> +); + +-- 1) FILTER + ORDER +SELECT id +FROM t +WHERE data['num_a'] > 10 +ORDER BY data['num_a']; + +-- 2) GROUP + AGGREGATE + ALIAS +SELECT data['str_name'] AS username, SUM(data['num_a']) AS total +FROM t +GROUP BY username +HAVING data['num_a'] > 100; + +-- 3) JOIN ON +SELECT * +FROM t1 JOIN t2 +ON t1.data['num_id'] = t2.data['num_id']; +``` + +**Note**: Auto CAST cannot determine whether a path is a leaf; it simply casts all paths that match the Schema Template. + +Therefore, in cases like the following, to ensure correct results, set `enable_variant_schema_auto_cast` to false and add CASTs manually. 
+ +```sql +-- Schema Template: treat all int_* as INT +CREATE TABLE t ( + id INT, + data VARIANT<'int_*': INT> +); + +INSERT INTO t VALUES +(1, '{"int_1": 1, "int_nested": {"level1_num_1": 1011111, "level1_num_2": 102}}'); + +-- Auto CAST enabled +SET enable_variant_schema_auto_cast = true; + +-- int_nested matches int_*, is incorrectly CAST to INT, and the query returns NULL +SELECT + data['int_nested'] +FROM t; + +-- Auto CAST disabled +SET enable_variant_schema_auto_cast = false; + +-- The query returns the correct result +SELECT + data['int_nested'] +FROM t; +``` + ## Wide columns When ingested data contains many distinct JSON keys, VARIANT materialized subcolumns can grow rapidly; at scale this may cause metadata bloat, higher write/merge cost, and query slowdowns. To address “wide columns” (too many subcolumns), VARIANT provides two mechanisms: **Sparse columns** and **DOC encoding**. diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md b/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md index a187bb63ae528..131f2548e436a 100644 --- a/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md @@ -57,6 +57,12 @@ FROM ${table_name} WHERE ARRAY_CONTAINS(CAST(v['tags'] AS ARRAY), 'Doris'); ``` +VARIANT 查询中, JSON Path 的表示有如下几种类型,除此之外的表示均为未定义行为: + +1. `v['properties']['title']` +2. `v['properties.title']` +3. 
`v.properties.title` + ## 基本类型 VARIANT 自动推断的子列基础类型包括: @@ -168,7 +174,7 @@ Schema 仅指导“存储层”的持久化类型,计算逻辑仍以实际数 SELECT variant_type(CAST('{"a" : "12345"}' AS VARIANT<'a' : INT>)['a']); ``` -通配符与匹配顺序: +### 通配符与匹配顺序 ```sql CREATE TABLE test_var_schema ( @@ -194,6 +200,70 @@ v1 VARIANT< 匹配成功的子路径默认会展开为独立列。若匹配子列过多导致列数暴增,建议开启 `variant_enable_typed_paths_to_sparse`(见“配置”)。 +### 通配符语法 + +Schema Template 模式匹配算法**只支持受限 glob 语法子集**。 + +#### 支持的 glob 语法 + +以下示例均为可匹配示例。 + +| 语法 | 含义 | 示例(模式 → JSON Path) | +|------|------|-------------------| +| `*` | 任意长度字符串 | `num_*` → `num_latency` | +| `?` | 任意单字符 | `a?b` → `acb` | +| `[abc]` | 字符类 | `a[bc]d` → `abd` | +| `[a-z]` | 字符范围 | `int_[0-9]` → `int_3` | +| `[!abc]` | 取反字符类 | `int_[!0-9]` → `int_a` | +| `[^abc]` | 取反字符类 | `int_[^0-9]` → `int_a` | +| `\` | 转义下一个字符 | `a\*b` → `a*b` | + +#### 转义规则 + +- `\*` 表示字面量 `*` +- `\?` 表示字面量 `?` +- `\[` 表示字面量 `[` +- 末尾孤立 `\` 会被视为字面量 `\` + +#### 不支持的语法 + +以下语法会被当成普通字符处理,或导致匹配失败,请尽可能避免: + +| 语法 | 在某些 glob 实现中的语义 | 当前行为 | +|------|--------------------------|----------| +| `{a,b}` | 花括号展开 | **不支持**(当作字面量 `{` `}`) | +| `**` | 递归目录匹配 | **不支持特殊语义**(等价于 `*` `*` 连用) | + +- 类似于 `[]`、`[!]`、`[^]`、`a[]b` 的空字符模式无效,不匹配任何 JSON Path +- 类似于 `int_[0-9` 的未闭合字符模式无效,不匹配任何 JSON Path + +#### 典型示例 + +1. 正常匹配 +- 模式:`num_*` + - √ `num_a` + - √ `num_1` + - × `number_a` + +- 模式:`a\*b` + - √ `a*b` + - × `axxb` + +- 模式:`int_[0-9]` + - √ `int_1` + - × `int_a` + +2. 全量匹配(不是“包含”的语义) +- 模式:`a*b` + - √ `ab` + - √ `axxxb` + - × `xxaxxxbxx` + +3. 
`.` 与 `/` 不特殊,为普通字符 +- 模式:`int_*` + - √ `int_nested.level1` + - √ `int_nested/level1` + ## 类型冲突与提升规则 当同一路径出现不兼容类型(如同一字段既出现整数又出现字符串)时,将提升为 JSONB 类型以避免信息丢失: @@ -391,6 +461,69 @@ SELECT * FROM tbl WHERE v['str'] MATCH 'Doris'; | `VARCHAR` | ✔ | ✔ | | `JSON` | ✔ | ✔ | +### 基于 Schema Template 自动 CAST + +当 VARIANT 列定义了 schema template 时,且 `enable_variant_schema_auto_cast` 设为 true 时,语义分析阶段会为命中 schema template 的子列自动插入对应类型的 CAST,无需自行手写。 + +- 覆盖 SELECT、WHERE、ORDER BY、GROUP BY、HAVING、JOIN KEY 或聚合参数等场景。 +- 若需关闭此行为,将 `enable_variant_schema_auto_cast` 设为 false。 + +示例: +```sql +CREATE TABLE t ( + id BIGINT, + data VARIANT<'num_*': BIGINT, 'str_*': STRING> +); + +-- 1) 过滤 + 排序 +SELECT id +FROM t +WHERE data['num_a'] > 10 +ORDER BY data['num_a']; + +-- 2) 分组 + 聚合 + Alias +SELECT data['str_name'] AS username, SUM(data['num_a']) AS total +FROM t +GROUP BY username +HAVING data['num_a'] > 100; + +-- 3) JOIN ON +SELECT * +FROM t1 JOIN t2 +ON t1.data['num_id'] = t2.data['num_id']; +``` + +**注意**:自动 CAST 功能无法感知给定的 Path 是否为叶子,它只是对所有符合 schema template 规则的 Path 都加对应的 CAST。 + +因此,对于下述这种情况需要额外注意,为保证结果正确,请设置 `enable_variant_schema_auto_cast` 设为 false,并手动添加 CAST。 + +```sql +-- Schema Template:所有 int_* 视为 INT +CREATE TABLE t ( + id INT, + data VARIANT<'int_*': INT> +); + +INSERT INTO t VALUES +(1, '{"int_1": 1, "int_nested": {"level1_num_1": 1011111, "level1_num_2": 102}}'); + +-- 自动 CAST 开启 +SET enable_variant_schema_auto_cast = true; + +-- int_nested 匹配 int_*,错误自动 CAST 为 INT,查询结果返回 NULL +SELECT + data['int_nested'] +FROM t; + +-- 自动 CAST 关闭 +SET enable_variant_schema_auto_cast = false; + +-- 查询结果正确返回 +SELECT + data['int_nested'] +FROM t; +``` + ## 宽列 当导入数据包含大量不同的 JSON key 时,VARIANT 的子列会迅速增多;当规模达到一定程度,可能出现元数据膨胀、写入/合并开销增大、查询性能下降等问题。为应对“宽列”(子列过多),VARIANT 提供两种机制:**稀疏列** 与 **DOC 编码**。 From 196d72436bc2b1d5a33f9b0c0eb9484ff670fdb3 Mon Sep 17 00:00:00 2001 From: Gary Date: Fri, 6 Feb 2026 10:47:13 +0800 Subject: [PATCH 6/9] space --- .../basic-element/sql-data-types/semi-structured/VARIANT.md | 2 ++ 
1 file changed, 2 insertions(+) diff --git a/versioned_docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md b/versioned_docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md index d070fb705a17b..e89c0941a6555 100644 --- a/versioned_docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md +++ b/versioned_docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md @@ -635,3 +635,5 @@ ClickBench (43 queries): - No. They are equivalent. 2. Why doesn’t my query/index work? - Check whether you CAST paths to the correct types; whether the type was promoted to JSONB due to conflicts; or whether you mistakenly expect an index on the whole VARIANT instead of on subpaths. + + From 50633d4b3ba8e90d4c380eab2539491fdee966ad Mon Sep 17 00:00:00 2001 From: Gary Date: Fri, 6 Feb 2026 16:02:29 +0800 Subject: [PATCH 7/9] add backslashes explain --- .../sql-data-types/semi-structured/VARIANT.md | 32 +++++++++++++------ .../sql-data-types/semi-structured/VARIANT.md | 31 ++++++++++++------ .../sql-data-types/semi-structured/VARIANT.md | 31 ++++++++++++------ .../sql-data-types/semi-structured/VARIANT.md | 32 +++++++++++++------ 4 files changed, 88 insertions(+), 38 deletions(-) diff --git a/docs/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md b/docs/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md index b7674d2947535..00797c5120ab1 100644 --- a/docs/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md +++ b/docs/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md @@ -206,17 +206,19 @@ The Schema Template pattern-matching algorithm supports **only a restricted subs #### Supported glob syntax +To put a single `\` into a glob pattern, write `\\\\` in the SQL string literal. + All examples below are matching examples. 
-| Syntax | Meaning | Example (pattern → JSON Path) | -|------|---------|------------------------------| -| `*` | Any-length string | `num_*` → `num_latency` | -| `?` | Any single character | `a?b` → `acb` | -| `[abc]` | Character class | `a[bc]d` → `abd` | -| `[a-z]` | Character range | `int_[0-9]` → `int_3` | -| `[!abc]` | Negated character class | `int_[!0-9]` → `int_a` | -| `[^abc]` | Negated character class | `int_[^0-9]` → `int_a` | -| `\` | Escape the next character | `a\*b` → `a*b` | +| Syntax | Meaning | Example (pattern → JSON Path) | SQL literal | +|------|---------|------------------------------|-------------| +| `*` | Any-length string | `num_*` → `num_latency` | `'num_*'` | +| `?` | Any single character | `a?b` → `acb` | `'a?b'` | +| `[abc]` | Character class | `a[bc]d` → `abd` | `'a[bc]d'` | +| `[a-z]` | Character range | `int_[0-9]` → `int_3` | `'int_[0-9]'` | +| `[!abc]` | Negated character class | `int_[!0-9]` → `int_a` | `'int_[!0-9]'` | +| `[^abc]` | Negated character class | `int_[^0-9]` → `int_a` | `'int_[^0-9]'` | +| `\` | Escape the next character | `a\*b` → `a*b`
`a\?b` → `a?b`
`a\[b` → `a[b`
`\` → `\` | `'a\\\\*b'`
`'a\\\\?b'`
`'a\\\\[b'`
`'\\\\'` | #### Escaping rules @@ -246,9 +248,20 @@ The following are treated as ordinary characters or cause matching to fail; avoi - × `number_a` - Pattern: `a\*b` + - SQL: `'a\\\\*b'` - √ `a*b` - × `axxb` +- Pattern: `\*` + - SQL: `'\\\\*'` + - √ `*` + - × `a*` + +- Pattern: `\` + - SQL: `'\\\\'` + - √ `\` + - × `\\` + - Pattern: `int_[0-9]` - √ `int_1` - × `int_a` @@ -710,4 +723,3 @@ ClickBench (43 queries): 2. Why doesn’t my query/index work? - Check whether you CAST paths to the correct types; whether the type was promoted to JSONB due to conflicts; or whether you mistakenly expect an index on the whole VARIANT instead of on subpaths. - diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md b/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md index 131f2548e436a..6011499e82f90 100644 --- a/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md @@ -206,17 +206,19 @@ Schema Template 模式匹配算法**只支持受限 glob 语法子集**。 #### 支持的 glob 语法 +SQL 字符串需要写成 `\\\\` 才能表达 glob 中的 `\`。 + 以下示例均为可匹配示例。 -| 语法 | 含义 | 示例(模式 → JSON Path) | -|------|------|-------------------| -| `*` | 任意长度字符串 | `num_*` → `num_latency` | -| `?` | 任意单字符 | `a?b` → `acb` | -| `[abc]` | 字符类 | `a[bc]d` → `abd` | -| `[a-z]` | 字符范围 | `int_[0-9]` → `int_3` | -| `[!abc]` | 取反字符类 | `int_[!0-9]` → `int_a` | -| `[^abc]` | 取反字符类 | `int_[^0-9]` → `int_a` | -| `\` | 转义下一个字符 | `a\*b` → `a*b` | +| 语法 | 含义 | 示例(模式 → JSON Path) | SQL 字面量写法 | +|------|------|-------------------|----------------| +| `*` | 任意长度字符串 | `num_*` → `num_latency` | `'num_*'` | +| `?` | 任意单字符 | `a?b` → `acb` | `'a?b'` | +| `[abc]` | 字符类 | `a[bc]d` → `abd` | `'a[bc]d'` | +| `[a-z]` | 字符范围 | `int_[0-9]` → `int_3` | `'int_[0-9]'` | +| 
`[!abc]` | 取反字符类 | `int_[!0-9]` → `int_a` | `'int_[!0-9]'` | +| `[^abc]` | 取反字符类 | `int_[^0-9]` → `int_a` | `'int_[^0-9]'` | +| `\` | 转义下一个字符 | `a\*b` → `a*b`
`a\?b` → `a?b`
`a\[b` → `a[b`
`\` → `\` | `'a\\\\*b'`
`'a\\\\?b'`
`'a\\\\[b'`
`'\\\\'` | #### 转义规则 @@ -246,9 +248,20 @@ Schema Template 模式匹配算法**只支持受限 glob 语法子集**。 - × `number_a` - 模式:`a\*b` + - SQL:`'a\\\\*b'` - √ `a*b` - × `axxb` +- 模式:`\*` + - SQL:`'\\\\*'` + - √ `*` + - × `a*` + +- 模式:`\` + - SQL:`'\\\\'` + - √ `\` + - × `\\` + - 模式:`int_[0-9]` - √ `int_1` - × `int_a` diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md b/i18n/zh-CN/docusaurus-plugin-content-docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md index ba4d11dd12000..958307d7fe353 100644 --- a/i18n/zh-CN/docusaurus-plugin-content-docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md @@ -203,17 +203,19 @@ Schema Template 模式匹配算法**只支持受限 glob 语法子集**。 #### 支持的 glob 语法 +SQL 字符串需要写成 `\\\\` 才能表达 glob 中的 `\`。 + 以下示例均为可匹配示例。 -| 语法 | 含义 | 示例(模式 → JSON Path) | -|------|------|-------------------| -| `*` | 任意长度字符串 | `num_*` → `num_latency` | -| `?` | 任意单字符 | `a?b` → `acb` | -| `[abc]` | 字符类 | `a[bc]d` → `abd` | -| `[a-z]` | 字符范围 | `int_[0-9]` → `int_3` | -| `[!abc]` | 取反字符类 | `int_[!0-9]` → `int_a` | -| `[^abc]` | 取反字符类 | `int_[^0-9]` → `int_a` | -| `\` | 转义下一个字符 | `a\*b` → `a*b` | +| 语法 | 含义 | 示例(模式 → JSON Path) | SQL 字面量写法 | +|------|------|-------------------|----------------| +| `*` | 任意长度字符串 | `num_*` → `num_latency` | `'num_*'` | +| `?` | 任意单字符 | `a?b` → `acb` | `'a?b'` | +| `[abc]` | 字符类 | `a[bc]d` → `abd` | `'a[bc]d'` | +| `[a-z]` | 字符范围 | `int_[0-9]` → `int_3` | `'int_[0-9]'` | +| `[!abc]` | 取反字符类 | `int_[!0-9]` → `int_a` | `'int_[!0-9]'` | +| `[^abc]` | 取反字符类 | `int_[^0-9]` → `int_a` | `'int_[^0-9]'` | +| `\` | 转义下一个字符 | `a\*b` → `a*b`
`a\?b` → `a?b`
`a\[b` → `a[b`
`\` → `\` | `'a\\\\*b'`
`'a\\\\?b'`
`'a\\\\[b'`
`'\\\\'` | #### 转义规则 @@ -243,9 +245,20 @@ Schema Template 模式匹配算法**只支持受限 glob 语法子集**。 - × `number_a` - 模式:`a\*b` + - SQL:`'a\\\\*b'` - √ `a*b` - × `axxb` +- 模式:`\*` + - SQL:`'\\\\*'` + - √ `*` + - × `a*` + +- 模式:`\` + - SQL:`'\\\\'` + - √ `\` + - × `\\` + - 模式:`int_[0-9]` - √ `int_1` - × `int_a` diff --git a/versioned_docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md b/versioned_docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md index e89c0941a6555..5295144dd7dfe 100644 --- a/versioned_docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md +++ b/versioned_docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md @@ -203,17 +203,19 @@ The Schema Template pattern-matching algorithm supports **only a restricted subs #### Supported glob syntax +In SQL strings, we should write `\\\\` to express a literal `\` in glob patterns. + All examples below are matching examples. -| Syntax | Meaning | Example (pattern → JSON Path) | -|------|---------|------------------------------| -| `*` | Any-length string | `num_*` → `num_latency` | -| `?` | Any single character | `a?b` → `acb` | -| `[abc]` | Character class | `a[bc]d` → `abd` | -| `[a-z]` | Character range | `int_[0-9]` → `int_3` | -| `[!abc]` | Negated character class | `int_[!0-9]` → `int_a` | -| `[^abc]` | Negated character class | `int_[^0-9]` → `int_a` | -| `\` | Escape the next character | `a\*b` → `a*b` | +| Syntax | Meaning | Example (pattern → JSON Path) | SQL literal | +|------|---------|------------------------------|-------------| +| `*` | Any-length string | `num_*` → `num_latency` | `'num_*'` | +| `?` | Any single character | `a?b` → `acb` | `'a?b'` | +| `[abc]` | Character class | `a[bc]d` → `abd` | `'a[bc]d'` | +| `[a-z]` | Character range | `int_[0-9]` → `int_3` | `'int_[0-9]'` | +| `[!abc]` | Negated character class | `int_[!0-9]` → `int_a` | `'int_[!0-9]'` | +| `[^abc]` | Negated 
character class | `int_[^0-9]` → `int_a` | `'int_[^0-9]'` | +| `\` | Escape the next character | `a\*b` → `a*b`
`a\?b` → `a?b`
`a\[b` → `a[b`
`\` → `\` | `'a\\\\*b'`
`'a\\\\?b'`
`'a\\\\[b'`
`'\\\\'` | #### Escaping rules @@ -243,9 +245,20 @@ The following are treated as ordinary characters or cause matching to fail; avoi - × `number_a` - Pattern: `a\*b` + - SQL: `'a\\\\*b'` - √ `a*b` - × `axxb` +- Pattern: `\*` + - SQL: `'\\\\*'` + - √ `*` + - × `a*` + +- Pattern: `\` + - SQL: `'\\\\'` + - √ `\` + - × `\\` + - Pattern: `int_[0-9]` - √ `int_1` - × `int_a` @@ -636,4 +649,3 @@ ClickBench (43 queries): 2. Why doesn’t my query/index work? - Check whether you CAST paths to the correct types; whether the type was promoted to JSONB due to conflicts; or whether you mistakenly expect an index on the whole VARIANT instead of on subpaths. - From e8aa9cc9c51a8a0c67be725713012b34957025d1 Mon Sep 17 00:00:00 2001 From: Gary Date: Fri, 6 Feb 2026 16:04:48 +0800 Subject: [PATCH 8/9] delete escape rules --- .../sql-data-types/semi-structured/VARIANT.md | 7 ------- .../sql-data-types/semi-structured/VARIANT.md | 7 ------- .../sql-data-types/semi-structured/VARIANT.md | 7 ------- .../sql-data-types/semi-structured/VARIANT.md | 7 ------- 4 files changed, 28 deletions(-) diff --git a/docs/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md b/docs/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md index 00797c5120ab1..c4908ac665aa7 100644 --- a/docs/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md +++ b/docs/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md @@ -220,13 +220,6 @@ All examples below are matching examples. | `[^abc]` | Negated character class | `int_[^0-9]` → `int_a` | `'int_[^0-9]'` | | `\` | Escape the next character | `a\*b` → `a*b`
`a\?b` → `a?b`
`a\[b` → `a[b`
`\` → `\` | `'a\\\\*b'`
`'a\\\\?b'`
`'a\\\\[b'`
`'\\\\'` | -#### Escaping rules - -- `\*` is a literal `*` -- `\?` is a literal `?` -- `\[` is a literal `[` -- A trailing standalone `\` is treated as a literal `\` - #### Unsupported syntax The following are treated as ordinary characters or cause matching to fail; avoid them whenever possible: diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md b/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md index 6011499e82f90..5a27a1f90e319 100644 --- a/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md @@ -220,13 +220,6 @@ SQL 字符串需要写成 `\\\\` 才能表达 glob 中的 `\`。 | `[^abc]` | 取反字符类 | `int_[^0-9]` → `int_a` | `'int_[^0-9]'` | | `\` | 转义下一个字符 | `a\*b` → `a*b`
`a\?b` → `a?b`
`a\[b` → `a[b`
`\` → `\` | `'a\\\\*b'`
`'a\\\\?b'`
`'a\\\\[b'`
`'\\\\'` | -#### 转义规则 - -- `\*` 表示字面量 `*` -- `\?` 表示字面量 `?` -- `\[` 表示字面量 `[` -- 末尾孤立 `\` 会被视为字面量 `\` - #### 不支持的语法 以下语法会被当成普通字符处理,或导致匹配失败,请尽可能避免: diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md b/i18n/zh-CN/docusaurus-plugin-content-docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md index 958307d7fe353..c7d4445e9c405 100644 --- a/i18n/zh-CN/docusaurus-plugin-content-docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md @@ -217,13 +217,6 @@ SQL 字符串需要写成 `\\\\` 才能表达 glob 中的 `\`。 | `[^abc]` | 取反字符类 | `int_[^0-9]` → `int_a` | `'int_[^0-9]'` | | `\` | 转义下一个字符 | `a\*b` → `a*b`
`a\?b` → `a?b`
`a\[b` → `a[b`
`\` → `\` | `'a\\\\*b'`
`'a\\\\?b'`
`'a\\\\[b'`
`'\\\\'` | -#### 转义规则 - -- `\*` 表示字面量 `*` -- `\?` 表示字面量 `?` -- `\[` 表示字面量 `[` -- 末尾孤立 `\` 会被视为字面量 `\` - #### 不支持的语法 以下语法会被当成普通字符处理,或导致匹配失败,请尽可能避免: diff --git a/versioned_docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md b/versioned_docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md index 5295144dd7dfe..de53bb0995497 100644 --- a/versioned_docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md +++ b/versioned_docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md @@ -217,13 +217,6 @@ All examples below are matching examples. | `[^abc]` | Negated character class | `int_[^0-9]` → `int_a` | `'int_[^0-9]'` | | `\` | Escape the next character | `a\*b` → `a*b`
`a\?b` → `a?b`
`a\[b` → `a[b`
`\` → `\` | `'a\\\\*b'`
`'a\\\\?b'`
`'a\\\\[b'`
`'\\\\'` | -#### Escaping rules - -- `\*` is a literal `*` -- `\?` is a literal `?` -- `\[` is a literal `[` -- A trailing standalone `\` is treated as a literal `\` - #### Unsupported syntax The following are treated as ordinary characters or cause matching to fail; avoid them whenever possible: From fc3cb4cc6c0da79e4326af81de4334b7f8308414 Mon Sep 17 00:00:00 2001 From: Gary Date: Fri, 6 Feb 2026 16:26:30 +0800 Subject: [PATCH 9/9] double escape --- .../sql-data-types/semi-structured/VARIANT.md | 10 +++++----- .../sql-data-types/semi-structured/VARIANT.md | 10 +++++----- .../sql-data-types/semi-structured/VARIANT.md | 10 +++++----- .../sql-data-types/semi-structured/VARIANT.md | 10 +++++----- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/docs/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md b/docs/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md index c4908ac665aa7..60d60b2f77ef2 100644 --- a/docs/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md +++ b/docs/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md @@ -206,7 +206,7 @@ The Schema Template pattern-matching algorithm supports **only a restricted subs #### Supported glob syntax -In SQL strings, we write `\\\\` to express a literal `\` in glob patterns. +In SQL string literals, the backslash must itself be escaped: write `\\` to produce a single `\` in the glob pattern. All examples below are matching examples. @@ -218,7 +218,7 @@ All examples below are matching examples. | `[a-z]` | Character range | `int_[0-9]` → `int_3` | `'int_[0-9]'` | | `[!abc]` | Negated character class | `int_[!0-9]` → `int_a` | `'int_[!0-9]'` | | `[^abc]` | Negated character class | `int_[^0-9]` → `int_a` | `'int_[^0-9]'` | -| `\` | Escape the next character | `a\*b` → `a*b`
`a\?b` → `a?b`
`a\[b` → `a[b`
`\` → `\` | `'a\\\\*b'`
`'a\\\\?b'`
`'a\\\\[b'`
`'\\\\'` | +| `\` | Escape the next character | `a\*b` → `a*b`
`a\?b` → `a?b`
`a\[b` → `a[b`
`\` → `\` | `'a\\*b'`
`'a\\?b'`
`'a\\[b'`
`'\\'` | #### Unsupported syntax @@ -241,17 +241,17 @@ The following are treated as ordinary characters or cause matching to fail; avoi - × `number_a` - Pattern: `a\*b` - - SQL: `'a\\\\*b'` + - SQL: `'a\\*b'` - √ `a*b` - × `axxb` - Pattern: `\*` - - SQL: `'\\\\*'` + - SQL: `'\\*'` - √ `*` - × `a*` - Pattern: `\` - - SQL: `'\\\\'` + - SQL: `'\\'` - √ `\` - × `\\` diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md b/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md index 5a27a1f90e319..ce013bbf97c7f 100644 --- a/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md @@ -206,7 +206,7 @@ Schema Template 模式匹配算法**只支持受限 glob 语法子集**。 #### 支持的 glob 语法 -SQL 字符串需要写成 `\\\\` 才能表达 glob 中的 `\`。 +SQL 字符串需要写成 `\\` 才能表达 glob 中的 `\`。 以下示例均为可匹配示例。 @@ -218,7 +218,7 @@ SQL 字符串需要写成 `\\\\` 才能表达 glob 中的 `\`。 | `[a-z]` | 字符范围 | `int_[0-9]` → `int_3` | `'int_[0-9]'` | | `[!abc]` | 取反字符类 | `int_[!0-9]` → `int_a` | `'int_[!0-9]'` | | `[^abc]` | 取反字符类 | `int_[^0-9]` → `int_a` | `'int_[^0-9]'` | -| `\` | 转义下一个字符 | `a\*b` → `a*b`
`a\?b` → `a?b`
`a\[b` → `a[b`
`\` → `\` | `'a\\\\*b'`
`'a\\\\?b'`
`'a\\\\[b'`
`'\\\\'` | +| `\` | 转义下一个字符 | `a\*b` → `a*b`
`a\?b` → `a?b`
`a\[b` → `a[b`
`\` → `\` | `'a\\*b'`
`'a\\?b'`
`'a\\[b'`
`'\\'` | #### 不支持的语法 @@ -241,17 +241,17 @@ SQL 字符串需要写成 `\\\\` 才能表达 glob 中的 `\`。 - × `number_a` - 模式:`a\*b` - - SQL:`'a\\\\*b'` + - SQL:`'a\\*b'` - √ `a*b` - × `axxb` - 模式:`\*` - - SQL:`'\\\\*'` + - SQL:`'\\*'` - √ `*` - × `a*` - 模式:`\` - - SQL:`'\\\\'` + - SQL:`'\\'` - √ `\` - × `\\` diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md b/i18n/zh-CN/docusaurus-plugin-content-docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md index c7d4445e9c405..c2becf32b8388 100644 --- a/i18n/zh-CN/docusaurus-plugin-content-docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md @@ -203,7 +203,7 @@ Schema Template 模式匹配算法**只支持受限 glob 语法子集**。 #### 支持的 glob 语法 -SQL 字符串需要写成 `\\\\` 才能表达 glob 中的 `\`。 +SQL 字符串需要写成 `\\` 才能表达 glob 中的 `\`。 以下示例均为可匹配示例。 @@ -215,7 +215,7 @@ SQL 字符串需要写成 `\\\\` 才能表达 glob 中的 `\`。 | `[a-z]` | 字符范围 | `int_[0-9]` → `int_3` | `'int_[0-9]'` | | `[!abc]` | 取反字符类 | `int_[!0-9]` → `int_a` | `'int_[!0-9]'` | | `[^abc]` | 取反字符类 | `int_[^0-9]` → `int_a` | `'int_[^0-9]'` | -| `\` | 转义下一个字符 | `a\*b` → `a*b`
`a\?b` → `a?b`
`a\[b` → `a[b`
`\` → `\` | `'a\\\\*b'`
`'a\\\\?b'`
`'a\\\\[b'`
`'\\\\'` | +| `\` | 转义下一个字符 | `a\*b` → `a*b`
`a\?b` → `a?b`
`a\[b` → `a[b`
`\` → `\` | `'a\\*b'`
`'a\\?b'`
`'a\\[b'`
`'\\'` | #### 不支持的语法 @@ -238,17 +238,17 @@ SQL 字符串需要写成 `\\\\` 才能表达 glob 中的 `\`。 - × `number_a` - 模式:`a\*b` - - SQL:`'a\\\\*b'` + - SQL:`'a\\*b'` - √ `a*b` - × `axxb` - 模式:`\*` - - SQL:`'\\\\*'` + - SQL:`'\\*'` - √ `*` - × `a*` - 模式:`\` - - SQL:`'\\\\'` + - SQL:`'\\'` - √ `\` - × `\\` diff --git a/versioned_docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md b/versioned_docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md index de53bb0995497..b05698659ae72 100644 --- a/versioned_docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md +++ b/versioned_docs/version-4.x/sql-manual/basic-element/sql-data-types/semi-structured/VARIANT.md @@ -203,7 +203,7 @@ The Schema Template pattern-matching algorithm supports **only a restricted subs #### Supported glob syntax -In SQL strings, we should write `\\\\` to express a literal `\` in glob patterns. +In SQL string literals, the backslash must itself be escaped: write `\\` to produce a single `\` in the glob pattern. All examples below are matching examples. @@ -215,7 +215,7 @@ All examples below are matching examples. | `[a-z]` | Character range | `int_[0-9]` → `int_3` | `'int_[0-9]'` | | `[!abc]` | Negated character class | `int_[!0-9]` → `int_a` | `'int_[!0-9]'` | | `[^abc]` | Negated character class | `int_[^0-9]` → `int_a` | `'int_[^0-9]'` | -| `\` | Escape the next character | `a\*b` → `a*b`
`a\?b` → `a?b`
`a\[b` → `a[b`
`\` → `\` | `'a\\\\*b'`
`'a\\\\?b'`
`'a\\\\[b'`
`'\\\\'` | +| `\` | Escape the next character | `a\*b` → `a*b`
`a\?b` → `a?b`
`a\[b` → `a[b`
`\` → `\` | `'a\\*b'`
`'a\\?b'`
`'a\\[b'`
`'\\'` | #### Unsupported syntax @@ -238,17 +238,17 @@ The following are treated as ordinary characters or cause matching to fail; avoi - × `number_a` - Pattern: `a\*b` - - SQL: `'a\\\\*b'` + - SQL: `'a\\*b'` - √ `a*b` - × `axxb` - Pattern: `\*` - - SQL: `'\\\\*'` + - SQL: `'\\*'` - √ `*` - × `a*` - Pattern: `\` - - SQL: `'\\\\'` + - SQL: `'\\'` - √ `\` - × `\\`