Skip to content

Commit be7cbc7

Browse files
committed
feat: support accented identifiers as column names in all database dialects
1 parent 67d3c70 commit be7cbc7

File tree

10 files changed

+41
-35
lines changed

10 files changed

+41
-35
lines changed

pegjs/athena.pegjs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1097,7 +1097,7 @@ column_list_item
10971097

10981098
value_alias_clause
10991099
= KW_AS? __ i:(func_call / alias_ident) { return i; }
1100-
1100+
11011101
alias_clause
11021102
= KW_AS __ i:(func_call / alias_ident) { return i; }
11031103
/ KW_AS? __ i:ident { return i; }
@@ -2015,7 +2015,7 @@ ident_start = [A-Za-z_]
20152015
ident_part = [A-Za-z0-9_]
20162016

20172017
// to support column name like `cf1:name` in hbase
2018-
column_part = [A-Za-z0-9_:]
2018+
column_part = [A-Za-z0-9_:\u4e00-\u9fa5\u00C0-\u017F]
20192019

20202020
param
20212021
= l:(':' ident_name) {
@@ -2114,7 +2114,7 @@ distinct_args
21142114
}
21152115
return { distinct: d, expr: result, orderby: or };
21162116
}
2117-
2117+
21182118
count_arg
21192119
= e:star_expr { return { expr: e }; }
21202120
/ distinct_args
@@ -2172,7 +2172,7 @@ trim_func_clause
21722172
args,
21732173
};
21742174
}
2175-
2175+
21762176
func_call
21772177
= name:scalar_func __ LPAREN __ l:expr_list? __ RPAREN __ bc:over_partition? {
21782178
return {

pegjs/bigquery.pegjs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,7 @@
195195
columnList.clear()
196196
columns.forEach(col => columnList.add(col))
197197
}
198-
198+
199199
function getSurroundFromLiteralType(literal) {
200200
switch (literal.type) {
201201
case 'double_quote_string':
@@ -2484,7 +2484,7 @@ column_ref
24842484
return {
24852485
type: 'column_ref',
24862486
table: null,
2487-
column,
2487+
column,
24882488
collate: ce && ce[1],
24892489
...getLocationObject()
24902490
};
@@ -2579,7 +2579,7 @@ ident_start = [A-Za-z_]
25792579
ident_part = [A-Za-z0-9_-]
25802580

25812581
// to support column name like `cf1:name` in hbase
2582-
column_part = [A-Za-z0-9_:]
2582+
column_part = [A-Za-z0-9_:\u4e00-\u9fa5\u00C0-\u017F]
25832583

25842584
param
25852585
= s:(':'/'@') n:ident_name {

pegjs/db2.pegjs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1047,7 +1047,7 @@ select_stmt_nake
10471047
g:group_by_clause? __
10481048
h:having_clause? __
10491049
o:order_by_clause? __
1050-
l:limit_clause?
1050+
l:limit_clause?
10511051
iso:isolation_clause? __ {
10521052
if(f) f.forEach(info => info.table && tableList.add(`select::${info.db}::${info.table}`));
10531053
return {
@@ -1972,7 +1972,7 @@ ident_start = [A-Za-z_\u4e00-\u9fa5]
19721972
ident_part = [A-Za-z0-9_$\u4e00-\u9fa5\u00C0-\u017F]
19731973

19741974
// to support column name like `cf1:name` in hbase
1975-
column_part = [A-Za-z0-9_:]
1975+
column_part = [A-Za-z0-9_:\u4e00-\u9fa5\u00C0-\u017F]
19761976

19771977
param
19781978
= l:(':' ident_name) {

pegjs/hive.pegjs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1929,7 +1929,7 @@ ident_start = [A-Za-z_\u4e00-\u9fa5]
19291929
ident_part = [A-Za-z0-9_$\u4e00-\u9fa5\u00C0-\u017F]
19301930

19311931
// to support column name like `cf1:name` in hbase
1932-
column_part = [A-Za-z0-9_:]
1932+
column_part = [A-Za-z0-9_:\u4e00-\u9fa5\u00C0-\u017F]
19331933

19341934
param
19351935
= l:(':' ident_name) {

pegjs/mariadb.pegjs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1094,7 +1094,7 @@ ALTER_OPERATE_PARTITION
10941094
}
10951095
return expr
10961096
}
1097-
1097+
10981098
ALTER_ADD_COLUMN
10991099
= KW_ADD __
11001100
kc:KW_COLUMN? __
@@ -1745,7 +1745,7 @@ transaction_mode_isolation_level
17451745
value: `read ${e.toLowerCase()}`
17461746
}
17471747
}
1748-
1748+
17491749
transaction_mode
17501750
= 'ISOLATION'i __ 'LEVEL'i __ l:transaction_mode_isolation_level {
17511751
return {
@@ -1808,7 +1808,7 @@ transaction_stmt
18081808
}
18091809
}
18101810
}
1811-
1811+
18121812
load_data_field
18131813
= k:('FIELDS'i / 'COLUMNS'i) __ t:('TERMINATED'i __ 'BY'i __ ident_without_kw_type)? __ en:(('OPTIONALLY'i)? __ 'ENCLOSED'i __ 'BY'i __ ident_without_kw_type)? __ es:('ESCAPED'i __ 'BY'i __ ident_without_kw_type)? {
18141814
if (t) t[4].prefix = 'TERMINATED BY'
@@ -3203,7 +3203,7 @@ ident_start = [A-Za-z_\u4e00-\u9fa5]
32033203
ident_part = [A-Za-z0-9_$$\u4e00-\u9fa5\u00C0-\u017F]
32043204

32053205
// to support column name like `cf1:name` in hbase
3206-
column_part = [A-Za-z0-9_:]
3206+
column_part = [A-Za-z0-9_:\u4e00-\u9fa5\u00C0-\u017F]
32073207

32083208
param
32093209
= l:(':' ident_name) {
@@ -3612,7 +3612,7 @@ literal_basic
36123612
/ literal_bool
36133613
/ literal_null
36143614
/ literal_datetime
3615-
3615+
36163616
literal
36173617
= literal_basic / literal_numeric
36183618

pegjs/mysql.pegjs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1297,7 +1297,7 @@ ALTER_OPERATE_PARTITION
12971297
}
12981298
return expr
12991299
}
1300-
1300+
13011301
ALTER_ADD_COLUMN
13021302
= KW_ADD __
13031303
kc:KW_COLUMN __
@@ -2028,7 +2028,7 @@ transaction_mode_isolation_level
20282028
value: `read ${e.toLowerCase()}`
20292029
}
20302030
}
2031-
2031+
20322032
transaction_mode
20332033
= 'ISOLATION'i __ 'LEVEL'i __ l:transaction_mode_isolation_level {
20342034
return {
@@ -3483,7 +3483,7 @@ ident_start = [A-Za-z_\u4e00-\u9fa5]
34833483
ident_part = [A-Za-z0-9_$\u0080-\uffff]
34843484

34853485
// to support column name like `cf1:name` in hbase
3486-
column_part = [A-Za-z0-9_:]
3486+
column_part = [A-Za-z0-9_:\u4e00-\u9fa5\u00C0-\u017F]
34873487

34883488
param
34893489
= l:(':' ident_name) {
@@ -3893,10 +3893,10 @@ literal_basic
38933893
/ literal_bool
38943894
/ literal_null
38953895
/ literal_datetime
3896-
3896+
38973897
literal
38983898
= literal_basic / literal_numeric
3899-
3899+
39003900

39013901
literal_list
39023902
= head:literal tail:(__ COMMA __ literal)* {
@@ -3965,7 +3965,7 @@ literal_string
39653965
value: ca[1].join('')
39663966
};
39673967
}
3968-
3968+
39693969

39703970
literal_datetime
39713971
= type:(KW_TIME / KW_DATE / KW_TIMESTAMP / KW_DATETIME) __ ca:("'" single_char* "'") {

pegjs/sqlite.pegjs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2375,7 +2375,7 @@ ident_start = [A-Za-z_]
23752375
ident_part = [A-Za-z0-9_]
23762376

23772377
// to support column name like `cf1:name` in hbase
2378-
column_part = [A-Za-z0-9_:]
2378+
column_part = [A-Za-z0-9_:\u4e00-\u9fa5\u00C0-\u017F]
23792379

23802380
param
23812381
= l:(':' ident_name) {

pegjs/transactsql.pegjs

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1104,7 +1104,7 @@ create_constraint_default
11041104
with_values: w && { type: 'origin', value: 'with values' },
11051105
}
11061106
}
1107-
1107+
11081108
create_constraint_foreign
11091109
= kc:constraint_name? __
11101110
p:('FOREIGN KEY'i) __
@@ -2603,7 +2603,7 @@ ident_start = [A-Za-z_@#\u4e00-\u9fa5]
26032603
ident_part = [A-Za-z0-9_\-@$$\u4e00-\u9fa5\u00C0-\u017F]
26042604

26052605
// to support column name like `cf1:name` in hbase
2606-
column_part = [A-Za-z0-9_:]
2606+
column_part = [A-Za-z0-9_:\u4e00-\u9fa5\u00C0-\u017F]
26072607

26082608
param
26092609
= l:(':' ident_name) {
@@ -2794,7 +2794,7 @@ distinct_args
27942794
/ d:KW_DISTINCT? __ c:or_and_expr __ s:concat_separator? __ or:order_by_clause? {
27952795
return { distinct: d, expr: c, orderby: or, separator: s };
27962796
}
2797-
2797+
27982798
count_arg
27992799
= e:star_expr { return { expr: e }; }
28002800
/ distinct_args
@@ -2816,7 +2816,7 @@ within_group
28162816
= 'WITHIN'i __ KW_GROUP __ LPAREN __ or:order_by_clause __ RPAREN {
28172817
return { type: 'within', keyword: 'group', orderby: or };
28182818
}
2819-
2819+
28202820
func_call
28212821
= name:scalar_func __ LPAREN __ l:expr_list? __ RPAREN __ bc:over_partition? {
28222822
return {
@@ -3541,4 +3541,3 @@ uniqueidentifier_type
35413541
= lb:LBRAKE? __ t:(KW_UNIQUEIDENTIFIER) __ rb:RBRAKE? !{ return (lb && !rb) || (!lb && rb) } {
35423542
return { dataType: t }
35433543
}
3544-

pegjs/trino.pegjs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3558,7 +3558,7 @@ ident_start = [A-Za-z_\u4e00-\u9fa5]
35583558
ident_part = [A-Za-z0-9_$\u0080-\uffff]
35593559

35603560
// to support column name like `cf1:name` in hbase
3561-
column_part = [A-Za-z0-9_\u4e00-\u9fa5]
3561+
column_part = [A-Za-z0-9_\u4e00-\u9fa5\u00C0-\u017F]
35623562

35633563
param
35643564
= l:(':' ident_name) {
@@ -3970,7 +3970,7 @@ cast_data_type
39703970
if (p && s) t.quoted = '"'
39713971
return t
39723972
}
3973-
3973+
39743974
cast_double_colon
39753975
= s:(KW_DOUBLE_COLON __ cast_data_type)+ __ alias:alias_clause? {
39763976
return {

test/mysql-mariadb.spec.js

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -382,7 +382,7 @@ describe('mysql', () => {
382382
'SELECT * FROM (`t1` AS `eti` INNER JOIN `bagel` ON `bagel`.`id` = `eti`.`id`) ; SELECT * FROM ((`t1`))'
383383
]
384384
},
385-
385+
386386
{
387387
title: 'blob data type',
388388
sql: [
@@ -1229,8 +1229,8 @@ describe('mysql', () => {
12291229
inner join salaries using (emp_no)
12301230
order by emp_no desc;
12311231
1232-
SELECT * FROM \`employees\`
1233-
INNER JOIN \`salaries\` USING (\`emp_no\`)
1232+
SELECT * FROM \`employees\`
1233+
INNER JOIN \`salaries\` USING (\`emp_no\`)
12341234
ORDER BY \`emp_no\` DESC;`,
12351235
'SELECT * FROM `employees` INNER JOIN `salaries` USING (emp_no) ORDER BY `emp_no` DESC ; SELECT * FROM `employees` INNER JOIN `salaries` USING (`emp_no`) ORDER BY `emp_no` DESC'
12361236
]
@@ -1312,6 +1312,13 @@ describe('mysql', () => {
13121312
"SELECT * FROM `T` WHERE `a` LIKE 'foobar%' XOR `c` = `d`"
13131313
]
13141314
},
1315+
{
1316+
title: 'support accentuated identifiers',
1317+
sql: [
1318+
"SELECT crème AS brûlée FROM café WHERE théâtre = 'Molière'",
1319+
"SELECT `crème` AS `brûlée` FROM `café` WHERE `théâtre` = 'Molière'"
1320+
]
1321+
},
13151322
]
13161323
SQL_LIST.forEach(sqlInfo => {
13171324
const { title, sql } = sqlInfo
@@ -1322,13 +1329,13 @@ describe('mysql', () => {
13221329
expect(getParsedSql(sql[0], mariadb)).to.equal(sql[1])
13231330
})
13241331
})
1325-
1332+
13261333
it('should throw error when args is not right', () => {
13271334
let sql = `select convert(json_unquote(json_extract('{"thing": "252"}', "$.thing")));`
13281335
expect(parser.astify.bind(parser, sql)).to.throw('Expected "!=", "#", "#-", "#>", "#>>", "%", "&", "&&", "*", "+", ",", "-", "--", "->", "->>", "/", "/*", "<", "<<", "<=", "<>", "<@", "=", ">", ">=", ">>", "?", "?&", "?|", "@>", "AND", "BETWEEN", "IN", "IS", "LIKE", "NOT", "ON", "OR", "OVER", "REGEXP", "RLIKE", "USING", "XOR", "^", "div", "mod", "|", "||", or [ \\t\\n\\r] but ")" found.')
13291336
expect(parser.astify.bind(parser, 'select convert("");')).to.throw('Expected "!=", "#", "#-", "#>", "#>>", "%", "&", "&&", "*", "+", ",", "-", "--", "->", "->>", "/", "/*", "<", "<<", "<=", "<>", "<@", "=", ">", ">=", ">>", "?", "?&", "?|", "@>", "AND", "BETWEEN", "COLLATE", "IN", "IS", "LIKE", "NOT", "OR", "REGEXP", "RLIKE", "USING", "XOR", "^", "div", "mod", "|", "||", or [ \\t\\n\\r] but ")" found.')
13301337
sql = 'SELECT AVG(Quantity,age) FROM table1;'
1331-
expect(parser.astify.bind(parser, sql)).to.throw('Expected "!=", "#", "#-", "#>", "#>>", "%", "&", "&&", "(", ")", "*", "+", "-", "--", "->", "->>", ".", "/", "/*", "<", "<<", "<=", "<>", "<@", "=", ">", ">=", ">>", "?", "?&", "?|", "@>", "BETWEEN", "COLLATE", "IN", "IS", "LIKE", "NOT", "REGEXP", "RLIKE", "XOR", "^", "div", "mod", "|", "||", [ \\t\\n\\r], [A-Za-z0-9_$\\x80-], or [A-Za-z0-9_:] but "," found')
1338+
expect(parser.astify.bind(parser, sql)).to.throw('Expected "!=", "#", "#-", "#>", "#>>", "%", "&", "&&", "(", ")", "*", "+", "-", "--", "->", "->>", ".", "/", "/*", "<", "<<", "<=", "<>", "<@", "=", ">", ">=", ">>", "?", "?&", "?|", "@>", "BETWEEN", "COLLATE", "IN", "IS", "LIKE", "NOT", "REGEXP", "RLIKE", "XOR", "^", "div", "mod", "|", "||", [ \\t\\n\\r], [A-Za-z0-9_$\\x80-], or [A-Za-z0-9_:一-龥À-ſ] but "," found')
13321339
})
13331340

13341341
it('should join multiple table with comma', () => {
@@ -1481,4 +1488,4 @@ describe('mysql', () => {
14811488
})
14821489
})
14831490

1484-
})
1491+
})

0 commit comments

Comments
 (0)