Skip to content

Commit 8ea3299

Browse files
committed
Wrap the SUBSTRING() call in NULLIF when transpiling BigQuery REGEXP_EXTRACT to DuckDB
1 parent f7458a4 commit 8ea3299

File tree

2 files changed

+6
-3
lines changed

2 files changed

+6
-3
lines changed

sqlglot/dialects/duckdb.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1449,7 +1449,10 @@ def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
14491449
position = expression.args.get("position")
14501450
occurrence = expression.args.get("occurrence")
14511451
if position and (not position.is_int or position.to_py() > 1):
1452-
this = exp.Substring(this=this, start=position)
1452+
# SUBSTRING returns '' when position > len(string); wrap it in NULLIF(..., '') so that empty string becomes NULL instead of being passed on to REGEXP_EXTRACT
1453+
this = exp.Nullif(
1454+
this=exp.Substring(this=this, start=position), expression=exp.Literal.string("")
1455+
)
14531456

14541457
# Do not render group if there is no following argument,
14551458
# and it's the default value for this dialect

tests/dialects/test_bigquery.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2820,7 +2820,7 @@ def test_regexp_extract(self):
28202820
"SELECT REGEXP_EXTRACT(abc, 'pattern(group)', 2) FROM table",
28212821
write={
28222822
"bigquery": "SELECT REGEXP_EXTRACT(abc, 'pattern(group)', 2) FROM table",
2823-
"duckdb": '''SELECT REGEXP_EXTRACT(SUBSTRING(abc, 2), 'pattern(group)', 1) FROM "table"''',
2823+
"duckdb": '''SELECT REGEXP_EXTRACT(NULLIF(SUBSTRING(abc, 2), ''), 'pattern(group)', 1) FROM "table"''',
28242824
},
28252825
)
28262826

@@ -2838,7 +2838,7 @@ def test_regexp_extract(self):
28382838
"SELECT REGEXP_EXTRACT(abc, 'pattern(group)', 2, 3) FROM table",
28392839
write={
28402840
"bigquery": "SELECT REGEXP_EXTRACT(abc, 'pattern(group)', 2, 3) FROM table",
2841-
"duckdb": '''SELECT ARRAY_EXTRACT(REGEXP_EXTRACT_ALL(SUBSTRING(abc, 2), 'pattern(group)', 1), 3) FROM "table"''',
2841+
"duckdb": '''SELECT ARRAY_EXTRACT(REGEXP_EXTRACT_ALL(NULLIF(SUBSTRING(abc, 2), ''), 'pattern(group)', 1), 3) FROM "table"''',
28422842
},
28432843
)
28442844

0 commit comments

Comments
 (0)