From adf2ebdfa6cafc0250a9a0d53fc73903407a9074 Mon Sep 17 00:00:00 2001 From: Petr Novotnik Date: Wed, 19 Nov 2025 13:42:37 +0100 Subject: [PATCH 1/8] Oracle: Support for MERGE predicates --- src/ast/mod.rs | 37 +++- src/ast/spans.rs | 50 ++++- src/dialect/generic.rs | 16 ++ src/dialect/mod.rs | 111 ++++++++++- src/parser/merge.rs | 369 ++++++++++++++++++++++++++++++++++++ src/parser/mod.rs | 163 +--------------- tests/sqlparser_bigquery.rs | 17 +- tests/sqlparser_common.rs | 77 ++++++++ 8 files changed, 672 insertions(+), 168 deletions(-) create mode 100644 src/parser/merge.rs diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 63a7bebc7..acc8b8a72 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -8635,6 +8635,7 @@ impl Display for MergeInsertKind { /// /// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/merge) /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) +/// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/MERGE.html) #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] @@ -8653,6 +8654,10 @@ pub struct MergeInsertExpr { pub kind_token: AttachedToken, /// The insert type used by the statement. pub kind: MergeInsertKind, + /// An optional condition to restrict the insertion (Oracle specific) + /// + /// Enabled via [`Dialect::supports_merge_insert_predicate`](crate::dialect::Dialect::supports_merge_insert_predicate). 
+ pub insert_predicate: Option<Expr>, } impl Display for MergeInsertExpr { @@ -8660,7 +8665,11 @@ impl Display for MergeInsertExpr { if !self.columns.is_empty() { write!(f, "({}) ", display_comma_separated(self.columns.as_slice()))?; } - write!(f, "{}", self.kind) + write!(f, "{}", self.kind)?; + if let Some(predicate) = self.insert_predicate.as_ref() { + write!(f, " WHERE {}", predicate)?; + } + Ok(()) } } @@ -8673,6 +8682,7 @@ impl Display for MergeInsertExpr { /// /// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/merge) /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) +/// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/MERGE.html) #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] @@ -8693,7 +8703,16 @@ pub enum MergeAction { Update { /// The `UPDATE` token that starts the sub-expression. update_token: AttachedToken, + /// The update assignment expressions assignments: Vec<Assignment>, + /// `where_clause` for the update (Oracle specific) + /// + /// Enabled via [`Dialect::supports_merge_update_predicate`](crate::dialect::Dialect::supports_merge_update_predicate). + update_predicate: Option<Expr>, + /// `delete_clause` for the update "delete where" (Oracle specific) + /// + /// Enabled via [`Dialect::supports_merge_update_delete_predicate`](crate::dialect::Dialect::supports_merge_update_delete_predicate). + delete_predicate: Option<Expr>, }, /// A plain `DELETE` clause Delete { @@ -8708,8 +8727,20 @@ impl Display for MergeAction { MergeAction::Insert(insert) => { write!(f, "INSERT {insert}") } - MergeAction::Update { assignments, ..
} => { - write!(f, "UPDATE SET {}", display_comma_separated(assignments)) + MergeAction::Update { + update_token: _, + assignments, + update_predicate, + delete_predicate, + } => { + write!(f, "UPDATE SET {}", display_comma_separated(assignments))?; + if let Some(predicate) = update_predicate.as_ref() { + write!(f, " WHERE {predicate}")?; + } + if let Some(predicate) = delete_predicate.as_ref() { + write!(f, " DELETE WHERE {predicate}")?; + } + Ok(()) } MergeAction::Delete { .. } => { write!(f, "DELETE") diff --git a/src/ast/spans.rs b/src/ast/spans.rs index 994cee972..8f8ec960e 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -2421,8 +2421,13 @@ impl Spanned for MergeAction { MergeAction::Update { update_token, assignments, + update_predicate, + delete_predicate, } => union_spans( - core::iter::once(update_token.0.span).chain(assignments.iter().map(Spanned::span)), + core::iter::once(update_token.0.span) + .chain(assignments.iter().map(Spanned::span)) + .chain(update_predicate.iter().map(Spanned::span)) + .chain(delete_predicate.iter().map(Spanned::span)), ), MergeAction::Delete { delete_token } => delete_token.0.span, } @@ -2441,6 +2446,7 @@ impl Spanned for MergeInsertExpr { }, ] .into_iter() + .chain(self.insert_predicate.iter().map(Spanned::span)) .chain(self.columns.iter().map(|i| i.span)), ) } @@ -2814,6 +2820,8 @@ WHERE id = 1 if let MergeAction::Update { update_token, assignments: _, + update_predicate: _, + delete_predicate: _, } = &clauses[1].action { assert_eq!( @@ -2934,4 +2942,44 @@ WHERE id = 1 panic!("not a MERGE statement"); }; } + + #[test] + fn test_merge_statement_spans_with_update_predicates() { + let sql = r#" + MERGE INTO a USING b ON a.id = b.id + WHEN MATCHED THEN + UPDATE set a.x = a.x + b.x + WHERE b.x != 2 + DELETE WHERE a.x <> 3"#; + + let r = Parser::parse_sql(&crate::dialect::GenericDialect, sql).unwrap(); + assert_eq!(1, r.len()); + + // ~ assert the span of the whole statement + let stmt_span = r[0].span(); + assert_eq!( + 
stmt_span, + Span::new(Location::new(2, 8), Location::new(6, 36)) + ); + } + + #[test] + fn test_merge_statement_spans_with_insert_predicate() { + let sql = r#" + MERGE INTO a USING b ON a.id = b.id + WHEN NOT MATCHED THEN + INSERT VALUES (b.x, b.y) WHERE b.x != 2 +-- qed +"#; + + let r = Parser::parse_sql(&crate::dialect::GenericDialect, sql).unwrap(); + assert_eq!(1, r.len()); + + // ~ assert the span of the whole statement + let stmt_span = r[0].span(); + assert_eq!( + stmt_span, + Span::new(Location::new(2, 8), Location::new(4, 52)) + ); + } } diff --git a/src/dialect/generic.rs b/src/dialect/generic.rs index dffc5b527..b606ad9e2 100644 --- a/src/dialect/generic.rs +++ b/src/dialect/generic.rs @@ -195,4 +195,20 @@ impl Dialect for GenericDialect { fn supports_interval_options(&self) -> bool { true } + + fn supports_merge_insert_qualified_columns(&self) -> bool { + true + } + + fn supports_merge_insert_predicate(&self) -> bool { + true + } + + fn supports_merge_update_predicate(&self) -> bool { + true + } + + fn supports_merge_update_delete_predicate(&self) -> bool { + true + } } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index ef4e1cdde..8c532c021 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -601,13 +601,122 @@ pub trait Dialect: Debug + Any { false } - /// Return true if the dialect supports specifying multiple options + /// Returns true if the dialect supports specifying multiple options /// in a `CREATE TABLE` statement for the structure of the new table. For example: /// `CREATE TABLE t (a INT, b INT) AS SELECT 1 AS b, 2 AS a` fn supports_create_table_multi_schema_info_sources(&self) -> bool { false } + /// Returns `true` if the dialect supports qualified column names + /// as part of a MERGE's INSERT's column list. 
Example: + /// + /// ```sql + /// MERGE INTO FOO + /// USING FOO_IMP + /// ON (FOO.ID = FOO_IMP.ID) + /// WHEN NOT MATCHED THEN + /// -- no qualifier + /// INSERT (ID, NAME) + /// VALUES (FOO_IMP.ID, UPPER(FOO_IMP.NAME)) + /// ``` + /// vs. + /// ```sql + /// MERGE INTO FOO + /// USING FOO_IMP + /// ON (FOO.ID = FOO_IMP.ID) + /// WHEN NOT MATCHED THEN + /// -- here: qualified + /// INSERT (FOO.ID, FOO.NAME) + /// VALUES (FOO_IMP.ID, UPPER(FOO_IMP.NAME)) + /// ``` + /// or + /// ```sql + /// MERGE INTO FOO X + /// USING FOO_IMP + /// ON (X.ID = FOO_IMP.ID) + /// WHEN NOT MATCHED THEN + /// -- here: qualified using the alias + /// INSERT (X.ID, X.NAME) + /// VALUES (FOO_IMP.ID, UPPER(FOO_IMP.NAME)) + /// ``` + /// + /// Note: in the latter case, the qualifier must match the target table + /// name or its alias if one is present. The parser will enforce this. + /// + /// The default implementation always returns `false`, not allowing the + /// qualifiers. + fn supports_merge_insert_qualified_columns(&self) -> bool { + false + } + + /// Returns `true` if the dialect supports specifying an INSERT predicate in + /// MERGE statements. Example: + /// + /// ```sql + /// MERGE INTO FOO + /// USING FOO_IMP + /// ON (FOO.ID = FOO_IMP.ID) + /// WHEN NOT MATCHED THEN + /// INSERT (ID, NAME) + /// VALUES (FOO_IMP.ID, UPPER(FOO_IMP.NAME)) + /// -- insert predicate + /// WHERE NOT FOO_IMP.NAME like '%.IGNORE' + /// ``` + /// + /// The default implementation always returns `false`, indicating no + /// support for the additional predicate. + /// + /// See also [Dialect::supports_merge_update_predicate] and + /// [Dialect::supports_merge_update_delete_predicate]. + fn supports_merge_insert_predicate(&self) -> bool { + false + } + + /// Indicates support for UPDATE predicates in MERGE + /// statements.
Example: + /// + /// ```sql + /// MERGE INTO FOO + /// USING FOO_IMPORT + /// ON (FOO.ID = FOO_IMPORT.ID) + /// WHEN MATCHED THEN + /// UPDATE SET FOO.NAME = FOO_IMPORT.NAME + /// -- update predicate + /// WHERE FOO.NAME <> 'pete' + /// ``` + /// + /// The default implementation always returns false, indicating no support + /// for the additional predicate. + /// + /// See also [Dialect::supports_merge_insert_predicate] and + /// [Dialect::supports_merge_update_delete_predicate]. + fn supports_merge_update_predicate(&self) -> bool { + false + } + + /// Indicates support for UPDATE ... DELETEs and associated predicates + /// in MERGE statements. Example: + /// + /// ```sql + /// MERGE INTO FOO + /// USING FOO_IMPORT + /// ON (FOO.ID = FOO_IMPORT.ID) + /// WHEN MATCHED THEN + /// UPDATE SET FOO.NAME = FOO_IMPORT.NAME + /// -- update delete with predicate + /// DELETE WHERE UPPER(FOO.NAME) = FOO.NAME + /// ``` + /// + /// The default implementation always returns false, indicating no support + /// for the `UPDATE ... DELETE` and its associated predicate. + /// + /// See also [Dialect::supports_merge_insert_predicate] and + /// [Dialect::supports_merge_update_predicate]. + fn supports_merge_update_delete_predicate(&self) -> bool { + false + } + /// Dialect-specific infix parser override /// /// This method is called to parse the next infix expression. diff --git a/src/parser/merge.rs b/src/parser/merge.rs new file mode 100644 index 000000000..992ed3917 --- /dev/null +++ b/src/parser/merge.rs @@ -0,0 +1,369 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and +// limitations under the License. + +//! SQL Parser for MERGE + +#[cfg(not(feature = "std"))] +use alloc::{boxed::Box, format, string::ToString, vec, vec::Vec}; + +use crate::{ + ast::{ + Ident, MergeAction, MergeClause, MergeClauseKind, MergeInsertExpr, MergeInsertKind, + ObjectName, ObjectNamePart, SetExpr, Statement, TableFactor, + }, + dialect::{BigQueryDialect, GenericDialect, MySqlDialect}, + keywords::Keyword, + parser::IsOptional, + tokenizer::TokenWithSpan, +}; + +use super::{Parser, ParserError}; + +impl Parser<'_> { + /// Parse a MERGE statement, returning a `Box`ed SetExpr + /// + /// This is used to reduce the size of the stack frames in debug builds + pub(super) fn parse_merge_setexpr_boxed( + &mut self, + merge_token: TokenWithSpan, + ) -> Result, ParserError> { + Ok(Box::new(SetExpr::Merge(self.parse_merge(merge_token)?))) + } + + pub fn parse_merge(&mut self, merge_token: TokenWithSpan) -> Result { + let into = self.parse_keyword(Keyword::INTO); + + let table = self.parse_table_factor()?; + + self.expect_keyword_is(Keyword::USING)?; + let source = self.parse_table_factor()?; + self.expect_keyword_is(Keyword::ON)?; + let on = self.parse_expr()?; + let clauses = self.parse_merge_clauses(&table)?; + let output = match self.parse_one_of_keywords(&[Keyword::OUTPUT, Keyword::RETURNING]) { + Some(keyword) => Some(self.parse_output(keyword, self.get_current_token().clone())?), + None => None, + }; + + Ok(Statement::Merge { + merge_token: merge_token.into(), + into, + table, + source, + on: Box::new(on), + clauses, + output, + }) + } + + fn parse_merge_clauses( + &mut self, + target_table: &TableFactor, + ) -> Result, ParserError> { + let mut clauses = vec![]; + loop { + if !(self.parse_keyword(Keyword::WHEN)) { + break; + } + let when_token = self.get_current_token().clone(); + + let mut clause_kind = MergeClauseKind::Matched; + if self.parse_keyword(Keyword::NOT) { + clause_kind = 
MergeClauseKind::NotMatched; + } + self.expect_keyword_is(Keyword::MATCHED)?; + + if matches!(clause_kind, MergeClauseKind::NotMatched) + && self.parse_keywords(&[Keyword::BY, Keyword::SOURCE]) + { + clause_kind = MergeClauseKind::NotMatchedBySource; + } else if matches!(clause_kind, MergeClauseKind::NotMatched) + && self.parse_keywords(&[Keyword::BY, Keyword::TARGET]) + { + clause_kind = MergeClauseKind::NotMatchedByTarget; + } + + let predicate = if self.parse_keyword(Keyword::AND) { + Some(self.parse_expr()?) + } else { + None + }; + + self.expect_keyword_is(Keyword::THEN)?; + + let merge_clause = match self.parse_one_of_keywords(&[ + Keyword::UPDATE, + Keyword::INSERT, + Keyword::DELETE, + ]) { + Some(Keyword::UPDATE) => { + if matches!( + clause_kind, + MergeClauseKind::NotMatched | MergeClauseKind::NotMatchedByTarget + ) { + return parser_err!( + format_args!("UPDATE is not allowed in a {clause_kind} merge clause"), + self.get_current_token().span.start + ); + } + + let update_token = self.get_current_token().clone(); + self.expect_keyword_is(Keyword::SET)?; + let assignments = self.parse_comma_separated(Parser::parse_assignment)?; + let update_predicate = if self.dialect.supports_merge_update_predicate() + && self.parse_keyword(Keyword::WHERE) + { + Some(self.parse_expr()?) + } else { + None + }; + let delete_predicate = if self.dialect.supports_merge_update_delete_predicate() + && self.parse_keyword(Keyword::DELETE) + { + let _ = self.expect_keyword(Keyword::WHERE)?; + Some(self.parse_expr()?) 
+ } else { + None + }; + MergeAction::Update { + update_token: update_token.into(), + assignments, + update_predicate, + delete_predicate, + } + } + Some(Keyword::DELETE) => { + if matches!( + clause_kind, + MergeClauseKind::NotMatched | MergeClauseKind::NotMatchedByTarget + ) { + return parser_err!( + format_args!("DELETE is not allowed in a {clause_kind} merge clause"), + self.get_current_token().span.start + ); + }; + + let delete_token = self.get_current_token().clone(); + MergeAction::Delete { + delete_token: delete_token.into(), + } + } + Some(Keyword::INSERT) => { + if !matches!( + clause_kind, + MergeClauseKind::NotMatched | MergeClauseKind::NotMatchedByTarget + ) { + return parser_err!( + format_args!("INSERT is not allowed in a {clause_kind} merge clause"), + self.get_current_token().span.start + ); + }; + + let insert_token = self.get_current_token().clone(); + let is_mysql = dialect_of!(self is MySqlDialect); + + let columns = self.parse_merge_clause_insert_columns( + target_table, + &clause_kind, + is_mysql, + )?; + let (kind, kind_token) = if dialect_of!(self is BigQueryDialect | GenericDialect) + && self.parse_keyword(Keyword::ROW) + { + (MergeInsertKind::Row, self.get_current_token().clone()) + } else { + self.expect_keyword_is(Keyword::VALUES)?; + let values_token = self.get_current_token().clone(); + let values = self.parse_values(is_mysql, false)?; + (MergeInsertKind::Values(values), values_token) + }; + let insert_predicate = if self.dialect.supports_merge_insert_predicate() + && self.parse_keyword(Keyword::WHERE) + { + Some(self.parse_expr()?) 
+ } else { + None + }; + + MergeAction::Insert(MergeInsertExpr { + insert_token: insert_token.into(), + columns, + kind_token: kind_token.into(), + kind, + insert_predicate, + }) + } + _ => { + return parser_err!( + "expected UPDATE, DELETE or INSERT in merge clause", + self.peek_token_ref().span.start + ); + } + }; + clauses.push(MergeClause { + when_token: when_token.into(), + clause_kind, + predicate, + action: merge_clause, + }); + } + Ok(clauses) + } + + fn parse_merge_clause_insert_columns( + &mut self, + target_table: &TableFactor, + clause_kind: &MergeClauseKind, + allow_empty: bool, + ) -> Result, ParserError> { + if self.dialect.supports_merge_insert_qualified_columns() { + let cols = + self.parse_parenthesized_qualified_column_list(IsOptional::Optional, allow_empty)?; + if let TableFactor::Table { name, alias, .. } = target_table { + if let Some(alias) = alias { + if alias.columns.is_empty() { + // ~ only the alias is supported at this point + unqualify_columns(cols, None, Some(&alias.name)).map_err(|e| { + ParserError::ParserError(format!( + "Invalid column for INSERT in a {clause_kind} merge clause: {e}" + )) + }) + } else { + Err(ParserError::ParserError(format!( + "Invalid target ALIAS for INSERT in a {clause_kind} merge clause; must be an identifier" + ))) + } + } else { + // ~ allow the full qualifier, but also just the table name + if name.0.len() == 1 { + unqualify_columns(cols, Some(name), None).map_err(|e| { + ParserError::ParserError(format!( + "Invalid column for INSERT in a {clause_kind} merge clause: {e}" + )) + }) + } else if let Some(table_name) = + name.0.last().and_then(ObjectNamePart::as_ident) + { + unqualify_columns(cols, Some(name), Some(table_name)).map_err(|e| { + ParserError::ParserError(format!( + "Invalid column for INSERT in a {clause_kind} merge clause: {e}" + )) + }) + } else { + Err(ParserError::ParserError(format!( + "Invalid target table NAME for INSERT in a {clause_kind} merge clause; must be an identifier" + ))) + } + } 
+ } else { + Err(ParserError::ParserError(format!( + "Invalid target for INSERT in a {clause_kind} merge clause; must be a TABLE identifier" + ))) + } + } else { + self.parse_parenthesized_column_list(IsOptional::Optional, allow_empty) + } + } +} + +/// Helper to unqualify a list of columns with either a qualified prefix or a +/// qualifier identifier +/// +/// Oracle allows `INSERT ([qualifier.]column_name, ...)` in MERGE statements +/// with `qualifier` referring to the alias of the target table (if one is +/// present) or, if no alias is present, to the target table name itself - +/// either qualified or unqualified. +fn unqualify_columns( + columns: Vec, + allowed_qualifier_1: Option<&ObjectName>, + allowed_qualifier_2: Option<&Ident>, +) -> Result, &'static str> { + // ~ helper to turn a column name (part) into a plain `ident` + // possibly bailing with error + fn to_ident(name: ObjectNamePart) -> Result { + match name { + ObjectNamePart::Identifier(ident) => Ok(ident), + ObjectNamePart::Function(_) => Err("not an identifier"), + } + } + + // ~ helper to return the last part of `name` if it is + // preceded by `prefix` + fn unqualify_column( + mut name: ObjectName, + prefix: &ObjectName, + ) -> Result { + let mut name_iter = name.0.iter(); + let mut prefix_iter = prefix.0.iter(); + loop { + match (name_iter.next(), prefix_iter.next()) { + (Some(_), None) => { + if name_iter.next().is_none() { + return Ok(name.0.pop().expect("missing name part")); + } else { + return Err(name); + } + } + (Some(c), Some(q)) if c == q => { + // ~ continue matching next part + } + _ => { + return Err(name); + } + } + } + } + + let mut unqualified = Vec::::with_capacity(columns.len()); + for mut name in columns { + if name.0.is_empty() { + return Err("empty column name"); + } + + if name.0.len() == 1 { + unqualified.push(to_ident(name.0.pop().expect("missing name part"))?); + continue; + } + + // ~ try matching by the primary prefix + if let Some(allowed_qualifier) = 
allowed_qualifier_1 { + match unqualify_column(name, allowed_qualifier) { + Ok(ident) => { + unqualified.push(to_ident(ident)?); + continue; + } + Err(n) => { + // ~ continue trying with the alternate prefix below + name = n; + } + } + } + + // ~ try matching by the alternate prefix + if let Some(allowed_qualifier) = allowed_qualifier_2 { + if name.0.len() == 2 + && name + .0 + .first() + .and_then(ObjectNamePart::as_ident) + .map(|i| i == allowed_qualifier) + .unwrap_or(false) + { + unqualified.push(to_ident(name.0.pop().expect("missing name part"))?); + continue; + } + } + + return Err("not matching target table"); + } + Ok(unqualified) +} diff --git a/src/parser/mod.rs b/src/parser/mod.rs index f3daf628a..32fa6e7ec 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -45,8 +45,6 @@ use crate::keywords::{Keyword, ALL_KEYWORDS}; use crate::tokenizer::*; use sqlparser::parser::ParserState::ColumnDefinition; -mod alter; - #[derive(Debug, Clone, PartialEq, Eq)] pub enum ParserError { TokenizerError(String), @@ -61,6 +59,9 @@ macro_rules! parser_err { }; } +mod alter; +mod merge; + #[cfg(feature = "std")] /// Implementation [`RecursionCounter`] if std is available mod recursion { @@ -11710,7 +11711,7 @@ impl<'a> Parser<'a> { token => { return Err(ParserError::ParserError(format!( "Unexpected token in identifier: {token}" - )))? + )))?; } } } @@ -12214,16 +12215,6 @@ impl<'a> Parser<'a> { Ok(Box::new(SetExpr::Delete(self.parse_delete(delete_token)?))) } - /// Parse a MERGE statement, returning a `Box`ed SetExpr - /// - /// This is used to reduce the size of the stack frames in debug builds - fn parse_merge_setexpr_boxed( - &mut self, - merge_token: TokenWithSpan, - ) -> Result, ParserError> { - Ok(Box::new(SetExpr::Merge(self.parse_merge(merge_token)?))) - } - pub fn parse_delete(&mut self, delete_token: TokenWithSpan) -> Result { let (tables, with_from_keyword) = if !self.parse_keyword(Keyword::FROM) { // `FROM` keyword is optional in BigQuery SQL. 
@@ -17294,126 +17285,6 @@ impl<'a> Parser<'a> { }) } - pub fn parse_merge_clauses(&mut self) -> Result, ParserError> { - let mut clauses = vec![]; - loop { - if !(self.parse_keyword(Keyword::WHEN)) { - break; - } - let when_token = self.get_current_token().clone(); - - let mut clause_kind = MergeClauseKind::Matched; - if self.parse_keyword(Keyword::NOT) { - clause_kind = MergeClauseKind::NotMatched; - } - self.expect_keyword_is(Keyword::MATCHED)?; - - if matches!(clause_kind, MergeClauseKind::NotMatched) - && self.parse_keywords(&[Keyword::BY, Keyword::SOURCE]) - { - clause_kind = MergeClauseKind::NotMatchedBySource; - } else if matches!(clause_kind, MergeClauseKind::NotMatched) - && self.parse_keywords(&[Keyword::BY, Keyword::TARGET]) - { - clause_kind = MergeClauseKind::NotMatchedByTarget; - } - - let predicate = if self.parse_keyword(Keyword::AND) { - Some(self.parse_expr()?) - } else { - None - }; - - self.expect_keyword_is(Keyword::THEN)?; - - let merge_clause = match self.parse_one_of_keywords(&[ - Keyword::UPDATE, - Keyword::INSERT, - Keyword::DELETE, - ]) { - Some(Keyword::UPDATE) => { - if matches!( - clause_kind, - MergeClauseKind::NotMatched | MergeClauseKind::NotMatchedByTarget - ) { - return parser_err!( - format_args!("UPDATE is not allowed in a {clause_kind} merge clause"), - self.get_current_token().span.start - ); - } - - let update_token = self.get_current_token().clone(); - self.expect_keyword_is(Keyword::SET)?; - MergeAction::Update { - update_token: update_token.into(), - assignments: self.parse_comma_separated(Parser::parse_assignment)?, - } - } - Some(Keyword::DELETE) => { - if matches!( - clause_kind, - MergeClauseKind::NotMatched | MergeClauseKind::NotMatchedByTarget - ) { - return parser_err!( - format_args!("DELETE is not allowed in a {clause_kind} merge clause"), - self.get_current_token().span.start - ); - }; - - let delete_token = self.get_current_token().clone(); - MergeAction::Delete { - delete_token: delete_token.into(), - } - } - 
Some(Keyword::INSERT) => { - if !matches!( - clause_kind, - MergeClauseKind::NotMatched | MergeClauseKind::NotMatchedByTarget - ) { - return parser_err!( - format_args!("INSERT is not allowed in a {clause_kind} merge clause"), - self.get_current_token().span.start - ); - }; - - let insert_token = self.get_current_token().clone(); - let is_mysql = dialect_of!(self is MySqlDialect); - - let columns = self.parse_parenthesized_column_list(Optional, is_mysql)?; - let (kind, kind_token) = if dialect_of!(self is BigQueryDialect | GenericDialect) - && self.parse_keyword(Keyword::ROW) - { - (MergeInsertKind::Row, self.get_current_token().clone()) - } else { - self.expect_keyword_is(Keyword::VALUES)?; - let values_token = self.get_current_token().clone(); - let values = self.parse_values(is_mysql, false)?; - (MergeInsertKind::Values(values), values_token) - }; - MergeAction::Insert(MergeInsertExpr { - insert_token: insert_token.into(), - columns, - kind_token: kind_token.into(), - kind, - }) - } - _ => { - return parser_err!( - "expected UPDATE, DELETE or INSERT in merge clause", - self.peek_token_ref().span.start - ); - } - }; - clauses.push(MergeClause { - when_token: when_token.into(), - clause_kind, - predicate, - action: merge_clause, - }); - } - Ok(clauses) - } - fn parse_output( &mut self, start_keyword: Keyword, @@ -17457,32 +17328,6 @@ impl<'a> Parser<'a> { }) } - pub fn parse_merge(&mut self, merge_token: TokenWithSpan) -> Result { - let into = self.parse_keyword(Keyword::INTO); - - let table = self.parse_table_factor()?; - - self.expect_keyword_is(Keyword::USING)?; - let source = self.parse_table_factor()?; - self.expect_keyword_is(Keyword::ON)?; - let on = self.parse_expr()?; - let clauses = self.parse_merge_clauses()?; - let output = match self.parse_one_of_keywords(&[Keyword::OUTPUT, Keyword::RETURNING]) { - Some(keyword) => Some(self.parse_output(keyword, self.get_current_token().clone())?), - None => None, - }; - - Ok(Statement::Merge { - merge_token: 
merge_token.into(), - into, - table, - source, - on: Box::new(on), - clauses, - output, - }) - } - fn parse_pragma_value(&mut self) -> Result { match self.parse_value()?.value { v @ Value::SingleQuotedString(_) => Ok(v), diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index f2b9f2aff..7e82be3cc 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -1813,6 +1813,7 @@ fn parse_merge() { explicit_row: false, rows: vec![vec![Expr::value(number("1")), Expr::value(number("2"))]], }), + insert_predicate: None, }); let update_action = MergeAction::Update { update_token: AttachedToken::empty(), @@ -1826,6 +1827,8 @@ fn parse_merge() { value: Expr::value(number("2")), }, ], + update_predicate: None, + delete_predicate: None, }; match bigquery_and_generic().verified_stmt(sql) { @@ -1920,6 +1923,7 @@ fn parse_merge() { columns: vec![Ident::new("product"), Ident::new("quantity"),], kind_token: AttachedToken::empty(), kind: MergeInsertKind::Row, + insert_predicate: None, }) }, MergeClause { @@ -1931,6 +1935,7 @@ fn parse_merge() { columns: vec![Ident::new("product"), Ident::new("quantity"),], kind_token: AttachedToken::empty(), kind: MergeInsertKind::Row, + insert_predicate: None, }) }, MergeClause { @@ -1941,7 +1946,8 @@ fn parse_merge() { insert_token: AttachedToken::empty(), columns: vec![], kind_token: AttachedToken::empty(), - kind: MergeInsertKind::Row + kind: MergeInsertKind::Row, + insert_predicate: None, }) }, MergeClause { @@ -1952,7 +1958,8 @@ fn parse_merge() { insert_token: AttachedToken::empty(), columns: vec![], kind_token: AttachedToken::empty(), - kind: MergeInsertKind::Row + kind: MergeInsertKind::Row, + insert_predicate: None, }) }, MergeClause { @@ -1984,7 +1991,8 @@ fn parse_merge() { Expr::value(number("1")), Expr::Identifier(Ident::new("DEFAULT")), ]] - }) + }), + insert_predicate: None, }) }, MergeClause { @@ -2002,7 +2010,8 @@ fn parse_merge() { Expr::value(number("1")), 
Expr::Identifier(Ident::new("DEFAULT")), ]] - }) + }), + insert_predicate: None, }) }, ], diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 91952b8c0..0d27f069c 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -1637,6 +1637,10 @@ fn ms_and_generic() -> TestedDialects { TestedDialects::new(vec![Box::new(MsSqlDialect {}), Box::new(GenericDialect {})]) } +fn only_generic() -> TestedDialects { + TestedDialects::new(vec![Box::new(GenericDialect {})]) +} + #[test] fn parse_json_ops_without_colon() { use self::BinaryOperator::*; @@ -9937,6 +9941,7 @@ fn parse_merge() { ]), ]] }), + insert_predicate: None, }), }, MergeClause { @@ -9976,6 +9981,8 @@ fn parse_merge() { ]), }, ], + update_predicate: None, + delete_predicate: None, }, }, MergeClause { @@ -10068,6 +10075,76 @@ fn test_merge_with_delimiter() { } } +#[test] +fn test_merge_with_predicates() { + let sql = "\ +MERGE INTO FOO \ +USING FOO_IMPORT \ +ON (FOO.ID = FOO_IMPORT.ID) \ +WHEN MATCHED THEN \ +UPDATE SET FOO.NAME = FOO_IMPORT.NAME \ +WHERE 1 = 1 \ +DELETE WHERE FOO.NAME LIKE '%.DELETE' \ +WHEN NOT MATCHED THEN \ +INSERT (ID, NAME) \ +VALUES (FOO_IMPORT.ID, UPPER(FOO_IMPORT.NAME)) \ +WHERE NOT FOO_IMPORT.NAME LIKE '%.DO_NOT_INSERT'"; + only_generic().verified_stmt(sql); +} + +#[test] +fn test_merge_with_insert_qualified_columns() { + let sql = "\ +MERGE INTO FOO USING FOO_IMPORT ON (FOO.ID = FOO_IMPORT.ID) \ +WHEN NOT MATCHED THEN \ +INSERT (FOO.ID, FOO.NAME) \ +VALUES (1, 2)"; + + let expected = "\ +MERGE INTO FOO USING FOO_IMPORT ON (FOO.ID = FOO_IMPORT.ID) \ +WHEN NOT MATCHED THEN \ +INSERT (ID, NAME) \ +VALUES (1, 2)"; + + only_generic().one_statement_parses_to(sql, expected); +} + +#[test] +fn test_merge_with_insert_qualified_columns_via_alias() { + let sql = "\ +MERGE INTO FOO F USING FOO_IMPORT ON (F.ID = FOO_IMPORT.ID) \ +WHEN NOT MATCHED THEN \ +INSERT (F.ID, F.NAME) \ +VALUES (1, 2)"; + + // note: this serialized form will break execution on an Oracle 
database + // as it doesn't allow the "AS" keyword; Issue #1784 + let expected = "\ +MERGE INTO FOO AS F USING FOO_IMPORT ON (F.ID = FOO_IMPORT.ID) \ +WHEN NOT MATCHED THEN \ +INSERT (ID, NAME) \ +VALUES (1, 2)"; + + only_generic().one_statement_parses_to(sql, expected); +} + +#[test] +fn test_merge_with_insert_qualified_columns_with_schema() { + let sql = "\ +MERGE INTO PLAYGROUND.FOO USING FOO_IMPORT ON (PLAYGROUND.FOO.ID = FOO_IMPORT.ID) \ +WHEN NOT MATCHED THEN \ +INSERT (PLAYGROUND.FOO.ID, PLAYGROUND.FOO.NAME) \ +VALUES (1, 2)"; + + let expected = "\ +MERGE INTO PLAYGROUND.FOO USING FOO_IMPORT ON (PLAYGROUND.FOO.ID = FOO_IMPORT.ID) \ +WHEN NOT MATCHED THEN \ +INSERT (ID, NAME) \ +VALUES (1, 2)"; + + only_generic().one_statement_parses_to(sql, expected); +} + #[test] fn test_merge_invalid_statements() { let dialects = all_dialects(); From 3953c5086617715b42ec44966da2b47b53ab0ec6 Mon Sep 17 00:00:00 2001 From: Petr Novotnik Date: Sun, 23 Nov 2025 09:11:43 +0100 Subject: [PATCH 2/8] `Merge` as top-level struct --- src/ast/dml.rs | 346 +++++++++++++++++++++++++++++++++++- src/ast/mod.rs | 326 +-------------------------------- src/ast/spans.rs | 79 ++++---- src/parser/merge.rs | 39 +++- src/parser/mod.rs | 27 --- tests/sqlparser_bigquery.rs | 8 +- tests/sqlparser_common.rs | 12 +- 7 files changed, 431 insertions(+), 406 deletions(-) diff --git a/src/ast/dml.rs b/src/ast/dml.rs index d6009ce8a..cf689f703 100644 --- a/src/ast/dml.rs +++ b/src/ast/dml.rs @@ -24,13 +24,16 @@ use serde::{Deserialize, Serialize}; #[cfg(feature = "visitor")] use sqlparser_derive::{Visit, VisitMut}; -use crate::display_utils::{indented_list, Indent, SpaceOrNewline}; +use crate::{ + ast::display_separated, + display_utils::{indented_list, Indent, SpaceOrNewline}, +}; use super::{ display_comma_separated, helpers::attached_token::AttachedToken, query::InputFormatClause, Assignment, Expr, FromTable, Ident, InsertAliases, MysqlInsertPriority, ObjectName, OnInsert, - OrderByExpr, Query, 
SelectItem, Setting, SqliteOnConflict, TableObject, TableWithJoins, - UpdateTableFromKind, + OrderByExpr, Query, SelectInto, SelectItem, Setting, SqliteOnConflict, TableFactor, + TableObject, TableWithJoins, UpdateTableFromKind, Values, }; /// INSERT statement. @@ -310,3 +313,340 @@ impl Display for Update { Ok(()) } } + +/// MERGE statement. +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct Merge { + /// The `MERGE` token that starts the statement. + pub merge_token: AttachedToken, + /// optional INTO keyword + pub into: bool, + /// Specifies the table to merge + pub table: TableFactor, + /// Specifies the table or subquery to join with the target table + pub source: TableFactor, + /// Specifies the expression on which to join the target table and source + pub on: Box, + /// Specifies the actions to perform when values match or do not match. 
+ pub clauses: Vec, + // Specifies the output to save changes in MSSQL + pub output: Option, +} + +impl Display for Merge { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "MERGE{int} {table} USING {source} ", + int = if self.into { " INTO" } else { "" }, + table = self.table, + source = self.source, + )?; + write!(f, "ON {on} ", on = self.on)?; + write!(f, "{}", display_separated(&self.clauses, " "))?; + if let Some(ref output) = self.output { + write!(f, " {output}")?; + } + Ok(()) + } +} + +/// A `WHEN` clause within a `MERGE` Statement +/// +/// Example: +/// ```sql +/// WHEN NOT MATCHED BY SOURCE AND product LIKE '%washer%' THEN DELETE +/// ``` +/// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/merge) +/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct MergeClause { + /// The `WHEN` token that starts the sub-expression. + pub when_token: AttachedToken, + pub clause_kind: MergeClauseKind, + pub predicate: Option, + pub action: MergeAction, +} + +impl Display for MergeClause { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let MergeClause { + when_token: _, + clause_kind, + predicate, + action, + } = self; + + write!(f, "WHEN {clause_kind}")?; + if let Some(pred) = predicate { + write!(f, " AND {pred}")?; + } + write!(f, " THEN {action}") + } +} + +/// Variant of `WHEN` clause used within a `MERGE` Statement. 
+/// +/// Example: +/// ```sql +/// MERGE INTO T USING U ON FALSE WHEN MATCHED THEN DELETE +/// ``` +/// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/merge) +/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum MergeClauseKind { + /// `WHEN MATCHED` + Matched, + /// `WHEN NOT MATCHED` + NotMatched, + /// `WHEN NOT MATCHED BY TARGET` + /// + /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) + NotMatchedByTarget, + /// `WHEN NOT MATCHED BY SOURCE` + /// + /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) + NotMatchedBySource, +} + +impl Display for MergeClauseKind { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + MergeClauseKind::Matched => write!(f, "MATCHED"), + MergeClauseKind::NotMatched => write!(f, "NOT MATCHED"), + MergeClauseKind::NotMatchedByTarget => write!(f, "NOT MATCHED BY TARGET"), + MergeClauseKind::NotMatchedBySource => write!(f, "NOT MATCHED BY SOURCE"), + } + } +} + +/// Underlying statement of a `WHEN` clause within a `MERGE` Statement +/// +/// Example +/// ```sql +/// INSERT (product, quantity) VALUES(product, quantity) +/// ``` +/// +/// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/merge) +/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) +/// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/MERGE.html) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum MergeAction { + /// An `INSERT` clause + /// + /// Example: + /// 
```sql + /// INSERT (product, quantity) VALUES(product, quantity) + /// ``` + Insert(MergeInsertExpr), + /// An `UPDATE` clause + /// + /// Example: + /// ```sql + /// UPDATE SET quantity = T.quantity + S.quantity + /// ``` + Update(MergeUpdateExpr), + /// A plain `DELETE` clause + Delete { + /// The `DELETE` token that starts the sub-expression. + delete_token: AttachedToken, + }, +} + +impl Display for MergeAction { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + MergeAction::Insert(insert) => { + write!(f, "INSERT {insert}") + } + MergeAction::Update(update) => { + write!(f, "UPDATE {update}") + } + MergeAction::Delete { .. } => { + write!(f, "DELETE") + } + } + } +} + +/// The type of expression used to insert rows within a `MERGE` statement. +/// +/// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/merge) +/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum MergeInsertKind { + /// The insert expression is defined from an explicit `VALUES` clause + /// + /// Example: + /// ```sql + /// INSERT VALUES(product, quantity) + /// ``` + Values(Values), + /// The insert expression is defined using only the `ROW` keyword. + /// + /// Example: + /// ```sql + /// INSERT ROW + /// ``` + /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) + Row, +} + +impl Display for MergeInsertKind { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + MergeInsertKind::Values(values) => { + write!(f, "{values}") + } + MergeInsertKind::Row => { + write!(f, "ROW") + } + } + } +} + +/// The expression used to insert rows within a `MERGE` statement. 
+/// +/// Examples +/// ```sql +/// INSERT (product, quantity) VALUES(product, quantity) +/// INSERT ROW +/// ``` +/// +/// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/merge) +/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) +/// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/MERGE.html) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct MergeInsertExpr { + /// The `INSERT` token that starts the sub-expression. + pub insert_token: AttachedToken, + /// Columns (if any) specified by the insert. + /// + /// Example: + /// ```sql + /// INSERT (product, quantity) VALUES(product, quantity) + /// INSERT (product, quantity) ROW + /// ``` + pub columns: Vec, + /// The token, `[VALUES | ROW]` starting `kind`. + pub kind_token: AttachedToken, + /// The insert type used by the statement. + pub kind: MergeInsertKind, + /// An optional condition to restrict the insertion (Oracle specific) + /// + /// Enabled via [`Dialect::supports_merge_insert_predicate`](crate::dialect::Dialect::supports_merge_insert_predicate). + pub insert_predicate: Option, +} + +impl Display for MergeInsertExpr { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + if !self.columns.is_empty() { + write!(f, "({}) ", display_comma_separated(self.columns.as_slice()))?; + } + write!(f, "{}", self.kind)?; + if let Some(predicate) = self.insert_predicate.as_ref() { + write!(f, " WHERE {}", predicate)?; + } + Ok(()) + } +} + +/// The expression used to update rows within a `MERGE` statement. 
+/// +/// Examples +/// ```sql +/// UPDATE SET quantity = T.quantity + S.quantity +/// ``` +/// +/// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/merge) +/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) +/// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/MERGE.html) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct MergeUpdateExpr { + /// The `UPDATE` token that starts the sub-expression. + pub update_token: AttachedToken, + /// The update assignment expressions + pub assignments: Vec, + /// `where_clause` for the update (Oracle specific) + /// + /// Enabled via [`Dialect::supports_merge_update_predicate`](crate::dialect::Dialect::supports_merge_update_predicate). + pub update_predicate: Option, + /// `delete_clause` for the update "delete where" (Oracle specific) + /// + /// Enabled via [`Dialect::supports_merge_update_delete_predicate`](crate::dialect::Dialect::supports_merge_update_delete_predicate). 
+ pub delete_predicate: Option, +} + +impl Display for MergeUpdateExpr { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "SET {}", display_comma_separated(&self.assignments))?; + if let Some(predicate) = self.update_predicate.as_ref() { + write!(f, " WHERE {predicate}")?; + } + if let Some(predicate) = self.delete_predicate.as_ref() { + write!(f, " DELETE WHERE {predicate}")?; + } + Ok(()) + } +} + +/// A `OUTPUT` Clause in the end of a `MERGE` Statement +/// +/// Example: +/// OUTPUT $action, deleted.* INTO dbo.temp_products; +/// [mssql](https://learn.microsoft.com/en-us/sql/t-sql/queries/output-clause-transact-sql) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum OutputClause { + Output { + output_token: AttachedToken, + select_items: Vec, + into_table: Option, + }, + Returning { + returning_token: AttachedToken, + select_items: Vec, + }, +} + +impl fmt::Display for OutputClause { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + OutputClause::Output { + output_token: _, + select_items, + into_table, + } => { + f.write_str("OUTPUT ")?; + display_comma_separated(select_items).fmt(f)?; + if let Some(into_table) = into_table { + f.write_str(" ")?; + into_table.fmt(f)?; + } + Ok(()) + } + OutputClause::Returning { + returning_token: _, + select_items, + } => { + f.write_str("RETURNING ")?; + display_comma_separated(select_items).fmt(f) + } + } + } +} diff --git a/src/ast/mod.rs b/src/ast/mod.rs index acc8b8a72..0684d305a 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -77,7 +77,10 @@ pub use self::ddl::{ UserDefinedTypeRangeOption, UserDefinedTypeRepresentation, UserDefinedTypeSqlDefinitionOption, UserDefinedTypeStorage, ViewColumnDef, }; -pub use self::dml::{Delete, Insert, Update}; +pub use self::dml::{ + Delete, Insert, Merge, MergeAction, MergeClause, 
MergeClauseKind, MergeInsertExpr, + MergeInsertKind, MergeUpdateExpr, OutputClause, Update, +}; pub use self::operator::{BinaryOperator, UnaryOperator}; pub use self::query::{ AfterMatchSkip, ConnectBy, Cte, CteAsMaterialized, Distinct, EmptyMatchesMode, @@ -4076,22 +4079,7 @@ pub enum Statement { /// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/merge) /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) /// [MSSQL](https://learn.microsoft.com/en-us/sql/t-sql/statements/merge-transact-sql?view=sql-server-ver16) - Merge { - /// The `MERGE` token that starts the statement. - merge_token: AttachedToken, - /// optional INTO keyword - into: bool, - /// Specifies the table to merge - table: TableFactor, - /// Specifies the table or subquery to join with the target table - source: TableFactor, - /// Specifies the expression on which to join the target table and source - on: Box, - /// Specifies the actions to perform when values match or do not match. - clauses: Vec, - // Specifies the output to save changes in MSSQL - output: Option, - }, + Merge(Merge), /// ```sql /// CACHE [ FLAG ] TABLE [ OPTIONS('K1' = 'V1', 'K2' = V2) ] [ AS ] [ ] /// ``` @@ -5505,27 +5493,7 @@ impl fmt::Display for Statement { Statement::ReleaseSavepoint { name } => { write!(f, "RELEASE SAVEPOINT {name}") } - Statement::Merge { - merge_token: _, - into, - table, - source, - on, - clauses, - output, - } => { - write!( - f, - "MERGE{int} {table} USING {source} ", - int = if *into { " INTO" } else { "" } - )?; - write!(f, "ON {on} ")?; - write!(f, "{}", display_separated(clauses, " "))?; - if let Some(output) = output { - write!(f, " {output}")?; - } - Ok(()) - } + Statement::Merge(merge) => merge.fmt(f), Statement::Cache { table_name, table_flag, @@ -8550,288 +8518,6 @@ impl fmt::Display for CopyLegacyCsvOption { } } -/// Variant of `WHEN` clause used within a `MERGE` Statement. 
-/// -/// Example: -/// ```sql -/// MERGE INTO T USING U ON FALSE WHEN MATCHED THEN DELETE -/// ``` -/// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/merge) -/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] -pub enum MergeClauseKind { - /// `WHEN MATCHED` - Matched, - /// `WHEN NOT MATCHED` - NotMatched, - /// `WHEN MATCHED BY TARGET` - /// - /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) - NotMatchedByTarget, - /// `WHEN MATCHED BY SOURCE` - /// - /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) - NotMatchedBySource, -} - -impl Display for MergeClauseKind { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - MergeClauseKind::Matched => write!(f, "MATCHED"), - MergeClauseKind::NotMatched => write!(f, "NOT MATCHED"), - MergeClauseKind::NotMatchedByTarget => write!(f, "NOT MATCHED BY TARGET"), - MergeClauseKind::NotMatchedBySource => write!(f, "NOT MATCHED BY SOURCE"), - } - } -} - -/// The type of expression used to insert rows within a `MERGE` statement. 
-/// -/// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/merge) -/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] -pub enum MergeInsertKind { - /// The insert expression is defined from an explicit `VALUES` clause - /// - /// Example: - /// ```sql - /// INSERT VALUES(product, quantity) - /// ``` - Values(Values), - /// The insert expression is defined using only the `ROW` keyword. - /// - /// Example: - /// ```sql - /// INSERT ROW - /// ``` - /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) - Row, -} - -impl Display for MergeInsertKind { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - MergeInsertKind::Values(values) => { - write!(f, "{values}") - } - MergeInsertKind::Row => { - write!(f, "ROW") - } - } - } -} - -/// The expression used to insert rows within a `MERGE` statement. -/// -/// Examples -/// ```sql -/// INSERT (product, quantity) VALUES(product, quantity) -/// INSERT ROW -/// ``` -/// -/// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/merge) -/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) -/// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/MERGE.html) -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] -pub struct MergeInsertExpr { - /// The `INSERT` token that starts the sub-expression. - pub insert_token: AttachedToken, - /// Columns (if any) specified by the insert. 
- /// - /// Example: - /// ```sql - /// INSERT (product, quantity) VALUES(product, quantity) - /// INSERT (product, quantity) ROW - /// ``` - pub columns: Vec, - /// The token, `[VALUES | ROW]` starting `kind`. - pub kind_token: AttachedToken, - /// The insert type used by the statement. - pub kind: MergeInsertKind, - /// An optional condition to restrict the insertion (Oracle specific) - /// - /// Enabled via [`Dialect::supports_merge_insert_predicate`](crate::dialect::Dialect::supports_merge_insert_predicate). - pub insert_predicate: Option, -} - -impl Display for MergeInsertExpr { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - if !self.columns.is_empty() { - write!(f, "({}) ", display_comma_separated(self.columns.as_slice()))?; - } - write!(f, "{}", self.kind)?; - if let Some(predicate) = self.insert_predicate.as_ref() { - write!(f, " WHERE {}", predicate)?; - } - Ok(()) - } -} - -/// Underlying statement of a when clause within a `MERGE` Statement -/// -/// Example -/// ```sql -/// INSERT (product, quantity) VALUES(product, quantity) -/// ``` -/// -/// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/merge) -/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) -/// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/MERGE.html) -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] -pub enum MergeAction { - /// An `INSERT` clause - /// - /// Example: - /// ```sql - /// INSERT (product, quantity) VALUES(product, quantity) - /// ``` - Insert(MergeInsertExpr), - /// An `UPDATE` clause - /// - /// Example: - /// ```sql - /// UPDATE SET quantity = T.quantity + S.quantity - /// ``` - Update { - /// The `UPDATE` token that starts the sub-expression. 
- update_token: AttachedToken, - /// The update assiment expressions - assignments: Vec, - /// `where_clause` for the update (Oralce specific) - /// - /// Enabled via [`Dialect::supports_merge_update_predicate`](crate::dialect::Dialect::supports_merge_update_predicate). - update_predicate: Option, - /// `delete_clause` for the update "delete where" (Oracle specific) - /// - /// Enabled via [`Dialect::supports_merge_update_delete_predicate`](crate::dialect::Dialect::supports_merge_update_delete_predicate). - delete_predicate: Option, - }, - /// A plain `DELETE` clause - Delete { - /// The `DELETE` token that starts the sub-expression. - delete_token: AttachedToken, - }, -} - -impl Display for MergeAction { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - MergeAction::Insert(insert) => { - write!(f, "INSERT {insert}") - } - MergeAction::Update { - update_token: _, - assignments, - update_predicate, - delete_predicate, - } => { - write!(f, "UPDATE SET {}", display_comma_separated(assignments))?; - if let Some(predicate) = update_predicate.as_ref() { - write!(f, " WHERE {predicate}")?; - } - if let Some(predicate) = delete_predicate.as_ref() { - write!(f, " DELETE WHERE {predicate}")?; - } - Ok(()) - } - MergeAction::Delete { .. } => { - write!(f, "DELETE") - } - } - } -} - -/// A when clause within a `MERGE` Statement -/// -/// Example: -/// ```sql -/// WHEN NOT MATCHED BY SOURCE AND product LIKE '%washer%' THEN DELETE -/// ``` -/// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/merge) -/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] -pub struct MergeClause { - /// The `WHEN` token that starts the sub-expression. 
- pub when_token: AttachedToken, - pub clause_kind: MergeClauseKind, - pub predicate: Option, - pub action: MergeAction, -} - -impl Display for MergeClause { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let MergeClause { - when_token: _, - clause_kind, - predicate, - action, - } = self; - - write!(f, "WHEN {clause_kind}")?; - if let Some(pred) = predicate { - write!(f, " AND {pred}")?; - } - write!(f, " THEN {action}") - } -} - -/// A Output Clause in the end of a 'MERGE' Statement -/// -/// Example: -/// OUTPUT $action, deleted.* INTO dbo.temp_products; -/// [mssql](https://learn.microsoft.com/en-us/sql/t-sql/queries/output-clause-transact-sql) -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] -pub enum OutputClause { - Output { - output_token: AttachedToken, - select_items: Vec, - into_table: Option, - }, - Returning { - returning_token: AttachedToken, - select_items: Vec, - }, -} - -impl fmt::Display for OutputClause { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - OutputClause::Output { - output_token: _, - select_items, - into_table, - } => { - f.write_str("OUTPUT ")?; - display_comma_separated(select_items).fmt(f)?; - if let Some(into_table) = into_table { - f.write_str(" ")?; - into_table.fmt(f)?; - } - Ok(()) - } - OutputClause::Returning { - returning_token: _, - select_items, - } => { - f.write_str("RETURNING ")?; - display_comma_separated(select_items).fmt(f) - } - } - } -} - #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] diff --git a/src/ast/spans.rs b/src/ast/spans.rs index 8f8ec960e..d7bc6583e 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -38,15 +38,15 @@ use super::{ FunctionArgumentClause, FunctionArgumentList, 
FunctionArguments, GroupByExpr, HavingBound, IfStatement, IlikeSelectItem, IndexColumn, Insert, Interpolate, InterpolateExpr, Join, JoinConstraint, JoinOperator, JsonPath, JsonPathElem, LateralView, LimitClause, - MatchRecognizePattern, Measure, MergeAction, MergeClause, MergeInsertExpr, MergeInsertKind, - NamedParenthesizedList, NamedWindowDefinition, ObjectName, ObjectNamePart, Offset, OnConflict, - OnConflictAction, OnInsert, OpenStatement, OrderBy, OrderByExpr, OrderByKind, OutputClause, - Partition, PivotValueSource, ProjectionSelect, Query, RaiseStatement, RaiseStatementValue, - ReferentialAction, RenameSelectItem, ReplaceSelectElement, ReplaceSelectItem, Select, - SelectInto, SelectItem, SetExpr, SqlOption, Statement, Subscript, SymbolDefinition, TableAlias, - TableAliasColumnDef, TableConstraint, TableFactor, TableObject, TableOptionsClustered, - TableWithJoins, Update, UpdateTableFromKind, Use, Value, Values, ViewColumnDef, WhileStatement, - WildcardAdditionalOptions, With, WithFill, + MatchRecognizePattern, Measure, Merge, MergeAction, MergeClause, MergeInsertExpr, + MergeInsertKind, MergeUpdateExpr, NamedParenthesizedList, NamedWindowDefinition, ObjectName, + ObjectNamePart, Offset, OnConflict, OnConflictAction, OnInsert, OpenStatement, OrderBy, + OrderByExpr, OrderByKind, OutputClause, Partition, PivotValueSource, ProjectionSelect, Query, + RaiseStatement, RaiseStatementValue, ReferentialAction, RenameSelectItem, ReplaceSelectElement, + ReplaceSelectItem, Select, SelectInto, SelectItem, SetExpr, SqlOption, Statement, Subscript, + SymbolDefinition, TableAlias, TableAliasColumnDef, TableConstraint, TableFactor, TableObject, + TableOptionsClustered, TableWithJoins, Update, UpdateTableFromKind, Use, Value, Values, + ViewColumnDef, WhileStatement, WildcardAdditionalOptions, With, WithFill, }; /// Given an iterator of spans, return the [Span::union] of all spans. @@ -450,20 +450,7 @@ impl Spanned for Statement { Statement::Explain { .. 
} => Span::empty(), Statement::Savepoint { .. } => Span::empty(), Statement::ReleaseSavepoint { .. } => Span::empty(), - Statement::Merge { - merge_token, - into: _, - table: _, - source: _, - on, - clauses, - output, - } => union_spans( - [merge_token.0.span, on.span()] - .into_iter() - .chain(clauses.iter().map(Spanned::span)) - .chain(output.iter().map(Spanned::span)), - ), + Statement::Merge(merge) => merge.span(), Statement::Cache { .. } => Span::empty(), Statement::UNCache { .. } => Span::empty(), Statement::CreateSequence { .. } => Span::empty(), @@ -924,6 +911,17 @@ impl Spanned for Update { } } +impl Spanned for Merge { + fn span(&self) -> Span { + union_spans( + [self.merge_token.0.span, self.on.span()] + .into_iter() + .chain(self.clauses.iter().map(Spanned::span)) + .chain(self.output.iter().map(Spanned::span)), + ) + } +} + impl Spanned for FromTable { fn span(&self) -> Span { match self { @@ -2418,17 +2416,7 @@ impl Spanned for MergeAction { fn span(&self) -> Span { match self { MergeAction::Insert(expr) => expr.span(), - MergeAction::Update { - update_token, - assignments, - update_predicate, - delete_predicate, - } => union_spans( - core::iter::once(update_token.0.span) - .chain(assignments.iter().map(Spanned::span)) - .chain(update_predicate.iter().map(Spanned::span)) - .chain(delete_predicate.iter().map(Spanned::span)), - ), + MergeAction::Update(expr) => expr.span(), MergeAction::Delete { delete_token } => delete_token.0.span, } } @@ -2452,6 +2440,17 @@ impl Spanned for MergeInsertExpr { } } +impl Spanned for MergeUpdateExpr { + fn span(&self) -> Span { + union_spans( + core::iter::once(self.update_token.0.span) + .chain(self.assignments.iter().map(Spanned::span)) + .chain(self.update_predicate.iter().map(Spanned::span)) + .chain(self.delete_predicate.iter().map(Spanned::span)), + ) + } +} + impl Spanned for OutputClause { fn span(&self) -> Span { match self { @@ -2771,7 +2770,7 @@ WHERE id = 1 assert_eq!(stmt_span.end, (16, 67).into()); // ~ 
individual tokens within the statement - let Statement::Merge { + let Statement::Merge(Merge { merge_token, into: _, table: _, @@ -2779,7 +2778,7 @@ WHERE id = 1 on: _, clauses, output, - } = &r[0] + }) = &r[0] else { panic!("not a MERGE statement"); }; @@ -2817,12 +2816,12 @@ WHERE id = 1 clauses[1].when_token.0.span, Span::new(Location::new(12, 17), Location::new(12, 21)) ); - if let MergeAction::Update { + if let MergeAction::Update(MergeUpdateExpr { update_token, assignments: _, update_predicate: _, delete_predicate: _, - } = &clauses[1].action + }) = &clauses[1].action { assert_eq!( update_token.0.span, @@ -2895,7 +2894,7 @@ WHERE id = 1 ); // ~ individual tokens within the statement - if let Statement::Merge { output, .. } = &r[0] { + if let Statement::Merge(Merge { output, .. }) = &r[0] { if let Some(OutputClause::Returning { returning_token, .. }) = output @@ -2929,7 +2928,7 @@ WHERE id = 1 ); // ~ individual tokens within the statement - if let Statement::Merge { output, .. } = &r[0] { + if let Statement::Merge(Merge { output, .. }) = &r[0] { if let Some(OutputClause::Output { output_token, .. 
}) = output { assert_eq!( output_token.0.span, diff --git a/src/parser/merge.rs b/src/parser/merge.rs index 992ed3917..235222689 100644 --- a/src/parser/merge.rs +++ b/src/parser/merge.rs @@ -17,8 +17,8 @@ use alloc::{boxed::Box, format, string::ToString, vec, vec::Vec}; use crate::{ ast::{ - Ident, MergeAction, MergeClause, MergeClauseKind, MergeInsertExpr, MergeInsertKind, - ObjectName, ObjectNamePart, SetExpr, Statement, TableFactor, + Ident, Merge, MergeAction, MergeClause, MergeClauseKind, MergeInsertExpr, MergeInsertKind, + MergeUpdateExpr, ObjectName, ObjectNamePart, OutputClause, SetExpr, Statement, TableFactor, }, dialect::{BigQueryDialect, GenericDialect, MySqlDialect}, keywords::Keyword, @@ -54,7 +54,7 @@ impl Parser<'_> { None => None, }; - Ok(Statement::Merge { + Ok(Statement::Merge(Merge { merge_token: merge_token.into(), into, table, @@ -62,7 +62,7 @@ impl Parser<'_> { on: Box::new(on), clauses, output, - }) + })) } fn parse_merge_clauses( @@ -134,12 +134,12 @@ impl Parser<'_> { } else { None }; - MergeAction::Update { + MergeAction::Update(MergeUpdateExpr { update_token: update_token.into(), assignments, update_predicate, delete_predicate, - } + }) } Some(Keyword::DELETE) => { if matches!( @@ -273,6 +273,33 @@ impl Parser<'_> { self.parse_parenthesized_column_list(IsOptional::Optional, allow_empty) } } + + fn parse_output( + &mut self, + start_keyword: Keyword, + start_token: TokenWithSpan, + ) -> Result { + let select_items = self.parse_projection()?; + let into_table = if start_keyword == Keyword::OUTPUT && self.peek_keyword(Keyword::INTO) { + self.expect_keyword_is(Keyword::INTO)?; + Some(self.parse_select_into()?) 
+ } else { + None + }; + + Ok(if start_keyword == Keyword::OUTPUT { + OutputClause::Output { + output_token: start_token.into(), + select_items, + into_table, + } + } else { + OutputClause::Returning { + returning_token: start_token.into(), + select_items, + } + }) + } } /// Helper to unqualify a list of columns with either a qualified prefix or a diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 32fa6e7ec..028e64588 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -17285,33 +17285,6 @@ impl<'a> Parser<'a> { }) } - fn parse_output( - &mut self, - start_keyword: Keyword, - start_token: TokenWithSpan, - ) -> Result { - let select_items = self.parse_projection()?; - let into_table = if start_keyword == Keyword::OUTPUT && self.peek_keyword(Keyword::INTO) { - self.expect_keyword_is(Keyword::INTO)?; - Some(self.parse_select_into()?) - } else { - None - }; - - Ok(if start_keyword == Keyword::OUTPUT { - OutputClause::Output { - output_token: start_token.into(), - select_items, - into_table, - } - } else { - OutputClause::Returning { - returning_token: start_token.into(), - select_items, - } - }) - } - fn parse_select_into(&mut self) -> Result { let temporary = self .parse_one_of_keywords(&[Keyword::TEMP, Keyword::TEMPORARY]) diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 7e82be3cc..f82e011c6 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -1815,7 +1815,7 @@ fn parse_merge() { }), insert_predicate: None, }); - let update_action = MergeAction::Update { + let update_action = MergeAction::Update(MergeUpdateExpr { update_token: AttachedToken::empty(), assignments: vec![ Assignment { @@ -1829,17 +1829,17 @@ fn parse_merge() { ], update_predicate: None, delete_predicate: None, - }; + }); match bigquery_and_generic().verified_stmt(sql) { - Statement::Merge { + Statement::Merge(Merge { into, table, source, on, clauses, .. 
- } => { + }) => { assert!(!into); assert_eq!( TableFactor::Table { diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 0d27f069c..675ee12be 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -9793,22 +9793,22 @@ fn parse_merge() { let sql_no_into = "MERGE s.bar AS dest USING (SELECT * FROM s.foo) AS stg ON dest.D = stg.D AND dest.E = stg.E WHEN NOT MATCHED THEN INSERT (A, B, C) VALUES (stg.A, stg.B, stg.C) WHEN MATCHED AND dest.A = 'a' THEN UPDATE SET dest.F = stg.F, dest.G = stg.G WHEN MATCHED THEN DELETE"; match (verified_stmt(sql), verified_stmt(sql_no_into)) { ( - Statement::Merge { + Statement::Merge(Merge { into, table, source, on, clauses, .. - }, - Statement::Merge { + }), + Statement::Merge(Merge { into: no_into, table: table_no_into, source: source_no_into, on: on_no_into, clauses: clauses_no_into, .. - }, + }), ) => { assert!(into); assert!(!no_into); @@ -9957,7 +9957,7 @@ fn parse_merge() { (Value::SingleQuotedString("a".to_string())).with_empty_span() )), }), - action: MergeAction::Update { + action: MergeAction::Update(MergeUpdateExpr { update_token: AttachedToken::empty(), assignments: vec![ Assignment { @@ -9983,7 +9983,7 @@ fn parse_merge() { ], update_predicate: None, delete_predicate: None, - }, + }), }, MergeClause { when_token: AttachedToken::empty(), From 6637f6219f6b45cf7d4c46562f184268162e837f Mon Sep 17 00:00:00 2001 From: Petr Novotnik Date: Sun, 23 Nov 2025 09:51:46 +0100 Subject: [PATCH 3/8] Include location in errors --- src/parser/merge.rs | 75 +++++++++++++++++++++++++-------------------- 1 file changed, 41 insertions(+), 34 deletions(-) diff --git a/src/parser/merge.rs b/src/parser/merge.rs index 235222689..4e1b73740 100644 --- a/src/parser/merge.rs +++ b/src/parser/merge.rs @@ -18,12 +18,13 @@ use alloc::{boxed::Box, format, string::ToString, vec, vec::Vec}; use crate::{ ast::{ Ident, Merge, MergeAction, MergeClause, MergeClauseKind, MergeInsertExpr, MergeInsertKind, - MergeUpdateExpr, 
ObjectName, ObjectNamePart, OutputClause, SetExpr, Statement, TableFactor, + MergeUpdateExpr, ObjectName, ObjectNamePart, OutputClause, SetExpr, Spanned, Statement, + TableFactor, }, dialect::{BigQueryDialect, GenericDialect, MySqlDialect}, keywords::Keyword, parser::IsOptional, - tokenizer::TokenWithSpan, + tokenizer::{Location, TokenWithSpan}, }; use super::{Parser, ParserError}; @@ -232,42 +233,48 @@ impl Parser<'_> { if let Some(alias) = alias { if alias.columns.is_empty() { // ~ only the alias is supported at this point - unqualify_columns(cols, None, Some(&alias.name)).map_err(|e| { - ParserError::ParserError(format!( - "Invalid column for INSERT in a {clause_kind} merge clause: {e}" - )) - }) + match unqualify_columns(cols, None, Some(&alias.name)) { + Ok(column) => Ok(column), + Err((err, loc)) => parser_err!( + format_args!("Invalid column for INSERT in a {clause_kind} merge clause: {err}"), + loc + ), + } } else { - Err(ParserError::ParserError(format!( - "Invalid target ALIAS for INSERT in a {clause_kind} merge clause; must be an identifier" - ))) + parser_err!( + format_args!("Invalid target ALIAS for INSERT in a {clause_kind} merge clause; must be an identifier"), + alias.name.span.start + ) } } else { // ~ allow the full qualifier, but also just the table name if name.0.len() == 1 { - unqualify_columns(cols, Some(name), None).map_err(|e| { - ParserError::ParserError(format!( - "Invalid column for INSERT in a {clause_kind} merge clause: {e}" - )) - }) - } else if let Some(table_name) = + match unqualify_columns(cols, Some(name), None) { + Ok(column) => Ok(column), + Err((err, loc)) => parser_err!( + format_args!("Invalid column for INSERT in a {clause_kind} merge clause: {err}"), + loc) + } + } else if let Some(unqualified_name) = name.0.last().and_then(ObjectNamePart::as_ident) { - unqualify_columns(cols, Some(name), Some(table_name)).map_err(|e| { - ParserError::ParserError(format!( - "Invalid column for INSERT in a {clause_kind} merge clause: {e}" - 
)) - }) + match unqualify_columns(cols, Some(name), Some(unqualified_name)) { + Ok(column) => Ok(column), + Err((err, loc)) => parser_err!( + format_args!("Invalid column for INSERT in a {clause_kind} merge clause: {err}"), + loc) + } } else { - Err(ParserError::ParserError(format!( - "Invalid target table NAME for INSERT in a {clause_kind} merge clause; must be an identifier" - ))) + parser_err!( + format_args!("Invalid target table NAME for INSERT in a {clause_kind} merge clause; must be an identifier"), + name.span().start + ) } } } else { - Err(ParserError::ParserError(format!( - "Invalid target for INSERT in a {clause_kind} merge clause; must be a TABLE identifier" - ))) + parser_err!( + format_args!("Invalid target for INSERT in a {clause_kind} merge clause; must be a TABLE identifier"), + target_table.span().start) } } else { self.parse_parenthesized_column_list(IsOptional::Optional, allow_empty) @@ -302,8 +309,8 @@ impl Parser<'_> { } } -/// Helper to unqualify a list of columns with either a qualified prefix or a -/// qualifier identifier +/// Helper to unqualify a list of columns with either a qualified prefix +/// (`allowed_qualifier_1`) or a qualifier identifier (`allowed_qualifier_2`.) 
/// /// Oracle allows `INSERT ([qualifier.]column_name, ...)` in MERGE statements /// with `qualifier` referring to the alias of the target table (if one is @@ -313,13 +320,13 @@ fn unqualify_columns( columns: Vec, allowed_qualifier_1: Option<&ObjectName>, allowed_qualifier_2: Option<&Ident>, -) -> Result, &'static str> { +) -> Result, (&'static str, Location)> { // ~ helper to turn a column name (part) into a plain `ident` // possibly bailing with error - fn to_ident(name: ObjectNamePart) -> Result { + fn to_ident(name: ObjectNamePart) -> Result { match name { ObjectNamePart::Identifier(ident) => Ok(ident), - ObjectNamePart::Function(_) => Err("not an identifier"), + ObjectNamePart::Function(_) => Err(("not an identifier", name.span().start)), } } @@ -353,7 +360,7 @@ fn unqualify_columns( let mut unqualified = Vec::::with_capacity(columns.len()); for mut name in columns { if name.0.is_empty() { - return Err("empty column name"); + return Err(("empty column name", name.span().start)); } if name.0.len() == 1 { @@ -390,7 +397,7 @@ fn unqualify_columns( } } - return Err("not matching target table"); + return Err(("not matching target table", name.span().start)); } Ok(unqualified) } From 49e9acd5dbd649403486f043c507bc1084801869 Mon Sep 17 00:00:00 2001 From: xitep Date: Tue, 25 Nov 2025 12:48:18 +0100 Subject: [PATCH 4/8] Wording Co-authored-by: Ifeanyi Ubah --- src/ast/dml.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ast/dml.rs b/src/ast/dml.rs index cf689f703..807637776 100644 --- a/src/ast/dml.rs +++ b/src/ast/dml.rs @@ -314,7 +314,7 @@ impl Display for Update { } } -/// MERGE statement. +/// A `MERGE` statement. 
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] From 5e109935b461d8896efd9598798543e142a11d76 Mon Sep 17 00:00:00 2001 From: Petr Novotnik Date: Tue, 25 Nov 2025 17:45:49 +0100 Subject: [PATCH 5/8] Publish MERGE INSERT columns as `ObjectName`; no semantic validation --- src/ast/dml.rs | 2 +- src/ast/mod.rs | 6 ++ src/ast/spans.rs | 2 +- src/dialect/mod.rs | 12 ++- src/dialect/postgresql.rs | 8 ++ src/parser/merge.rs | 184 ++++-------------------------------- tests/sqlparser_bigquery.rs | 8 +- tests/sqlparser_common.rs | 60 ++++++------ 8 files changed, 79 insertions(+), 203 deletions(-) diff --git a/src/ast/dml.rs b/src/ast/dml.rs index 807637776..9d1659ded 100644 --- a/src/ast/dml.rs +++ b/src/ast/dml.rs @@ -539,7 +539,7 @@ pub struct MergeInsertExpr { /// INSERT (product, quantity) VALUES(product, quantity) /// INSERT (product, quantity) ROW /// ``` - pub columns: Vec, + pub columns: Vec, /// The token, `[VALUES | ROW]` starting `kind`. pub kind_token: AttachedToken, /// The insert type used by the statement. 
diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 0684d305a..d4048025f 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -347,6 +347,12 @@ impl From> for ObjectName { } } +impl From for ObjectName { + fn from(ident: Ident) -> Self { + ObjectName(vec![ObjectNamePart::Identifier(ident)]) + } +} + impl fmt::Display for ObjectName { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{}", display_separated(&self.0, ".")) diff --git a/src/ast/spans.rs b/src/ast/spans.rs index d7bc6583e..0ed0d9e84 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -2435,7 +2435,7 @@ impl Spanned for MergeInsertExpr { ] .into_iter() .chain(self.insert_predicate.iter().map(Spanned::span)) - .chain(self.columns.iter().map(|i| i.span)), + .chain(self.columns.iter().map(|i| i.span())), ) } } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 8c532c021..c46e5d076 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -632,6 +632,15 @@ pub trait Dialect: Debug + Any { /// ``` /// or /// ```sql + /// MERGE INTO FOO + /// USING FOO_IMP + /// ON (FOO.ID = FOO_IMP.ID) + /// WHEN NOT MATCHED THEN + /// -- here: qualified with array subscripts + /// INSERT (FOO.ID[1], FOO.NAME[1:12]) + /// VALUES (FOO_IMP.ID, UPPER(FOO_IMP.NAME)) + /// or + /// ```sql /// MERGE INTO FOO X /// USING FOO_IMP /// ON (X.ID = FOO_IMP.ID) @@ -641,9 +650,6 @@ pub trait Dialect: Debug + Any { /// VALUES (FOO_IMP.ID, UPPER(FOO_IMP.NAME)) /// ``` /// - /// Note: in the latter case, the qualifier must match the target table - /// name or its alias if one is present. The parser will enforce this. - /// /// The default implementation always returns `false` not allowing the /// qualifiers. 
fn supports_merge_insert_qualified_columns(&self) -> bool { diff --git a/src/dialect/postgresql.rs b/src/dialect/postgresql.rs index e861cc515..887c42396 100644 --- a/src/dialect/postgresql.rs +++ b/src/dialect/postgresql.rs @@ -280,4 +280,12 @@ impl Dialect for PostgreSqlDialect { fn supports_interval_options(&self) -> bool { true } + + /// [Postgres] supports column names with a subfield name or an array + /// subscript in the MERGE INSERT column lists. + /// + /// [Postgres]: https://www.postgresql.org/docs/current/sql-merge.html + fn supports_merge_insert_qualified_columns(&self) -> bool { + true + } } diff --git a/src/parser/merge.rs b/src/parser/merge.rs index 4e1b73740..0f587ba49 100644 --- a/src/parser/merge.rs +++ b/src/parser/merge.rs @@ -17,14 +17,12 @@ use alloc::{boxed::Box, format, string::ToString, vec, vec::Vec}; use crate::{ ast::{ - Ident, Merge, MergeAction, MergeClause, MergeClauseKind, MergeInsertExpr, MergeInsertKind, - MergeUpdateExpr, ObjectName, ObjectNamePart, OutputClause, SetExpr, Spanned, Statement, - TableFactor, + Merge, MergeAction, MergeClause, MergeClauseKind, MergeInsertExpr, MergeInsertKind, MergeUpdateExpr, ObjectName, ObjectNamePart, OutputClause, SetExpr, Statement }, dialect::{BigQueryDialect, GenericDialect, MySqlDialect}, keywords::Keyword, parser::IsOptional, - tokenizer::{Location, TokenWithSpan}, + tokenizer::TokenWithSpan, }; use super::{Parser, ParserError}; @@ -49,7 +47,7 @@ impl Parser<'_> { let source = self.parse_table_factor()?; self.expect_keyword_is(Keyword::ON)?; let on = self.parse_expr()?; - let clauses = self.parse_merge_clauses(&table)?; + let clauses = self.parse_merge_clauses()?; let output = match self.parse_one_of_keywords(&[Keyword::OUTPUT, Keyword::RETURNING]) { Some(keyword) => Some(self.parse_output(keyword, self.get_current_token().clone())?), None => None, @@ -66,10 +64,7 @@ impl Parser<'_> { })) } - fn parse_merge_clauses( - &mut self, - target_table: &TableFactor, - ) -> Result, ParserError> { + 
fn parse_merge_clauses(&mut self) -> Result, ParserError> { let mut clauses = vec![]; loop { if !(self.parse_keyword(Keyword::WHEN)) { @@ -172,11 +167,7 @@ impl Parser<'_> { let insert_token = self.get_current_token().clone(); let is_mysql = dialect_of!(self is MySqlDialect); - let columns = self.parse_merge_clause_insert_columns( - target_table, - &clause_kind, - is_mysql, - )?; + let columns = self.parse_merge_clause_insert_columns(is_mysql)?; let (kind, kind_token) = if dialect_of!(self is BigQueryDialect | GenericDialect) && self.parse_keyword(Keyword::ROW) { @@ -220,67 +211,27 @@ impl Parser<'_> { Ok(clauses) } - fn parse_merge_clause_insert_columns( - &mut self, - target_table: &TableFactor, - clause_kind: &MergeClauseKind, - allow_empty: bool, - ) -> Result, ParserError> { + fn parse_merge_clause_insert_columns(&mut self, allow_empty: bool) -> Result, ParserError> { if self.dialect.supports_merge_insert_qualified_columns() { - let cols = - self.parse_parenthesized_qualified_column_list(IsOptional::Optional, allow_empty)?; - if let TableFactor::Table { name, alias, .. 
} = target_table { - if let Some(alias) = alias { - if alias.columns.is_empty() { - // ~ only the alias is supported at this point - match unqualify_columns(cols, None, Some(&alias.name)) { - Ok(column) => Ok(column), - Err((err, loc)) => parser_err!( - format_args!("Invalid column for INSERT in a {clause_kind} merge clause: {err}"), - loc - ), - } - } else { - parser_err!( - format_args!("Invalid target ALIAS for INSERT in a {clause_kind} merge clause; must be an identifier"), - alias.name.span.start - ) - } - } else { - // ~ allow the full qualifier, but also just the table name - if name.0.len() == 1 { - match unqualify_columns(cols, Some(name), None) { - Ok(column) => Ok(column), - Err((err, loc)) => parser_err!( - format_args!("Invalid column for INSERT in a {clause_kind} merge clause: {err}"), - loc) - } - } else if let Some(unqualified_name) = - name.0.last().and_then(ObjectNamePart::as_ident) - { - match unqualify_columns(cols, Some(name), Some(unqualified_name)) { - Ok(column) => Ok(column), - Err((err, loc)) => parser_err!( - format_args!("Invalid column for INSERT in a {clause_kind} merge clause: {err}"), - loc) - } - } else { - parser_err!( - format_args!("Invalid target table NAME for INSERT in a {clause_kind} merge clause; must be an identifier"), - name.span().start - ) - } - } - } else { - parser_err!( - format_args!("Invalid target for INSERT in a {clause_kind} merge clause; must be a TABLE identifier"), - target_table.span().start) - } + self.parse_parenthesized_qualified_column_list(IsOptional::Optional, allow_empty) } else { - self.parse_parenthesized_column_list(IsOptional::Optional, allow_empty) + self.parse_parenthesized_column_list_as_object_names(IsOptional::Optional, allow_empty) } } + /// Just like [Parser::parse_parenthesized_column_list] parses a + /// parenthesized list of (simple) column names but returns them as object + /// names. 
+ fn parse_parenthesized_column_list_as_object_names( + &mut self, + optional: IsOptional, + allow_empty: bool, + ) -> Result, ParserError> { + self.parse_parenthesized_column_list_inner(optional, allow_empty, |p| { + p.parse_identifier().map(|ident| ObjectName(vec![ObjectNamePart::Identifier(ident)])) + }) + } + fn parse_output( &mut self, start_keyword: Keyword, @@ -308,96 +259,3 @@ impl Parser<'_> { }) } } - -/// Helper to unqualify a list of columns with either a qualified prefix -/// (`allowed_qualifier_1`) or a qualifier identifier (`allowed_qualifier_2`.) -/// -/// Oracle allows `INSERT ([qualifier.]column_name, ...)` in MERGE statements -/// with `qualifier` referring to the alias of the target table (if one is -/// present) or, if no alias is present, to the target table name itself - -/// either qualified or unqualified. -fn unqualify_columns( - columns: Vec, - allowed_qualifier_1: Option<&ObjectName>, - allowed_qualifier_2: Option<&Ident>, -) -> Result, (&'static str, Location)> { - // ~ helper to turn a column name (part) into a plain `ident` - // possibly bailing with error - fn to_ident(name: ObjectNamePart) -> Result { - match name { - ObjectNamePart::Identifier(ident) => Ok(ident), - ObjectNamePart::Function(_) => Err(("not an identifier", name.span().start)), - } - } - - // ~ helper to return the last part of `name` if it is - // preceded by `prefix` - fn unqualify_column( - mut name: ObjectName, - prefix: &ObjectName, - ) -> Result { - let mut name_iter = name.0.iter(); - let mut prefix_iter = prefix.0.iter(); - loop { - match (name_iter.next(), prefix_iter.next()) { - (Some(_), None) => { - if name_iter.next().is_none() { - return Ok(name.0.pop().expect("missing name part")); - } else { - return Err(name); - } - } - (Some(c), Some(q)) if c == q => { - // ~ continue matching next part - } - _ => { - return Err(name); - } - } - } - } - - let mut unqualified = Vec::::with_capacity(columns.len()); - for mut name in columns { - if name.0.is_empty() { 
- return Err(("empty column name", name.span().start)); - } - - if name.0.len() == 1 { - unqualified.push(to_ident(name.0.pop().expect("missing name part"))?); - continue; - } - - // ~ try matching by the primary prefix - if let Some(allowed_qualifier) = allowed_qualifier_1 { - match unqualify_column(name, allowed_qualifier) { - Ok(ident) => { - unqualified.push(to_ident(ident)?); - continue; - } - Err(n) => { - // ~ continue trying with the alternate prefix below - name = n; - } - } - } - - // ~ try matching by the alternate prefix - if let Some(allowed_qualifier) = allowed_qualifier_2 { - if name.0.len() == 2 - && name - .0 - .first() - .and_then(ObjectNamePart::as_ident) - .map(|i| i == allowed_qualifier) - .unwrap_or(false) - { - unqualified.push(to_ident(name.0.pop().expect("missing name part"))?); - continue; - } - } - - return Err(("not matching target table", name.span().start)); - } - Ok(unqualified) -} diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index f82e011c6..57777a2c5 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -1806,7 +1806,7 @@ fn parse_merge() { ); let insert_action = MergeAction::Insert(MergeInsertExpr { insert_token: AttachedToken::empty(), - columns: vec![Ident::new("product"), Ident::new("quantity")], + columns: vec![Ident::new("product").into(), Ident::new("quantity").into()], kind_token: AttachedToken::empty(), kind: MergeInsertKind::Values(Values { value_keyword: false, @@ -1920,7 +1920,7 @@ fn parse_merge() { predicate: Some(Expr::value(number("1"))), action: MergeAction::Insert(MergeInsertExpr { insert_token: AttachedToken::empty(), - columns: vec![Ident::new("product"), Ident::new("quantity"),], + columns: vec![Ident::new("product").into(), Ident::new("quantity").into(),], kind_token: AttachedToken::empty(), kind: MergeInsertKind::Row, insert_predicate: None, @@ -1932,7 +1932,7 @@ fn parse_merge() { predicate: None, action: MergeAction::Insert(MergeInsertExpr { insert_token: 
AttachedToken::empty(), - columns: vec![Ident::new("product"), Ident::new("quantity"),], + columns: vec![Ident::new("product").into(), Ident::new("quantity").into(),], kind_token: AttachedToken::empty(), kind: MergeInsertKind::Row, insert_predicate: None, @@ -1982,7 +1982,7 @@ fn parse_merge() { predicate: None, action: MergeAction::Insert(MergeInsertExpr { insert_token: AttachedToken::empty(), - columns: vec![Ident::new("a"), Ident::new("b"),], + columns: vec![Ident::new("a").into(), Ident::new("b").into(),], kind_token: AttachedToken::empty(), kind: MergeInsertKind::Values(Values { value_keyword: false, diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 675ee12be..1c386071b 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -1637,6 +1637,10 @@ fn ms_and_generic() -> TestedDialects { TestedDialects::new(vec![Box::new(MsSqlDialect {}), Box::new(GenericDialect {})]) } +fn only_ms() -> TestedDialects { + TestedDialects::new(vec![Box::new(MsSqlDialect {})]) +} + fn only_generic() -> TestedDialects { TestedDialects::new(vec![Box::new(GenericDialect {})]) } @@ -9921,7 +9925,7 @@ fn parse_merge() { predicate: None, action: MergeAction::Insert(MergeInsertExpr { insert_token: AttachedToken::empty(), - columns: vec![Ident::new("A"), Ident::new("B"), Ident::new("C")], + columns: vec![Ident::new("A").into(), Ident::new("B").into(), Ident::new("C").into()], kind_token: AttachedToken::empty(), kind: MergeInsertKind::Values(Values { value_keyword: false, @@ -10093,39 +10097,23 @@ WHERE NOT FOO_IMPORT.NAME LIKE '%.DO_NOT_INSERT'"; } #[test] -fn test_merge_with_insert_qualified_columns() { +fn test_merge_with_insert_simple_columns() { let sql = "\ MERGE INTO FOO USING FOO_IMPORT ON (FOO.ID = FOO_IMPORT.ID) \ WHEN NOT MATCHED THEN \ -INSERT (FOO.ID, FOO.NAME) \ -VALUES (1, 2)"; - - let expected = "\ -MERGE INTO FOO USING FOO_IMPORT ON (FOO.ID = FOO_IMPORT.ID) \ -WHEN NOT MATCHED THEN \ INSERT (ID, NAME) \ -VALUES (1, 2)"; - - 
only_generic().one_statement_parses_to(sql, expected); +VALUES (1, 'abc')"; + all_dialects().verified_stmt(sql); } #[test] -fn test_merge_with_insert_qualified_columns_via_alias() { +fn test_merge_with_insert_qualified_columns() { let sql = "\ -MERGE INTO FOO F USING FOO_IMPORT ON (F.ID = FOO_IMPORT.ID) \ -WHEN NOT MATCHED THEN \ -INSERT (F.ID, F.NAME) \ -VALUES (1, 2)"; - - // note: this serialized form will break execution on an Oracle database - // as it doesn't allow the "AS" keyword; Issue #1784 - let expected = "\ -MERGE INTO FOO AS F USING FOO_IMPORT ON (F.ID = FOO_IMPORT.ID) \ +MERGE INTO FOO USING FOO_IMPORT ON (FOO.ID = FOO_IMPORT.ID) \ WHEN NOT MATCHED THEN \ -INSERT (ID, NAME) \ -VALUES (1, 2)"; - - only_generic().one_statement_parses_to(sql, expected); +INSERT (FOO.ID, FOO.NAME) \ +VALUES (1, 'abc')"; + pg_and_generic().verified_stmt(sql); } #[test] @@ -10134,15 +10122,25 @@ fn test_merge_with_insert_qualified_columns_with_schema() { MERGE INTO PLAYGROUND.FOO USING FOO_IMPORT ON (PLAYGROUND.FOO.ID = FOO_IMPORT.ID) \ WHEN NOT MATCHED THEN \ INSERT (PLAYGROUND.FOO.ID, PLAYGROUND.FOO.NAME) \ -VALUES (1, 2)"; +VALUES (1, 'abc')"; + pg_and_generic().verified_stmt(sql); +} - let expected = "\ -MERGE INTO PLAYGROUND.FOO USING FOO_IMPORT ON (PLAYGROUND.FOO.ID = FOO_IMPORT.ID) \ +#[test] +fn test_merge_insert_with_qualified_columns_not_supported() { + let sql = "\ +MERGE INTO FOO USING FOO_IMPORT ON (FOO.ID = FOO_IMPORT.ID) \ WHEN NOT MATCHED THEN \ -INSERT (ID, NAME) \ -VALUES (1, 2)"; +INSERT (FOO.ID, FOO.NAME) \ +VALUES (1, 'abc')"; + assert!(only_ms().parse_sql_statements(sql).is_err()); - only_generic().one_statement_parses_to(sql, expected); + let sql = "\ +MERGE INTO PLAYGROUND.FOO USING FOO_IMPORT ON (PLAYGROUND.FOO.ID = FOO_IMPORT.ID) \ +WHEN NOT MATCHED THEN \ +INSERT (PLAYGROUND.FOO.ID, PLAYGROUND.FOO.NAME) \ +VALUES (1, 'abc')"; + assert!(only_ms().parse_sql_statements(sql).is_err()); } #[test] From 3e877046a1b127c23eb86bae7fffdf456e48cfa2 Mon Sep 
17 00:00:00 2001 From: Petr Novotnik Date: Tue, 25 Nov 2025 18:09:27 +0100 Subject: [PATCH 6/8] Cargo fmt --- src/parser/merge.rs | 11 ++++++++--- tests/sqlparser_bigquery.rs | 10 ++++++++-- tests/sqlparser_common.rs | 6 +++++- 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/src/parser/merge.rs b/src/parser/merge.rs index 0f587ba49..f544bc3c4 100644 --- a/src/parser/merge.rs +++ b/src/parser/merge.rs @@ -17,7 +17,8 @@ use alloc::{boxed::Box, format, string::ToString, vec, vec::Vec}; use crate::{ ast::{ - Merge, MergeAction, MergeClause, MergeClauseKind, MergeInsertExpr, MergeInsertKind, MergeUpdateExpr, ObjectName, ObjectNamePart, OutputClause, SetExpr, Statement + Merge, MergeAction, MergeClause, MergeClauseKind, MergeInsertExpr, MergeInsertKind, + MergeUpdateExpr, ObjectName, ObjectNamePart, OutputClause, SetExpr, Statement, }, dialect::{BigQueryDialect, GenericDialect, MySqlDialect}, keywords::Keyword, @@ -211,7 +212,10 @@ impl Parser<'_> { Ok(clauses) } - fn parse_merge_clause_insert_columns(&mut self, allow_empty: bool) -> Result, ParserError> { + fn parse_merge_clause_insert_columns( + &mut self, + allow_empty: bool, + ) -> Result, ParserError> { if self.dialect.supports_merge_insert_qualified_columns() { self.parse_parenthesized_qualified_column_list(IsOptional::Optional, allow_empty) } else { @@ -228,7 +232,8 @@ impl Parser<'_> { allow_empty: bool, ) -> Result, ParserError> { self.parse_parenthesized_column_list_inner(optional, allow_empty, |p| { - p.parse_identifier().map(|ident| ObjectName(vec![ObjectNamePart::Identifier(ident)])) + p.parse_identifier() + .map(|ident| ObjectName(vec![ObjectNamePart::Identifier(ident)])) }) } diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 57777a2c5..24b9efcaa 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -1920,7 +1920,10 @@ fn parse_merge() { predicate: Some(Expr::value(number("1"))), action: MergeAction::Insert(MergeInsertExpr { insert_token: 
AttachedToken::empty(), - columns: vec![Ident::new("product").into(), Ident::new("quantity").into(),], + columns: vec![ + Ident::new("product").into(), + Ident::new("quantity").into(), + ], kind_token: AttachedToken::empty(), kind: MergeInsertKind::Row, insert_predicate: None, @@ -1932,7 +1935,10 @@ fn parse_merge() { predicate: None, action: MergeAction::Insert(MergeInsertExpr { insert_token: AttachedToken::empty(), - columns: vec![Ident::new("product").into(), Ident::new("quantity").into(),], + columns: vec![ + Ident::new("product").into(), + Ident::new("quantity").into(), + ], kind_token: AttachedToken::empty(), kind: MergeInsertKind::Row, insert_predicate: None, diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 1c386071b..67242a874 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -9925,7 +9925,11 @@ fn parse_merge() { predicate: None, action: MergeAction::Insert(MergeInsertExpr { insert_token: AttachedToken::empty(), - columns: vec![Ident::new("A").into(), Ident::new("B").into(), Ident::new("C").into()], + columns: vec![ + Ident::new("A").into(), + Ident::new("B").into(), + Ident::new("C").into() + ], kind_token: AttachedToken::empty(), kind: MergeInsertKind::Values(Values { value_keyword: false, From 1312844eb998a5f38dfdba048d74eba5eae55c71 Mon Sep 17 00:00:00 2001 From: Petr Novotnik Date: Tue, 25 Nov 2025 18:42:29 +0100 Subject: [PATCH 7/8] Enable new features by default --- src/dialect/generic.rs | 16 ---------------- src/dialect/mod.rs | 22 ++++++++++------------ src/dialect/mssql.rs | 20 ++++++++++++++++++++ src/dialect/postgresql.rs | 19 +++++++++++++------ 4 files changed, 43 insertions(+), 34 deletions(-) diff --git a/src/dialect/generic.rs b/src/dialect/generic.rs index b606ad9e2..dffc5b527 100644 --- a/src/dialect/generic.rs +++ b/src/dialect/generic.rs @@ -195,20 +195,4 @@ impl Dialect for GenericDialect { fn supports_interval_options(&self) -> bool { true } - - fn 
supports_merge_insert_qualified_columns(&self) -> bool { - true - } - - fn supports_merge_insert_predicate(&self) -> bool { - true - } - - fn supports_merge_update_predicate(&self) -> bool { - true - } - - fn supports_merge_update_delete_predicate(&self) -> bool { - true - } } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index c46e5d076..5b106c339 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -639,6 +639,7 @@ pub trait Dialect: Debug + Any { /// -- here: qualified with array subscripts /// INSERT (FOO.ID[1], FOO.NAME[1:12]) /// VALUES (FOO_IMP.ID, UPPER(FOO_IMP.NAME)) + /// ``` /// or /// ```sql /// MERGE INTO FOO X @@ -650,10 +651,9 @@ pub trait Dialect: Debug + Any { /// VALUES (FOO_IMP.ID, UPPER(FOO_IMP.NAME)) /// ``` /// - /// The default implementation always returns `false` not allowing the - /// qualifiers. + /// By default, qualifiers are allowed. fn supports_merge_insert_qualified_columns(&self) -> bool { - false + true } /// Returns `true` if the dialect supports specify an INSERT predicate in @@ -670,13 +670,12 @@ pub trait Dialect: Debug + Any { /// WHERE NOT FOO_IMP.NAME like '%.IGNORE' /// ``` /// - /// The default implementation always returns `false` indicating no - /// support for the additional predicate. + /// By default, the additional predicate support is enabled. /// /// See also [Dialect::supports_merge_update_predicate] and /// [Dialect::supports_merge_update_delete_predicate]. fn supports_merge_insert_predicate(&self) -> bool { - false + true } /// Indicates the supports of UPDATE predicates in MERGE @@ -692,13 +691,12 @@ pub trait Dialect: Debug + Any { /// WHERE FOO.NAME <> 'pete' /// ``` /// - /// The default implementation always returns false indicating no support - /// for the additional predicate. + /// By default, the additional predicate is enabled. /// /// See also [Dialect::supports_merge_insert_predicate] and /// [Dialect::supports_merge_update_delete_predicate]. 
fn supports_merge_update_predicate(&self) -> bool { - false + true } /// Indicates the supports of UPDATE ... DELETEs and associated predicates @@ -714,13 +712,13 @@ pub trait Dialect: Debug + Any { /// DELETE WHERE UPPER(FOO.NAME) == FOO.NAME /// ``` /// - /// The default implementation always returns false indicating no support - /// for the `UPDATE ... DELETE` and its associated predicate. + /// By default, the support for the `UPDATE ... DELETE` and its associated + /// predicate is enabled. /// /// See also [Dialect::supports_merge_insert_predicate] and /// [Dialect::supports_merge_update_predicate]. fn supports_merge_update_delete_predicate(&self) -> bool { - false + true } /// Dialect-specific infix parser override diff --git a/src/dialect/mssql.rs b/src/dialect/mssql.rs index e1902b389..04c43e39c 100644 --- a/src/dialect/mssql.rs +++ b/src/dialect/mssql.rs @@ -123,6 +123,26 @@ impl Dialect for MsSqlDialect { true } + /// Set + fn supports_merge_insert_predicate(&self) -> bool { + false + } + + /// Set + fn supports_merge_insert_qualified_columns(&self) -> bool { + false + } + + /// Set + fn supports_merge_update_delete_predicate(&self) -> bool { + false + } + + /// Set + fn supports_merge_update_predicate(&self) -> bool { + false + } + /// See fn get_reserved_grantees_types(&self) -> &[GranteesType] { &[GranteesType::Public] diff --git a/src/dialect/postgresql.rs b/src/dialect/postgresql.rs index 887c42396..eba54afe4 100644 --- a/src/dialect/postgresql.rs +++ b/src/dialect/postgresql.rs @@ -281,11 +281,18 @@ impl Dialect for PostgreSqlDialect { true } - /// [Postgres] supports column names with a subfield name or an array - /// subscript in the MERGE INSERT column lists. 
- /// - /// [Postgres]: https://www.postgresql.org/docs/current/sql-merge.html - fn supports_merge_insert_qualified_columns(&self) -> bool { - true + /// See + fn supports_merge_insert_predicate(&self) -> bool { + false + } + + /// See + fn supports_merge_update_delete_predicate(&self) -> bool { + false + } + + /// See + fn supports_merge_update_predicate(&self) -> bool { + false } } From 3c250907de50f35a8978f4e340cda68d0d7cf4f0 Mon Sep 17 00:00:00 2001 From: Petr Novotnik Date: Tue, 25 Nov 2025 19:40:24 +0100 Subject: [PATCH 8/8] Enable new features unconditionally --- src/ast/dml.rs | 6 -- src/dialect/mod.rs | 113 -------------------------------------- src/dialect/mssql.rs | 20 ------- src/dialect/postgresql.rs | 15 ----- src/parser/merge.rs | 34 ++---------- tests/sqlparser_common.rs | 31 +---------- 6 files changed, 8 insertions(+), 211 deletions(-) diff --git a/src/ast/dml.rs b/src/ast/dml.rs index 9d1659ded..d740b140e 100644 --- a/src/ast/dml.rs +++ b/src/ast/dml.rs @@ -545,8 +545,6 @@ pub struct MergeInsertExpr { /// The insert type used by the statement. pub kind: MergeInsertKind, /// An optional condition to restrict the insertion (Oracle specific) - /// - /// Enabled via [`Dialect::supports_merge_insert_predicate`](crate::dialect::Dialect::supports_merge_insert_predicate). pub insert_predicate: Option, } @@ -582,12 +580,8 @@ pub struct MergeUpdateExpr { /// The update assiment expressions pub assignments: Vec, /// `where_clause` for the update (Oralce specific) - /// - /// Enabled via [`Dialect::supports_merge_update_predicate`](crate::dialect::Dialect::supports_merge_update_predicate). pub update_predicate: Option, /// `delete_clause` for the update "delete where" (Oracle specific) - /// - /// Enabled via [`Dialect::supports_merge_update_delete_predicate`](crate::dialect::Dialect::supports_merge_update_delete_predicate). 
pub delete_predicate: Option, } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 5b106c339..69df280ed 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -608,119 +608,6 @@ pub trait Dialect: Debug + Any { false } - /// Returns `true` if the dialect supports qualified column names - /// as part of a MERGE's INSERT's column list. Example: - /// - /// ```sql - /// MERGE INTO FOO - /// USING FOO_IMP - /// ON (FOO.ID = FOO_IMP.ID) - /// WHEN NOT MATCHED THEN - /// -- no qualifier - /// INSERT (ID, NAME) - /// VALUES (FOO_IMP.ID, UPPER(FOO_IMP.NAME)) - /// ``` - /// vs. - /// ```sql - /// MERGE INTO FOO - /// USING FOO_IMP - /// ON (FOO.ID = FOO_IMP.ID) - /// WHEN NOT MATCHED THEN - /// -- here: qualified - /// INSERT (FOO.ID, FOO.NAME) - /// VALUES (FOO_IMP.ID, UPPER(FOO_IMP.NAME)) - /// ``` - /// or - /// ```sql - /// MERGE INTO FOO - /// USING FOO_IMP - /// ON (FOO.ID = FOO_IMP.ID) - /// WHEN NOT MATCHED THEN - /// -- here: qualified with array subscripts - /// INSERT (FOO.ID[1], FOO.NAME[1:12]) - /// VALUES (FOO_IMP.ID, UPPER(FOO_IMP.NAME)) - /// ``` - /// or - /// ```sql - /// MERGE INTO FOO X - /// USING FOO_IMP - /// ON (X.ID = FOO_IMP.ID) - /// WHEN NOT MATCHED THEN - /// -- here: qualified using the alias - /// INSERT (X.ID, X.NAME) - /// VALUES (FOO_IMP.ID, UPPER(FOO_IMP.NAME)) - /// ``` - /// - /// By default, qualifiers are allowed. - fn supports_merge_insert_qualified_columns(&self) -> bool { - true - } - - /// Returns `true` if the dialect supports specify an INSERT predicate in - /// MERGE statements. Example: - /// - /// ```sql - /// MERGE INTO FOO - /// USING FOO_IMP - /// ON (FOO.ID = FOO_IMP.ID) - /// WHEN NOT MATCHED THEN - /// INSERT (ID, NAME) - /// VALUES (FOO_IMP.ID, UPPER(FOO_IMP.NAME)) - /// -- insert predicate - /// WHERE NOT FOO_IMP.NAME like '%.IGNORE' - /// ``` - /// - /// By default, the additional predicate support is enabled. 
- /// - /// See also [Dialect::supports_merge_update_predicate] and - /// [Dialect::supports_merge_update_delete_predicate]. - fn supports_merge_insert_predicate(&self) -> bool { - true - } - - /// Indicates the supports of UPDATE predicates in MERGE - /// statements. Example: - /// - /// ```sql - /// MERGE INTO FOO - /// USING FOO_IMPORT - /// ON (FOO.ID = FOO_IMPORT.ID) - /// WHEN MATCHED THEN - /// UPDATE SET FOO.NAME = FOO_IMPORT.NAME - /// -- update predicate - /// WHERE FOO.NAME <> 'pete' - /// ``` - /// - /// By default, the additional predicate is enabled. - /// - /// See also [Dialect::supports_merge_insert_predicate] and - /// [Dialect::supports_merge_update_delete_predicate]. - fn supports_merge_update_predicate(&self) -> bool { - true - } - - /// Indicates the supports of UPDATE ... DELETEs and associated predicates - /// in MERGE statements. Example: - /// - /// ```sql - /// MERGE INTO FOO - /// USING FOO_IMPORT - /// ON (FOO.ID = FOO_IMPORT.ID) - /// WHEN MATCHED THEN - /// UPDATE SET FOO.NAME = FOO_IMPORT.NAME - /// -- update delete with predicate - /// DELETE WHERE UPPER(FOO.NAME) == FOO.NAME - /// ``` - /// - /// By default, the support for the `UPDATE ... DELETE` and its associated - /// predicate is enabled. - /// - /// See also [Dialect::supports_merge_insert_predicate] and - /// [Dialect::supports_merge_update_predicate]. - fn supports_merge_update_delete_predicate(&self) -> bool { - true - } - /// Dialect-specific infix parser override /// /// This method is called to parse the next infix expression. 
diff --git a/src/dialect/mssql.rs b/src/dialect/mssql.rs index 04c43e39c..e1902b389 100644 --- a/src/dialect/mssql.rs +++ b/src/dialect/mssql.rs @@ -123,26 +123,6 @@ impl Dialect for MsSqlDialect { true } - /// Set - fn supports_merge_insert_predicate(&self) -> bool { - false - } - - /// Set - fn supports_merge_insert_qualified_columns(&self) -> bool { - false - } - - /// Set - fn supports_merge_update_delete_predicate(&self) -> bool { - false - } - - /// Set - fn supports_merge_update_predicate(&self) -> bool { - false - } - /// See fn get_reserved_grantees_types(&self) -> &[GranteesType] { &[GranteesType::Public] diff --git a/src/dialect/postgresql.rs b/src/dialect/postgresql.rs index eba54afe4..e861cc515 100644 --- a/src/dialect/postgresql.rs +++ b/src/dialect/postgresql.rs @@ -280,19 +280,4 @@ impl Dialect for PostgreSqlDialect { fn supports_interval_options(&self) -> bool { true } - - /// See - fn supports_merge_insert_predicate(&self) -> bool { - false - } - - /// See - fn supports_merge_update_delete_predicate(&self) -> bool { - false - } - - /// See - fn supports_merge_update_predicate(&self) -> bool { - false - } } diff --git a/src/parser/merge.rs b/src/parser/merge.rs index f544bc3c4..636ebd6c9 100644 --- a/src/parser/merge.rs +++ b/src/parser/merge.rs @@ -18,7 +18,7 @@ use alloc::{boxed::Box, format, string::ToString, vec, vec::Vec}; use crate::{ ast::{ Merge, MergeAction, MergeClause, MergeClauseKind, MergeInsertExpr, MergeInsertKind, - MergeUpdateExpr, ObjectName, ObjectNamePart, OutputClause, SetExpr, Statement, + MergeUpdateExpr, ObjectName, OutputClause, SetExpr, Statement, }, dialect::{BigQueryDialect, GenericDialect, MySqlDialect}, keywords::Keyword, @@ -116,16 +116,12 @@ impl Parser<'_> { let update_token = self.get_current_token().clone(); self.expect_keyword_is(Keyword::SET)?; let assignments = self.parse_comma_separated(Parser::parse_assignment)?; - let update_predicate = if self.dialect.supports_merge_update_predicate() - && 
self.parse_keyword(Keyword::WHERE) - { + let update_predicate = if self.parse_keyword(Keyword::WHERE) { Some(self.parse_expr()?) } else { None }; - let delete_predicate = if self.dialect.supports_merge_update_delete_predicate() - && self.parse_keyword(Keyword::DELETE) - { + let delete_predicate = if self.parse_keyword(Keyword::DELETE) { let _ = self.expect_keyword(Keyword::WHERE)?; Some(self.parse_expr()?) } else { @@ -179,9 +175,7 @@ impl Parser<'_> { let values = self.parse_values(is_mysql, false)?; (MergeInsertKind::Values(values), values_token) }; - let insert_predicate = if self.dialect.supports_merge_insert_predicate() - && self.parse_keyword(Keyword::WHERE) - { + let insert_predicate = if self.parse_keyword(Keyword::WHERE) { Some(self.parse_expr()?) } else { None @@ -216,25 +210,7 @@ impl Parser<'_> { &mut self, allow_empty: bool, ) -> Result, ParserError> { - if self.dialect.supports_merge_insert_qualified_columns() { - self.parse_parenthesized_qualified_column_list(IsOptional::Optional, allow_empty) - } else { - self.parse_parenthesized_column_list_as_object_names(IsOptional::Optional, allow_empty) - } - } - - /// Just like [Parser::parse_parenthesized_column_list] parses a - /// parenthesized list of (simple) column names but returns them as object - /// names. 
- fn parse_parenthesized_column_list_as_object_names( - &mut self, - optional: IsOptional, - allow_empty: bool, - ) -> Result, ParserError> { - self.parse_parenthesized_column_list_inner(optional, allow_empty, |p| { - p.parse_identifier() - .map(|ident| ObjectName(vec![ObjectNamePart::Identifier(ident)])) - }) + self.parse_parenthesized_qualified_column_list(IsOptional::Optional, allow_empty) } fn parse_output( diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 67242a874..8c93ead2a 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -1637,14 +1637,6 @@ fn ms_and_generic() -> TestedDialects { TestedDialects::new(vec![Box::new(MsSqlDialect {}), Box::new(GenericDialect {})]) } -fn only_ms() -> TestedDialects { - TestedDialects::new(vec![Box::new(MsSqlDialect {})]) -} - -fn only_generic() -> TestedDialects { - TestedDialects::new(vec![Box::new(GenericDialect {})]) -} - #[test] fn parse_json_ops_without_colon() { use self::BinaryOperator::*; @@ -10097,7 +10089,7 @@ WHEN NOT MATCHED THEN \ INSERT (ID, NAME) \ VALUES (FOO_IMPORT.ID, UPPER(FOO_IMPORT.NAME)) \ WHERE NOT FOO_IMPORT.NAME LIKE '%.DO_NOT_INSERT'"; - only_generic().verified_stmt(sql); + all_dialects().verified_stmt(sql); } #[test] @@ -10117,7 +10109,7 @@ MERGE INTO FOO USING FOO_IMPORT ON (FOO.ID = FOO_IMPORT.ID) \ WHEN NOT MATCHED THEN \ INSERT (FOO.ID, FOO.NAME) \ VALUES (1, 'abc')"; - pg_and_generic().verified_stmt(sql); + all_dialects().verified_stmt(sql); } #[test] @@ -10127,24 +10119,7 @@ MERGE INTO PLAYGROUND.FOO USING FOO_IMPORT ON (PLAYGROUND.FOO.ID = FOO_IMPORT.ID WHEN NOT MATCHED THEN \ INSERT (PLAYGROUND.FOO.ID, PLAYGROUND.FOO.NAME) \ VALUES (1, 'abc')"; - pg_and_generic().verified_stmt(sql); -} - -#[test] -fn test_merge_insert_with_qualified_columns_not_supported() { - let sql = "\ -MERGE INTO FOO USING FOO_IMPORT ON (FOO.ID = FOO_IMPORT.ID) \ -WHEN NOT MATCHED THEN \ -INSERT (FOO.ID, FOO.NAME) \ -VALUES (1, 'abc')"; - 
assert!(only_ms().parse_sql_statements(sql).is_err()); - - let sql = "\ -MERGE INTO PLAYGROUND.FOO USING FOO_IMPORT ON (PLAYGROUND.FOO.ID = FOO_IMPORT.ID) \ -WHEN NOT MATCHED THEN \ -INSERT (PLAYGROUND.FOO.ID, PLAYGROUND.FOO.NAME) \ -VALUES (1, 'abc')"; - assert!(only_ms().parse_sql_statements(sql).is_err()); + all_dialects().verified_stmt(sql); } #[test]