diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 1e430171e..551e433d1 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -792,6 +792,247 @@ impl fmt::Display for CaseWhen { } } +/// Modes accepted by XML parsing/serialization clauses. +#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum XmlParseMode { + /// `CONTENT` + Content, + /// `DOCUMENT` + Document, +} + +impl fmt::Display for XmlParseMode { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + XmlParseMode::Content => write!(f, "CONTENT"), + XmlParseMode::Document => write!(f, "DOCUMENT"), + } + } +} + +/// A named XML argument (for XMLFOREST entries). +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct XmlNamedExpr { + /// Value expression. + pub expr: Expr, + /// Optional explicit XML name. + pub alias: Option, +} + +impl fmt::Display for XmlNamedExpr { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.expr)?; + if let Some(alias) = &self.alias { + write!(f, " AS {alias}")?; + } + Ok(()) + } +} + +/// A single XML attribute expression, optionally named. +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct XmlAttribute { + /// Attribute value expression. + pub expr: Expr, + /// Optional explicit attribute name. + pub alias: Option, +} + +impl fmt::Display for XmlAttribute { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.expr)?; + if let Some(alias) = &self.alias { + write!(f, " AS {alias}")?; + } + Ok(()) + } +} + +/// `XMLELEMENT(NAME ..., [XMLATTRIBUTES(...)], [content ...])`. 
+#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct XmlElementExpr { + /// Element name. + pub name: Ident, + /// Optional XML attributes. + pub attributes: Option>, + /// Optional content expressions. + pub content: Vec, +} + +impl fmt::Display for XmlElementExpr { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "XMLELEMENT(NAME {}", self.name)?; + if let Some(attrs) = &self.attributes { + write!(f, ", XMLATTRIBUTES({})", display_comma_separated(attrs))?; + } + if !self.content.is_empty() { + write!(f, ", {}", display_comma_separated(&self.content))?; + } + write!(f, ")") + } +} + +/// `XMLPARSE( )`. +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct XmlParseExpr { + /// Parsing mode. + pub mode: XmlParseMode, + /// Expression to parse as XML. + pub expr: Box, +} + +impl fmt::Display for XmlParseExpr { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "XMLPARSE({} {})", self.mode, self.expr) + } +} + +/// `XMLPI(NAME ..., [content])`. +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct XmlPiExpr { + /// Processing instruction target name. + pub name: Ident, + /// Optional processing instruction content. + pub content: Option>, +} + +impl fmt::Display for XmlPiExpr { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "XMLPI(NAME {}", self.name)?; + if let Some(content) = &self.content { + write!(f, ", {content}")?; + } + write!(f, ")") + } +} + +/// Version argument in XMLROOT. 
+#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum XmlRootVersion { + /// `VERSION NO VALUE` + NoValue, + /// `VERSION ` + Value(Box), +} + +impl fmt::Display for XmlRootVersion { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + XmlRootVersion::NoValue => write!(f, "NO VALUE"), + XmlRootVersion::Value(expr) => write!(f, "{expr}"), + } + } +} + +/// Standalone option in XMLROOT. +#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum XmlStandalone { + /// `YES` + Yes, + /// `NO` + No, + /// `NO VALUE` + NoValue, +} + +impl fmt::Display for XmlStandalone { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + XmlStandalone::Yes => write!(f, "YES"), + XmlStandalone::No => write!(f, "NO"), + XmlStandalone::NoValue => write!(f, "NO VALUE"), + } + } +} + +/// `XMLROOT(...)` expression. +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct XmlRootExpr { + /// XML expression to rewrite. + pub expr: Box, + /// Required version argument. + pub version: XmlRootVersion, + /// Optional standalone option. + pub standalone: Option, +} + +impl fmt::Display for XmlRootExpr { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "XMLROOT({}, VERSION {}", self.expr, self.version)?; + if let Some(standalone) = &self.standalone { + write!(f, ", STANDALONE {standalone}")?; + } + write!(f, ")") + } +} + +/// Optional indentation behavior in XMLSERIALIZE. 
+#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum XmlIndentOption { + /// `INDENT` + Indent, + /// `NO INDENT` + NoIndent, +} + +impl fmt::Display for XmlIndentOption { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + XmlIndentOption::Indent => write!(f, "INDENT"), + XmlIndentOption::NoIndent => write!(f, "NO INDENT"), + } + } +} + +/// `XMLSERIALIZE( AS [indent-option])`. +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct XmlSerializeExpr { + /// Input XML mode. + pub mode: XmlParseMode, + /// Expression to serialize. + pub expr: Box, + /// Output SQL data type. + pub data_type: DataType, + /// Optional indentation behavior. + pub indent: Option, +} + +impl fmt::Display for XmlSerializeExpr { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "XMLSERIALIZE({} {} AS {}", + self.mode, self.expr, self.data_type + )?; + if let Some(indent) = self.indent { + write!(f, " {indent}")?; + } + write!(f, ")") + } +} + /// An SQL expression of any type. /// /// # Semantics / Type Checking @@ -1181,6 +1422,20 @@ pub enum Expr { /// This can represent ANSI SQL `DATE`, `TIME`, and `TIMESTAMP` literals (such as `DATE '2020-01-01'`), /// as well as constants of other types (a non-standard PostgreSQL extension). TypedString(TypedString), + /// XML concatenation expression: `XMLCONCAT(expr [, ...])`. + XmlConcat(Vec), + /// XML element constructor: `XMLELEMENT(NAME ... [, XMLATTRIBUTES(...)] [, content ...])`. + XmlElement(XmlElementExpr), + /// XML forest constructor: `XMLFOREST(expr [AS name] [, ...])`. + XmlForest(Vec), + /// XML parse expression: `XMLPARSE(CONTENT|DOCUMENT expr)`. 
+ XmlParse(XmlParseExpr), + /// XML processing instruction constructor: `XMLPI(NAME target [, content])`. + XmlPi(XmlPiExpr), + /// XML root mutator: `XMLROOT(expr, VERSION ... [, STANDALONE ...])`. + XmlRoot(XmlRootExpr), + /// XML serialization expression: `XMLSERIALIZE(CONTENT|DOCUMENT expr AS type [INDENT|NO INDENT])`. + XmlSerialize(XmlSerializeExpr), /// Scalar function call e.g. `LEFT(foo, 5)` Function(Function), /// `CASE [] WHEN THEN ... [ELSE ] END` @@ -1970,6 +2225,15 @@ impl fmt::Display for Expr { Expr::Value(v) => write!(f, "{v}"), Expr::Prefixed { prefix, value } => write!(f, "{prefix} {value}"), Expr::TypedString(ts) => ts.fmt(f), + Expr::XmlConcat(exprs) => write!(f, "XMLCONCAT({})", display_comma_separated(exprs)), + Expr::XmlElement(xml_element) => write!(f, "{xml_element}"), + Expr::XmlForest(items) => { + write!(f, "XMLFOREST({})", display_comma_separated(items)) + } + Expr::XmlParse(xml_parse) => write!(f, "{xml_parse}"), + Expr::XmlPi(xml_pi) => write!(f, "{xml_pi}"), + Expr::XmlRoot(xml_root) => write!(f, "{xml_root}"), + Expr::XmlSerialize(xml_serialize) => write!(f, "{xml_serialize}"), Expr::Function(fun) => fun.fmt(f), Expr::Case { case_token: _, diff --git a/src/ast/spans.rs b/src/ast/spans.rs index 0b95c3ed7..3ca6c1c73 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -1517,6 +1517,34 @@ impl Spanned for Expr { Expr::Nested(expr) => expr.span(), Expr::Value(value) => value.span(), Expr::TypedString(TypedString { value, .. 
}) => value.span(), + Expr::XmlConcat(exprs) => union_spans(exprs.iter().map(|expr| expr.span())), + Expr::XmlElement(xml_element) => union_spans( + iter::once(xml_element.name.span) + .chain( + xml_element + .attributes + .as_ref() + .into_iter() + .flatten() + .flat_map(|attr| { + iter::once(attr.expr.span()) + .chain(attr.alias.as_ref().map(|ident| ident.span)) + }), + ) + .chain(xml_element.content.iter().map(|expr| expr.span())), + ), + Expr::XmlForest(items) => union_spans(items.iter().flat_map(|item| { + iter::once(item.expr.span()).chain(item.alias.as_ref().map(|ident| ident.span)) + })), + Expr::XmlParse(xml_parse) => xml_parse.expr.span(), + Expr::XmlPi(xml_pi) => union_spans( + iter::once(xml_pi.name.span).chain(xml_pi.content.as_ref().map(|expr| expr.span())), + ), + Expr::XmlRoot(xml_root) => xml_root.expr.span().union_opt(&match &xml_root.version { + crate::ast::XmlRootVersion::NoValue => None, + crate::ast::XmlRootVersion::Value(expr) => Some(expr.span()), + }), + Expr::XmlSerialize(xml_serialize) => xml_serialize.expr.span(), Expr::Function(function) => function.span(), Expr::GroupingSets(vec) => { union_spans(vec.iter().flat_map(|i| i.iter().map(|k| k.span()))) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index bea566bbe..7befbc906 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -508,10 +508,10 @@ impl<'a> Parser<'a> { Token::EOF => break, // end of statement - Token::Word(word) => { - if expecting_statement_delimiter && word.keyword == Keyword::END { - break; - } + Token::Word(word) + if expecting_statement_delimiter && word.keyword == Keyword::END => + { + break; } _ => {} } @@ -1298,41 +1298,40 @@ impl<'a> Parser<'a> { let next_token = self.next_token(); match next_token.token { - t @ (Token::Word(_) | Token::SingleQuotedString(_)) => { - if self.peek_token_ref().token == Token::Period { - let mut id_parts: Vec = vec![match t { - Token::Word(w) => w.into_ident(next_token.span), - Token::SingleQuotedString(s) => 
Ident::with_quote('\'', s), - _ => { - return Err(ParserError::ParserError( - "Internal parser error: unexpected token type".to_string(), - )) + t @ (Token::Word(_) | Token::SingleQuotedString(_)) + if self.peek_token_ref().token == Token::Period => + { + let mut id_parts: Vec = vec![match t { + Token::Word(w) => w.into_ident(next_token.span), + Token::SingleQuotedString(s) => Ident::with_quote('\'', s), + _ => { + return Err(ParserError::ParserError( + "Internal parser error: unexpected token type".to_string(), + )) + } + }]; + + while self.consume_token(&Token::Period) { + let next_token = self.next_token(); + match next_token.token { + Token::Word(w) => id_parts.push(w.into_ident(next_token.span)), + Token::SingleQuotedString(s) => { + // SQLite has single-quoted identifiers + id_parts.push(Ident::with_quote('\'', s)) } - }]; - - while self.consume_token(&Token::Period) { - let next_token = self.next_token(); - match next_token.token { - Token::Word(w) => id_parts.push(w.into_ident(next_token.span)), - Token::SingleQuotedString(s) => { - // SQLite has single-quoted identifiers - id_parts.push(Ident::with_quote('\'', s)) - } - Token::Placeholder(s) => { - // Snowflake uses $1, $2, etc. for positional column references - // in staged data queries like: SELECT t.$1 FROM @stage t - id_parts.push(Ident::new(s)) - } - Token::Mul => { - return Ok(Expr::QualifiedWildcard( - ObjectName::from(id_parts), - AttachedToken(next_token), - )); - } - _ => { - return self - .expected("an identifier or a '*' after '.'", next_token); - } + Token::Placeholder(s) => { + // Snowflake uses $1, $2, etc. 
for positional column references + // in staged data queries like: SELECT t.$1 FROM @stage t + id_parts.push(Ident::new(s)) + } + Token::Mul => { + return Ok(Expr::QualifiedWildcard( + ObjectName::from(id_parts), + AttachedToken(next_token), + )); + } + _ => { + return self.expected("an identifier or a '*' after '.'", next_token); } } } @@ -1716,6 +1715,17 @@ impl<'a> Parser<'a> { // name, resulting in `NOT 'a'` being recognized as a `TypedString` instead of // an unary negation `NOT ('a' LIKE 'b')`. To solve this, we don't accept the // `type 'string'` syntax for the custom data types at all. + DataType::Custom(type_name, modifiers) + if dialect_of!(self is PostgreSqlDialect | GenericDialect) + && modifiers.is_empty() + && Self::is_simple_unquoted_object_name(&type_name, "xml") => + { + Ok(Expr::TypedString(TypedString { + data_type: DataType::Custom(type_name, modifiers), + value: parser.parse_value()?, + uses_odbc_syntax: false, + })) + } DataType::Custom(..) => parser_err!("dummy", loc), // MySQL supports using the `BINARY` keyword as a cast to binary type. DataType::Binary(..) if self.dialect.supports_binary_kw_as_cast() => { @@ -2399,8 +2409,256 @@ impl<'a> Parser<'a> { }) } + fn is_simple_unquoted_object_name(name: &ObjectName, expected: &str) -> bool { + name.0.len() == 1 + && matches!( + &name.0[0], + ObjectNamePart::Identifier(Ident { + value, + quote_style: None, + .. 
+ }) if value.eq_ignore_ascii_case(expected) + ) + } + + fn parse_unquoted_word_value(&mut self, expected: &str) -> bool { + if let Token::Word(word) = &self.peek_token_ref().token { + if word.quote_style.is_none() && word.value.eq_ignore_ascii_case(expected) { + let _ = self.next_token(); + return true; + } + } + false + } + + fn expect_unquoted_word_value(&mut self, expected: &str) -> Result<(), ParserError> { + if self.parse_unquoted_word_value(expected) { + Ok(()) + } else { + self.expected_ref(expected, self.peek_token_ref()) + } + } + + fn peek_unquoted_word_with_lparen(&self, expected: &str) -> bool { + matches!( + (&self.peek_token_ref().token, &self.peek_nth_token_ref(1).token), + ( + Token::Word(Word { + value, + quote_style: None, + .. + }), + Token::LParen + ) if value.eq_ignore_ascii_case(expected) + ) + } + + fn parse_xml_parse_mode(&mut self) -> Result { + if self.parse_unquoted_word_value("content") { + Ok(XmlParseMode::Content) + } else if self.parse_unquoted_word_value("document") { + Ok(XmlParseMode::Document) + } else { + self.expected_ref("CONTENT or DOCUMENT", self.peek_token_ref()) + } + } + + fn parse_xml_standalone(&mut self) -> Result { + if self.parse_unquoted_word_value("yes") { + Ok(XmlStandalone::Yes) + } else if self.parse_keyword(Keyword::NO) { + if self.parse_keyword(Keyword::VALUE) { + Ok(XmlStandalone::NoValue) + } else { + Ok(XmlStandalone::No) + } + } else { + self.expected_ref("YES, NO, or NO VALUE", self.peek_token_ref()) + } + } + + fn parse_xml_named_expr(&mut self) -> Result { + let expr = self.parse_expr()?; + let alias = if self.parse_keyword(Keyword::AS) { + Some(self.parse_identifier()?) + } else { + None + }; + Ok(XmlNamedExpr { expr, alias }) + } + + fn parse_xml_attribute(&mut self) -> Result { + let expr = self.parse_expr()?; + let alias = if self.parse_keyword(Keyword::AS) { + Some(self.parse_identifier()?) 
+ } else { + None + }; + Ok(XmlAttribute { expr, alias }) + } + + fn parse_xmlattributes_clause(&mut self) -> Result, ParserError> { + self.expect_unquoted_word_value("xmlattributes")?; + self.expect_token(&Token::LParen)?; + let attributes = self.parse_comma_separated(Parser::parse_xml_attribute)?; + self.expect_token(&Token::RParen)?; + Ok(attributes) + } + + fn parse_xmlconcat_expr(&mut self) -> Result { + self.expect_token(&Token::LParen)?; + let exprs = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + Ok(Expr::XmlConcat(exprs)) + } + + fn parse_xmlelement_expr(&mut self) -> Result { + self.expect_token(&Token::LParen)?; + self.expect_keyword_is(Keyword::NAME)?; + let name = self.parse_identifier()?; + + let mut attributes = None; + let mut content = vec![]; + + if self.consume_token(&Token::Comma) { + if self.peek_unquoted_word_with_lparen("xmlattributes") { + attributes = Some(self.parse_xmlattributes_clause()?); + if self.consume_token(&Token::Comma) { + content = self.parse_comma_separated(Parser::parse_expr)?; + } + } else { + content = self.parse_comma_separated(Parser::parse_expr)?; + } + } + + self.expect_token(&Token::RParen)?; + Ok(Expr::XmlElement(XmlElementExpr { + name, + attributes, + content, + })) + } + + fn parse_xmlforest_expr(&mut self) -> Result { + self.expect_token(&Token::LParen)?; + let entries = self.parse_comma_separated(Parser::parse_xml_named_expr)?; + self.expect_token(&Token::RParen)?; + Ok(Expr::XmlForest(entries)) + } + + fn parse_xmlparse_expr(&mut self) -> Result { + self.expect_token(&Token::LParen)?; + let mode = self.parse_xml_parse_mode()?; + let expr = Box::new(self.parse_expr()?); + self.expect_token(&Token::RParen)?; + Ok(Expr::XmlParse(XmlParseExpr { mode, expr })) + } + + fn parse_xmlpi_expr(&mut self) -> Result { + self.expect_token(&Token::LParen)?; + self.expect_keyword_is(Keyword::NAME)?; + let name = self.parse_identifier()?; + let content = if 
self.consume_token(&Token::Comma) { + Some(Box::new(self.parse_expr()?)) + } else { + None + }; + self.expect_token(&Token::RParen)?; + Ok(Expr::XmlPi(XmlPiExpr { name, content })) + } + + fn parse_xmlroot_expr(&mut self) -> Result { + self.expect_token(&Token::LParen)?; + let expr = Box::new(self.parse_expr()?); + self.expect_token(&Token::Comma)?; + self.expect_keyword_is(Keyword::VERSION)?; + let version = if self.parse_keywords(&[Keyword::NO, Keyword::VALUE]) { + XmlRootVersion::NoValue + } else { + XmlRootVersion::Value(Box::new(self.parse_expr()?)) + }; + let standalone = if self.consume_token(&Token::Comma) { + self.expect_unquoted_word_value("standalone")?; + Some(self.parse_xml_standalone()?) + } else { + None + }; + self.expect_token(&Token::RParen)?; + Ok(Expr::XmlRoot(XmlRootExpr { + expr, + version, + standalone, + })) + } + + fn parse_xmlserialize_expr(&mut self) -> Result { + self.expect_token(&Token::LParen)?; + let mode = self.parse_xml_parse_mode()?; + let expr = Box::new(self.parse_expr()?); + self.expect_keyword_is(Keyword::AS)?; + let data_type = self.parse_data_type()?; + let indent = if self.parse_unquoted_word_value("indent") { + Some(XmlIndentOption::Indent) + } else if self.parse_keyword(Keyword::NO) { + self.expect_unquoted_word_value("indent")?; + Some(XmlIndentOption::NoIndent) + } else { + None + }; + self.expect_token(&Token::RParen)?; + Ok(Expr::XmlSerialize(XmlSerializeExpr { + mode, + expr, + data_type, + indent, + })) + } + + fn maybe_parse_xml_function(&mut self, name: &ObjectName) -> Result, ParserError> { + if !dialect_of!(self is PostgreSqlDialect | GenericDialect) + || name.0.len() != 1 + || !matches!( + &name.0[0], + ObjectNamePart::Identifier(Ident { + quote_style: None, + .. 
+ }) + ) + { + return Ok(None); + } + + let function_name = match &name.0[0] { + ObjectNamePart::Identifier(ident) => ident.value.as_str(), + ObjectNamePart::Function(_) => return Ok(None), + }; + + let expr = if function_name.eq_ignore_ascii_case("xmlconcat") { + Some(self.parse_xmlconcat_expr()?) + } else if function_name.eq_ignore_ascii_case("xmlelement") { + Some(self.parse_xmlelement_expr()?) + } else if function_name.eq_ignore_ascii_case("xmlforest") { + Some(self.parse_xmlforest_expr()?) + } else if function_name.eq_ignore_ascii_case("xmlparse") { + Some(self.parse_xmlparse_expr()?) + } else if function_name.eq_ignore_ascii_case("xmlpi") { + Some(self.parse_xmlpi_expr()?) + } else if function_name.eq_ignore_ascii_case("xmlroot") { + Some(self.parse_xmlroot_expr()?) + } else if function_name.eq_ignore_ascii_case("xmlserialize") { + Some(self.parse_xmlserialize_expr()?) + } else { + None + }; + + Ok(expr) + } + /// Parse a function call expression named by `name` and return it as an `Expr`. pub fn parse_function(&mut self, name: ObjectName) -> Result { + if let Some(expr) = self.maybe_parse_xml_function(&name)? 
{ + return Ok(expr); + } self.parse_function_call(name).map(Expr::Function) } @@ -4990,10 +5248,10 @@ impl<'a> Parser<'a> { loop { match &self.peek_nth_token_ref(0).token { Token::EOF => break, - Token::Word(w) => { - if w.quote_style.is_none() && terminal_keywords.contains(&w.keyword) { - break; - } + Token::Word(w) + if w.quote_style.is_none() && terminal_keywords.contains(&w.keyword) => + { + break; } _ => {} } @@ -8173,71 +8431,68 @@ impl<'a> Parser<'a> { Keyword::LINES, Keyword::NULL, ]) { - Some(Keyword::FIELDS) => { - if self.parse_keywords(&[Keyword::TERMINATED, Keyword::BY]) { + Some(Keyword::FIELDS) + if self.parse_keywords(&[Keyword::TERMINATED, Keyword::BY]) => + { + row_delimiters.push(HiveRowDelimiter { + delimiter: HiveDelimiter::FieldsTerminatedBy, + char: self.parse_identifier()?, + }); + + if self.parse_keywords(&[Keyword::ESCAPED, Keyword::BY]) { row_delimiters.push(HiveRowDelimiter { - delimiter: HiveDelimiter::FieldsTerminatedBy, + delimiter: HiveDelimiter::FieldsEscapedBy, char: self.parse_identifier()?, }); - - if self.parse_keywords(&[Keyword::ESCAPED, Keyword::BY]) { - row_delimiters.push(HiveRowDelimiter { - delimiter: HiveDelimiter::FieldsEscapedBy, - char: self.parse_identifier()?, - }); - } - } else { - break; } } - Some(Keyword::COLLECTION) => { + Some(Keyword::COLLECTION) if self.parse_keywords(&[ Keyword::ITEMS, Keyword::TERMINATED, Keyword::BY, - ]) { - row_delimiters.push(HiveRowDelimiter { - delimiter: HiveDelimiter::CollectionItemsTerminatedBy, - char: self.parse_identifier()?, - }); - } else { - break; - } + ]) => + { + row_delimiters.push(HiveRowDelimiter { + delimiter: HiveDelimiter::CollectionItemsTerminatedBy, + char: self.parse_identifier()?, + }); } - Some(Keyword::MAP) => { + Some(Keyword::MAP) if self.parse_keywords(&[ Keyword::KEYS, Keyword::TERMINATED, Keyword::BY, - ]) { - row_delimiters.push(HiveRowDelimiter { - delimiter: HiveDelimiter::MapKeysTerminatedBy, - char: self.parse_identifier()?, - }); - } else { - break; 
- } + ]) => + { + row_delimiters.push(HiveRowDelimiter { + delimiter: HiveDelimiter::MapKeysTerminatedBy, + char: self.parse_identifier()?, + }); } - Some(Keyword::LINES) => { - if self.parse_keywords(&[Keyword::TERMINATED, Keyword::BY]) { - row_delimiters.push(HiveRowDelimiter { - delimiter: HiveDelimiter::LinesTerminatedBy, - char: self.parse_identifier()?, - }); - } else { - break; - } + Some(Keyword::LINES) + if self.parse_keywords(&[Keyword::TERMINATED, Keyword::BY]) => + { + row_delimiters.push(HiveRowDelimiter { + delimiter: HiveDelimiter::LinesTerminatedBy, + char: self.parse_identifier()?, + }); } - Some(Keyword::NULL) => { - if self.parse_keywords(&[Keyword::DEFINED, Keyword::AS]) { - row_delimiters.push(HiveRowDelimiter { - delimiter: HiveDelimiter::NullDefinedAs, - char: self.parse_identifier()?, - }); - } else { - break; - } + Some(Keyword::NULL) + if self.parse_keywords(&[Keyword::DEFINED, Keyword::AS]) => + { + row_delimiters.push(HiveRowDelimiter { + delimiter: HiveDelimiter::NullDefinedAs, + char: self.parse_identifier()?, + }); } + Some( + Keyword::FIELDS + | Keyword::COLLECTION + | Keyword::MAP + | Keyword::LINES + | Keyword::NULL, + ) => break, _ => { break; } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 982bf1088..3e17e541a 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -6816,6 +6816,205 @@ fn parse_typed_strings() { } } +#[test] +fn parse_generic_xml_special_expressions() { + let generic = TestedDialects::new(vec![Box::new(GenericDialect {})]); + + let select = generic + .verified_only_select_with_canonical("SELECT xmlconcat(1, 2)", "SELECT XMLCONCAT(1, 2)"); + match &select.projection[0] { + SelectItem::UnnamedExpr(Expr::XmlConcat(items)) => assert_eq!(items.len(), 2), + item => panic!("expected XmlConcat expression, got {item:?}"), + } + + let select = generic.verified_only_select_with_canonical( + "SELECT xmlelement(name foo, 'bar')", + "SELECT XMLELEMENT(NAME foo, 'bar')", + ); + match 
&select.projection[0] { + SelectItem::UnnamedExpr(Expr::XmlElement(XmlElementExpr { + name, + attributes, + content, + })) => { + assert_eq!(name.value, "foo"); + assert!(attributes.is_none()); + assert_eq!(content.len(), 1); + } + item => panic!("expected XmlElement expression, got {item:?}"), + } + + let select = generic.verified_only_select_with_canonical( + "SELECT xmlforest(1 as one, 2)", + "SELECT XMLFOREST(1 AS one, 2)", + ); + match &select.projection[0] { + SelectItem::UnnamedExpr(Expr::XmlForest(items)) => { + assert_eq!(items.len(), 2); + assert_eq!(items[0].alias.as_ref().unwrap().value, "one"); + assert!(items[1].alias.is_none()); + } + item => panic!("expected XmlForest expression, got {item:?}"), + } + + let select = generic.verified_only_select_with_canonical( + "SELECT xmlparse(content '')", + "SELECT XMLPARSE(CONTENT '')", + ); + match &select.projection[0] { + SelectItem::UnnamedExpr(Expr::XmlParse(XmlParseExpr { mode, .. })) => { + assert_eq!(*mode, XmlParseMode::Content); + } + item => panic!("expected XmlParse expression, got {item:?}"), + } + + let select = generic.verified_only_select_with_canonical( + "SELECT xmlpi(name foo, 'bar')", + "SELECT XMLPI(NAME foo, 'bar')", + ); + match &select.projection[0] { + SelectItem::UnnamedExpr(Expr::XmlPi(XmlPiExpr { name, content })) => { + assert_eq!(name.value, "foo"); + assert!(content.is_some()); + } + item => panic!("expected XmlPi expression, got {item:?}"), + } + + let select = generic.verified_only_select_with_canonical( + "SELECT xmlserialize(document '' as text no indent)", + "SELECT XMLSERIALIZE(DOCUMENT '' AS TEXT NO INDENT)", + ); + match &select.projection[0] { + SelectItem::UnnamedExpr(Expr::XmlSerialize(XmlSerializeExpr { mode, indent, .. 
})) => { + assert_eq!(*mode, XmlParseMode::Document); + assert_eq!(*indent, Some(XmlIndentOption::NoIndent)); + } + item => panic!("expected XmlSerialize expression, got {item:?}"), + } + + let select = generic.verified_only_select_with_canonical( + "SELECT xmlroot(xml '', version no value, standalone yes)", + "SELECT XMLROOT(xml '', VERSION NO VALUE, STANDALONE YES)", + ); + match &select.projection[0] { + SelectItem::UnnamedExpr(Expr::XmlRoot(XmlRootExpr { + version, + standalone, + .. + })) => { + assert!(matches!(version, XmlRootVersion::NoValue)); + assert_eq!(*standalone, Some(XmlStandalone::Yes)); + } + item => panic!("expected XmlRoot expression, got {item:?}"), + } +} + +#[test] +fn parse_generic_xml_special_expressions_reject_invalid_forms() { + let generic = TestedDialects::new(vec![Box::new(GenericDialect {})]); + + assert!( + generic.parse_sql_statements("SELECT xmlparse(1)").is_err(), + "xmlparse requires DOCUMENT|CONTENT mode" + ); + assert!( + generic + .parse_sql_statements("SELECT xmlroot(xml '', standalone yes)") + .is_err(), + "xmlroot requires VERSION clause" + ); + assert!( + generic + .parse_sql_statements("SELECT xmlserialize(document '' text)") + .is_err(), + "xmlserialize requires AS " + ); +} + +#[test] +fn parse_non_pg_dialects_keep_xml_names_as_regular_functions() { + let cases = [ + ("SELECT xmlparse(1)", "xmlparse", 1usize), + ("SELECT xmlelement(1, 2)", "xmlelement", 2usize), + ("SELECT xmlroot(1, 2)", "xmlroot", 2usize), + ("SELECT xmlserialize(1, 2)", "xmlserialize", 2usize), + ]; + + let non_pg_dialects = + all_dialects_except(|d| d.is::() || d.is::()).dialects; + + for dialect in non_pg_dialects { + let dialect_name = format!("{dialect:?}"); + for (sql, expected_name, expected_arg_count) in cases { + let statements = Parser::parse_sql(&*dialect, sql) + .unwrap_or_else(|e| panic!("dialect {dialect_name} failed to parse `{sql}`: {e}")); + match statements.as_slice() { + [Statement::Query(query)] => match query.body.as_ref() { + 
SetExpr::Select(select) => match select.projection.as_slice() { + [SelectItem::UnnamedExpr(Expr::Function(function))] => { + match function.name.0.as_slice() { + [ObjectNamePart::Identifier(ident)] => { + assert!( + ident.value.eq_ignore_ascii_case(expected_name), + "dialect {dialect_name} parsed `{sql}` as function `{}` instead of `{expected_name}`", + ident.value + ); + } + name_parts => { + panic!("dialect {dialect_name} expected simple function name, got {name_parts:?}") + } + } + match &function.args { + FunctionArguments::List(list) => { + assert_eq!( + list.args.len(), + expected_arg_count, + "dialect {dialect_name} parsed `{sql}` with unexpected argument count" + ); + } + args => panic!( + "dialect {dialect_name} expected positional argument list, got {args:?}" + ), + } + } + projection => panic!( + "dialect {dialect_name} expected single function projection for `{sql}`, got {projection:?}" + ), + }, + body => panic!( + "dialect {dialect_name} expected SELECT query body for `{sql}`, got {body:?}" + ), + }, + parsed => panic!( + "dialect {dialect_name} expected a single query statement for `{sql}`, got {parsed:?}" + ), + } + } + } +} + +#[test] +fn parse_non_pg_dialects_reject_xml_special_syntax() { + let xml_special_forms = [ + "SELECT xmlparse(content '')", + "SELECT xmlelement(name foo, 'bar')", + "SELECT xmlserialize(document '' as text)", + ]; + + let non_pg_dialects = + all_dialects_except(|d| d.is::() || d.is::()).dialects; + + for dialect in non_pg_dialects { + let dialect_name = format!("{dialect:?}"); + for sql in xml_special_forms { + assert!( + Parser::parse_sql(&*dialect, sql).is_err(), + "dialect {dialect_name} unexpectedly accepted XML special syntax: `{sql}`" + ); + } + } +} + #[test] fn parse_bignumeric_keyword() { let sql = r#"SELECT BIGNUMERIC '0'"#; diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 7c19f51e5..fda8174e9 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -8602,3 
+8602,234 @@ fn parse_pg_analyze() {
         _ => panic!("Expected Analyze, got: {stmt:?}"),
     }
 }
+
+#[test] // Smoke test: every statement below only has to parse through pg(); the resulting AST is not inspected.
+fn parse_postgres_xml_expression_regression_statements() {
+    let statements = [ // NOTE(review): several rows are textually identical (e.g. the repeated `xmlparse(content '')`); the XML payloads look stripped, confirm against the upstream file
+        "SELECT xmlconcat(xmlcomment('hello'), xmlelement(NAME qux, 'foo'), xmlcomment('world'))",
+        "SELECT xmlelement(name element, xmlattributes (1 as one, 'deuce' as two), 'content')",
+        "SELECT xmlelement(name element, xmlattributes ('unnamed and wrong'))",
+        "SELECT xmlelement(name element, xmlelement(name nested, 'stuff'))",
+        "SELECT xmlelement(name employee, xmlforest(name, age, salary as pay)) FROM emp",
+        "SELECT xmlelement(name duplicate, xmlattributes(1 as a, 2 as b, 3 as a))",
+        "SELECT xmlelement(name num, 37)",
+        "SELECT xmlelement(name foo, text 'bar')",
+        "SELECT xmlelement(name foo, xml 'bar')",
+        "SELECT xmlelement(name foo, text 'br')",
+        "SELECT xmlelement(name foo, xml 'br')",
+        "SELECT xmlelement(name foo, array[1, 2, 3])",
+        "SELECT xmlelement(name foo, bytea 'bar')",
+        "SELECT xmlelement(name foo, xmlattributes(true as bar))",
+        "SELECT xmlelement(name foo, xmlattributes('2009-04-09 00:24:37'::timestamp as bar))",
+        "SELECT xmlelement(name foo, xmlattributes('infinity'::timestamp as bar))",
+        r#"SELECT xmlelement(name foo, xmlattributes('<>&"''' as funny, xml 'br' as funnier))"#,
+        "SELECT xmlparse(content '')",
+        "SELECT xmlparse(content ' ')",
+        "SELECT xmlparse(content 'abc')",
+        "SELECT xmlparse(content 'x')",
+        "SELECT xmlparse(content '&')",
+        "SELECT xmlparse(content '&idontexist;')",
+        "SELECT xmlparse(content '')",
+        "SELECT xmlparse(content '')",
+        "SELECT xmlparse(content '&idontexist;')",
+        "SELECT xmlparse(content '')",
+        "SELECT xmlparse(document ' ')",
+        "SELECT xmlparse(document 'abc')",
+        "SELECT xmlparse(document 'x')",
+        "SELECT xmlparse(document '&')",
+        "SELECT xmlparse(document '&idontexist;')",
+        "SELECT xmlparse(document '')",
+        "SELECT xmlparse(document '')",
+        "SELECT xmlparse(document '&idontexist;')",
+        "SELECT xmlparse(document '')",
+        "SELECT xmlpi(name foo)",
+        "SELECT xmlpi(name xml)",
+        "SELECT xmlpi(name xmlstuff)",
+        "SELECT xmlpi(name foo, 'bar')",
+        "SELECT xmlpi(name foo, 'in?>valid')",
+        "SELECT xmlpi(name foo, null)",
+        "SELECT xmlpi(name xml, null)",
+        "SELECT xmlpi(name xmlstuff, null)",
+        r#"SELECT xmlpi(name "xml-stylesheet", 'href="mystyle.css" type="text/css"')"#,
+        "SELECT xmlpi(name foo, ' bar')",
+        "SELECT xmlroot(xml '', version no value, standalone no value)",
+        "SELECT xmlroot(xml '', version '2.0')",
+        "SELECT xmlroot(xml '', version no value, standalone yes)",
+        "SELECT xmlroot(xml '', version no value, standalone yes)",
+        "SELECT xmlroot(xmlroot(xml '', version '1.0'), version '1.1', standalone no)",
+        "SELECT xmlroot('', version no value, standalone no)",
+        "SELECT xmlroot('', version no value, standalone no value)",
+        "SELECT xmlroot('', version no value)",
+        "SELECT xmlroot ( xmlelement ( name gazonk, xmlattributes ( 'val' AS name, 1 + 1 AS num ), xmlelement ( NAME qux, 'foo' ) ), version '1.0', standalone yes )",
+        "SELECT xmlserialize(content data as character varying(20)) FROM xmltest",
+        "SELECT xmlserialize(content 'good' as char(10))",
+        "SELECT xmlserialize(document 'bad' as text)",
+        r#"SELECT xmlserialize(DOCUMENT '42' AS text INDENT)"#,
+        r#"SELECT xmlserialize(CONTENT '42' AS text INDENT)"#,
+        r#"SELECT xmlserialize(DOCUMENT '42' AS text NO INDENT)"#,
+        r#"SELECT xmlserialize(CONTENT '42' AS text NO INDENT)"#,
+        r#"SELECT xmlserialize(DOCUMENT '7342' AS text INDENT)"#,
+        r#"SELECT xmlserialize(CONTENT '7342' AS text INDENT)"#,
+        r#"SELECT xmlserialize(DOCUMENT 'text node73text node42' AS text INDENT)"#,
+        r#"SELECT xmlserialize(CONTENT 'text node73text node42' AS text INDENT)"#,
+        r#"SELECT xmlserialize(DOCUMENT '42text node73' AS text INDENT)"#,
+        r#"SELECT xmlserialize(CONTENT '42text node73' AS text INDENT)"#,
+        "SELECT xmlserialize(DOCUMENT '' AS text INDENT)",
+        "SELECT xmlserialize(CONTENT '' AS text INDENT)",
+        "SELECT xmlserialize(DOCUMENT ' ' AS text INDENT)",
+        "SELECT xmlserialize(CONTENT ' ' AS text INDENT)",
+        "SELECT xmlserialize(DOCUMENT NULL AS text INDENT)",
+        "SELECT xmlserialize(CONTENT NULL AS text INDENT)",
+        "SELECT xmlserialize(DOCUMENT '73' AS text INDENT)",
+        "SELECT xmlserialize(CONTENT '73' AS text INDENT)",
+        "SELECT xmlserialize(DOCUMENT '' AS text INDENT)",
+        "SELECT xmlserialize(CONTENT '' AS text INDENT)",
+        "SELECT xmlserialize(DOCUMENT '' AS text INDENT)",
+        "SELECT xmlserialize(CONTENT '' AS text INDENT)",
+        r#"SELECT xmlserialize(DOCUMENT '42' AS text) = xmlserialize(DOCUMENT '42' AS text NO INDENT)"#,
+        r#"SELECT xmlserialize(CONTENT '42' AS text) = xmlserialize(CONTENT '42' AS text NO INDENT)"#,
+        "SELECT xmlserialize(DOCUMENT ' ' AS text INDENT)",
+        "SELECT xmlserialize(CONTENT 'text node ' AS text INDENT)",
+    ];
+
+    for sql in statements {
+        pg().parse_sql_statements(sql).unwrap_or_else(|e| { // panic with the offending SQL so a failure identifies its row
+            panic!("failed to parse statement `{sql}` with error `{e}`");
+        });
+    }
+}
+
+#[test] // For each XML special form: checks the Expr variant in the first projection item and a few of its fields.
+fn parse_postgres_xml_expression_ast_shapes() {
+    let select = pg() // second argument is presumably the expected canonical rendering; confirm against the helper
+        .verified_only_select_with_canonical("SELECT xmlconcat(1, 2)", "SELECT XMLCONCAT(1, 2)");
+    match &select.projection[0] {
+        SelectItem::UnnamedExpr(Expr::XmlConcat(items)) => assert_eq!(items.len(), 2),
+        item => panic!("expected XmlConcat expression, got {item:?}"),
+    }
+
+    let select = pg().verified_only_select_with_canonical(
+        "SELECT xmlelement(name element, xmlattributes (1 as one, 'deuce' as two), 'content')",
+        "SELECT XMLELEMENT(NAME element, XMLATTRIBUTES(1 AS one, 'deuce' AS two), 'content')",
+    );
+    match &select.projection[0] {
+        SelectItem::UnnamedExpr(Expr::XmlElement(xml_element)) => {
+            assert_eq!(xml_element.name.value, "element");
+            assert_eq!(xml_element.attributes.as_ref().unwrap().len(), 2); // unwrap: a missing XMLATTRIBUTES list is itself a test failure
+            assert_eq!(xml_element.content.len(), 1);
+        }
+        item => panic!("expected XmlElement expression, got {item:?}"),
+    }
+
+    let select = pg().verified_only_select_with_canonical(
+        "SELECT xmlelement(name foo, xml 'bar')",
+        "SELECT XMLELEMENT(NAME foo, xml 'bar')",
+    );
+    match &select.projection[0] {
+        SelectItem::UnnamedExpr(Expr::XmlElement(XmlElementExpr { content, .. })) => {
+            assert_eq!(content.len(), 1);
+            match &content[0] {
+                Expr::TypedString(TypedString {
+                    data_type: DataType::Custom(type_name, modifiers),
+                    value,
+                    uses_odbc_syntax: false, // literal pattern: an ODBC-style typed string falls through to the panic arm
+                }) => {
+                    match type_name.0.as_slice() {
+                        [ObjectNamePart::Identifier(ident)] => {
+                            assert!(ident.value.eq_ignore_ascii_case("xml"));
+                            assert_eq!(ident.quote_style, None);
+                        }
+                        parts => panic!("expected simple xml type name, got {parts:?}"),
+                    }
+                    assert!(modifiers.is_empty());
+                    assert_eq!(value.value, Value::SingleQuotedString("bar".to_string()));
+                }
+                expr => panic!("expected xml typed-string content, got {expr:?}"),
+            }
+        }
+        item => panic!("expected XmlElement expression, got {item:?}"),
+    }
+
+    let select = pg().verified_only_select_with_canonical(
+        "SELECT xmlforest(1 as one, 2)",
+        "SELECT XMLFOREST(1 AS one, 2)",
+    );
+    match &select.projection[0] {
+        SelectItem::UnnamedExpr(Expr::XmlForest(items)) => {
+            assert_eq!(items.len(), 2);
+            assert_eq!(items[0].alias.as_ref().unwrap().value, "one");
+            assert!(items[1].alias.is_none());
+        }
+        item => panic!("expected XmlForest expression, got {item:?}"),
+    }
+
+    let select = pg().verified_only_select_with_canonical(
+        "SELECT xmlparse(content '')",
+        "SELECT XMLPARSE(CONTENT '')",
+    );
+    match &select.projection[0] {
+        SelectItem::UnnamedExpr(Expr::XmlParse(XmlParseExpr { mode, .. })) => {
+            assert_eq!(*mode, XmlParseMode::Content);
+        }
+        item => panic!("expected XmlParse expression, got {item:?}"),
+    }
+
+    let select = pg().verified_only_select_with_canonical(
+        "SELECT xmlpi(name foo, 'bar')",
+        "SELECT XMLPI(NAME foo, 'bar')",
+    );
+    match &select.projection[0] {
+        SelectItem::UnnamedExpr(Expr::XmlPi(XmlPiExpr { name, content })) => {
+            assert_eq!(name.value, "foo");
+            assert!(content.is_some());
+        }
+        item => panic!("expected XmlPi expression, got {item:?}"),
+    }
+
+    let select = pg().verified_only_select_with_canonical(
+        "SELECT xmlroot(xml '', version no value, standalone yes)",
+        "SELECT XMLROOT(xml '', VERSION NO VALUE, STANDALONE YES)",
+    );
+    match &select.projection[0] {
+        SelectItem::UnnamedExpr(Expr::XmlRoot(XmlRootExpr {
+            version,
+            standalone,
+            ..
+        })) => {
+            assert!(matches!(version, XmlRootVersion::NoValue));
+            assert_eq!(*standalone, Some(XmlStandalone::Yes));
+        }
+        item => panic!("expected XmlRoot expression, got {item:?}"),
+    }
+
+    let select = pg().verified_only_select_with_canonical(
+        "SELECT xmlserialize(document '' as text no indent)",
+        "SELECT XMLSERIALIZE(DOCUMENT '' AS TEXT NO INDENT)",
+    );
+    match &select.projection[0] {
+        SelectItem::UnnamedExpr(Expr::XmlSerialize(XmlSerializeExpr { mode, indent, .. })) => {
+            assert_eq!(*mode, XmlParseMode::Document);
+            assert_eq!(*indent, Some(XmlIndentOption::NoIndent));
+        }
+        item => panic!("expected XmlSerialize expression, got {item:?}"),
+    }
+}
+
+#[test] // Each statement omits one required piece of an XML special form and must fail to parse through pg().
+fn parse_postgres_xml_expression_negative_syntax() {
+    assert!(
+        pg().parse_sql_statements("SELECT xmlparse('x')")
+            .is_err(),
+        "xmlparse requires DOCUMENT|CONTENT mode"
+    );
+    assert!(
+        pg().parse_sql_statements("SELECT xmlroot(xml '', standalone yes)")
+            .is_err(),
+        "xmlroot requires VERSION clause"
+    );
+    assert!(
+        pg().parse_sql_statements("SELECT xmlserialize(document '' text)")
+            .is_err(),
+        "xmlserialize requires AS type"
+    );
+}