From d6a6a440a3afe3cf0bfb8ff1840f0c21b8139ba1 Mon Sep 17 00:00:00 2001 From: Luca Date: Sun, 9 Nov 2025 16:15:16 +0100 Subject: [PATCH 01/10] Added support for CREATE OPERATOR * statements --- src/ast/ddl.rs | 230 ++++++++++++++++++++++++++++ src/ast/mod.rs | 44 ++++-- src/ast/spans.rs | 26 +++- src/keywords.rs | 2 + src/parser/mod.rs | 289 ++++++++++++++++++++++++++++++++++++ tests/sqlparser_postgres.rs | 116 +++++++++++++++ 6 files changed, 692 insertions(+), 15 deletions(-) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index fd481213f..f9554dce0 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -3613,3 +3613,233 @@ impl Spanned for DropFunction { Span::empty() } } + +/// CREATE OPERATOR statement +/// See +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct CreateOperator { + /// Operator name (can be schema-qualified) + pub name: ObjectName, + /// FUNCTION or PROCEDURE parameter (function name) + pub function: ObjectName, + /// Whether PROCEDURE keyword was used (vs FUNCTION) + pub is_procedure: bool, + /// LEFTARG parameter (left operand type) + pub left_arg: Option, + /// RIGHTARG parameter (right operand type) + pub right_arg: Option, + /// COMMUTATOR parameter (commutator operator) + pub commutator: Option, + /// NEGATOR parameter (negator operator) + pub negator: Option, + /// RESTRICT parameter (restriction selectivity function) + pub restrict: Option, + /// JOIN parameter (join selectivity function) + pub join: Option, + /// HASHES flag + pub hashes: bool, + /// MERGES flag + pub merges: bool, +} + +/// CREATE OPERATOR FAMILY statement +/// See +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct CreateOperatorFamily { + /// Operator family name (can be schema-qualified) + pub name: ObjectName, + /// Index method (btree, hash, gist, gin, etc.) + pub using: Ident, +} + +/// CREATE OPERATOR CLASS statement +/// See +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct CreateOperatorClass { + /// Operator class name (can be schema-qualified) + pub name: ObjectName, + /// Whether this is the default operator class for the type + pub default: bool, + /// The data type + pub for_type: DataType, + /// Index method (btree, hash, gist, gin, etc.) + pub using: Ident, + /// Optional operator family name + pub family: Option, + /// List of operator class items (operators, functions, storage) + pub items: Vec, +} + +impl fmt::Display for CreateOperator { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "CREATE OPERATOR {} (", self.name)?; + + let function_keyword = if self.is_procedure { + "PROCEDURE" + } else { + "FUNCTION" + }; + let mut params = vec![format!("{} = {}", function_keyword, self.function)]; + + if let Some(left_arg) = &self.left_arg { + params.push(format!("LEFTARG = {}", left_arg)); + } + if let Some(right_arg) = &self.right_arg { + params.push(format!("RIGHTARG = {}", right_arg)); + } + if let Some(commutator) = &self.commutator { + params.push(format!("COMMUTATOR = {}", commutator)); + } + if let Some(negator) = &self.negator { + params.push(format!("NEGATOR = {}", negator)); + } + if let Some(restrict) = &self.restrict { + params.push(format!("RESTRICT = {}", restrict)); + } + if let Some(join) = &self.join { + params.push(format!("JOIN = {}", join)); + } + if self.hashes { + params.push("HASHES".to_string()); + } + if self.merges { + params.push("MERGES".to_string()); + } + + write!(f, "{}", params.join(", "))?; + write!(f, ")") + } +} + +impl fmt::Display for CreateOperatorFamily { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "CREATE OPERATOR FAMILY {} USING {}", + self.name, self.using + ) + } +} + +impl fmt::Display for CreateOperatorClass { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "CREATE OPERATOR CLASS {}", self.name)?; + if self.default { + write!(f, " DEFAULT")?; + } + write!(f, " FOR TYPE {} USING {}", self.for_type, self.using)?; + if let Some(family) = &self.family { + write!(f, " FAMILY {}", family)?; + } + write!(f, " AS {}", display_comma_separated(&self.items)) + } +} + +/// Operator argument types for CREATE OPERATOR CLASS +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct OperatorArgTypes { + pub left: DataType, + pub right: DataType, +} + +impl fmt::Display for OperatorArgTypes { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}, {}", self.left, self.right) + } +} + +/// An item in a CREATE OPERATOR CLASS statement +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum OperatorClassItem { + /// OPERATOR clause + Operator { + strategy_number: u32, + operator_name: ObjectName, + /// Optional operator argument types + op_types: Option, + /// FOR SEARCH or FOR ORDER BY + purpose: Option, + }, + /// FUNCTION clause + Function { + support_number: u32, + /// Optional function argument types for the operator class + op_types: Option>, + function_name: ObjectName, + /// Function argument types + argument_types: Vec, + }, + /// STORAGE clause + Storage { storage_type: DataType }, +} + +/// Purpose of an operator in an operator class +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum OperatorPurpose { + ForSearch, + ForOrderBy { sort_family: ObjectName }, +} + +impl fmt::Display for OperatorClassItem { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + OperatorClassItem::Operator { + strategy_number, + operator_name, + op_types, + purpose, + } => { + write!(f, "OPERATOR {strategy_number} {operator_name}")?; + if let Some(types) = op_types { + write!(f, " ({types})")?; + } + if let Some(purpose) = purpose { + write!(f, " {purpose}")?; + } + Ok(()) + } + OperatorClassItem::Function { + support_number, + op_types, + function_name, + argument_types, + } => { + write!(f, "FUNCTION {support_number}")?; + if let Some(types) = op_types { + write!(f, " ({})", display_comma_separated(types))?; + } + write!(f, " {function_name}")?; + if !argument_types.is_empty() { + write!(f, "({})", display_comma_separated(argument_types))?; + } + Ok(()) + } + OperatorClassItem::Storage { storage_type } => { + write!(f, "STORAGE {storage_type}") + } + } + } +} + +impl fmt::Display for OperatorPurpose { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + OperatorPurpose::ForSearch => write!(f, "FOR SEARCH"), + OperatorPurpose::ForOrderBy { sort_family } => { + write!(f, "FOR ORDER BY {sort_family}") + } + } + } +} diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 176d36545..90acf429b 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -65,13 +65,14 @@ pub use self::ddl::{ AlterTypeOperation, AlterTypeRename, AlterTypeRenameValue, ClusteredBy, ColumnDef, ColumnOption, ColumnOptionDef, ColumnOptions, ColumnPolicy, ColumnPolicyProperty, ConstraintCharacteristics, CreateConnector, CreateDomain, CreateExtension, CreateFunction, - CreateIndex, CreateTable, CreateTrigger, CreateView, Deduplicate, DeferrableInitial, - DropBehavior, DropExtension, DropFunction, DropTrigger, GeneratedAs, GeneratedExpressionMode, - IdentityParameters, IdentityProperty, IdentityPropertyFormatKind, IdentityPropertyKind, - IdentityPropertyOrder, IndexColumn, IndexOption, IndexType, KeyOrIndexDisplay, Msck, - NullsDistinctOption, Owner, Partition, ProcedureParam, ReferentialAction, RenameTableNameKind, - ReplicaIdentity, TagsColumnOption, TriggerObjectKind, Truncate, - UserDefinedTypeCompositeAttributeDef, UserDefinedTypeRepresentation, ViewColumnDef, + CreateIndex, CreateOperator, CreateOperatorClass, CreateOperatorFamily, CreateTable, + CreateTrigger, CreateView, Deduplicate, DeferrableInitial, DropBehavior, DropExtension, + DropFunction, DropTrigger, GeneratedAs, GeneratedExpressionMode, IdentityParameters, + IdentityProperty, IdentityPropertyFormatKind, IdentityPropertyKind, IdentityPropertyOrder, + IndexColumn, IndexOption, IndexType, KeyOrIndexDisplay, Msck, NullsDistinctOption, + OperatorArgTypes, OperatorClassItem, OperatorPurpose, Owner, Partition, ProcedureParam, + ReferentialAction, RenameTableNameKind, ReplicaIdentity, TagsColumnOption, TriggerObjectKind, + Truncate, UserDefinedTypeCompositeAttributeDef, UserDefinedTypeRepresentation, ViewColumnDef, }; pub use self::dml::{Delete, Insert, Update}; pub use self::operator::{BinaryOperator, UnaryOperator}; @@ -2787,10 +2788,11 @@ impl fmt::Display for Declare { } /// Sql options of a `CREATE TABLE` statement. -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[derive(Debug, Default, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum CreateTableOptions { + #[default] None, /// Options specified using the `WITH` keyword. /// e.g. `WITH (description = "123")` @@ -2819,12 +2821,6 @@ pub enum CreateTableOptions { TableProperties(Vec), } -impl Default for CreateTableOptions { - fn default() -> Self { - Self::None - } -} - impl fmt::Display for CreateTableOptions { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { @@ -3340,6 +3336,21 @@ pub enum Statement { /// See [Hive](https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=27362034#LanguageManualDDL-CreateDataConnectorCreateConnector) CreateConnector(CreateConnector), /// ```sql + /// CREATE OPERATOR + /// ``` + /// See [PostgreSQL](https://www.postgresql.org/docs/current/sql-createoperator.html) + CreateOperator(CreateOperator), + /// ```sql + /// CREATE OPERATOR FAMILY + /// ``` + /// See [PostgreSQL](https://www.postgresql.org/docs/current/sql-createopfamily.html) + CreateOperatorFamily(CreateOperatorFamily), + /// ```sql + /// CREATE OPERATOR CLASS + /// ``` + /// See [PostgreSQL](https://www.postgresql.org/docs/current/sql-createopclass.html) + CreateOperatorClass(CreateOperatorClass), + /// ```sql /// ALTER TABLE /// ``` AlterTable(AlterTable), @@ -4886,6 +4897,11 @@ impl fmt::Display for Statement { Ok(()) } Statement::CreateConnector(create_connector) => create_connector.fmt(f), + Statement::CreateOperator(create_operator) => create_operator.fmt(f), + Statement::CreateOperatorFamily(create_operator_family) => { + create_operator_family.fmt(f) + } + Statement::CreateOperatorClass(create_operator_class) => create_operator_class.fmt(f), Statement::AlterTable(alter_table) => write!(f, "{alter_table}"), Statement::AlterIndex { name, operation } => { write!(f, "ALTER INDEX {name} {operation}") diff --git a/src/ast/spans.rs b/src/ast/spans.rs index 7d2a00095..5f95e7513 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -17,7 +17,8 @@ use crate::ast::{ ddl::AlterSchema, query::SelectItemQualifiedWildcardKind, AlterSchemaOperation, AlterTable, - ColumnOptions, CreateView, ExportData, Owner, TypedString, + ColumnOptions, CreateOperator, CreateOperatorClass, CreateOperatorFamily, CreateView, + ExportData, Owner, TypedString, }; use core::iter; @@ -367,6 +368,11 @@ impl Spanned for Statement { Statement::CreateSecret { .. } => Span::empty(), Statement::CreateServer { .. } => Span::empty(), Statement::CreateConnector { .. } => Span::empty(), + Statement::CreateOperator(create_operator) => create_operator.span(), + Statement::CreateOperatorFamily(create_operator_family) => { + create_operator_family.span() + } + Statement::CreateOperatorClass(create_operator_class) => create_operator_class.span(), Statement::AlterTable(alter_table) => alter_table.span(), Statement::AlterIndex { name, operation } => name.span().union(&operation.span()), Statement::AlterView { @@ -2536,3 +2542,21 @@ ALTER TABLE users assert_eq!(stmt_span.end, (4, 11).into()); } } + +impl Spanned for CreateOperator { + fn span(&self) -> Span { + Span::empty() + } +} + +impl Spanned for CreateOperatorFamily { + fn span(&self) -> Span { + Span::empty() + } +} + +impl Spanned for CreateOperatorClass { + fn span(&self) -> Span { + Span::empty() + } +} diff --git a/src/keywords.rs b/src/keywords.rs index 319c57827..d69848944 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -194,6 +194,7 @@ define_keywords!( CHECK, CHECKSUM, CIRCLE, + CLASS, CLEANPATH, CLEAR, CLOB, @@ -381,6 +382,7 @@ define_keywords!( FAIL, FAILOVER, FALSE, + FAMILY, FETCH, FIELDS, FILE, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 9a01e510b..6b86b9b91 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -4791,6 +4791,15 @@ impl<'a> Parser<'a> { self.parse_create_procedure(or_alter) } else if self.parse_keyword(Keyword::CONNECTOR) { self.parse_create_connector() + } else if self.parse_keyword(Keyword::OPERATOR) { + // Check if this is CREATE OPERATOR FAMILY or CREATE OPERATOR CLASS + if self.parse_keyword(Keyword::FAMILY) { + self.parse_create_operator_family() + } else if self.parse_keyword(Keyword::CLASS) { + self.parse_create_operator_class() + } else { + self.parse_create_operator() + } } else if self.parse_keyword(Keyword::SERVER) { self.parse_pg_create_server() } else { @@ -6421,6 +6430,286 @@ impl<'a> Parser<'a> { })) } + /// Helper function to parse an operator name (which can contain special characters) + /// Operator names can be schema-qualified (e.g., schema.operator) + fn parse_operator_name(&mut self) -> Result { + let mut name_parts = vec![]; + loop { + let token = self.next_token(); + let part = ObjectNamePart::Identifier(Ident::new(token.to_string())); + name_parts.push(part); + + if !self.consume_token(&Token::Period) { + break; + } + } + Ok(ObjectName(name_parts)) + } + + /// Parse a CREATE OPERATOR statement + /// + /// [PostgreSQL Documentation](https://www.postgresql.org/docs/current/sql-createoperator.html) + pub fn parse_create_operator(&mut self) -> Result { + // Parse the operator name (can be schema-qualified) + // Operators can contain special characters like +, -, *, /, <, >, =, ~, !, @, #, %, ^, &, |, `, ? + // See https://www.postgresql.org/docs/current/sql-createoperator.html + let name = self.parse_operator_name()?; + + // Expect opening parenthesis + self.expect_token(&Token::LParen)?; + + let mut function: Option = None; + let mut is_procedure = false; + let mut left_arg: Option = None; + let mut right_arg: Option = None; + let mut commutator: Option = None; + let mut negator: Option = None; + let mut restrict: Option = None; + let mut join: Option = None; + let mut hashes = false; + let mut merges = false; + + loop { + // Parse parameter name + let param_name = self.parse_identifier()?; + let param_name_upper = param_name.value.to_uppercase(); + + // Check if this is a flag (HASHES or MERGES) - no '=' expected + match param_name_upper.as_str() { + "HASHES" => { + hashes = true; + } + "MERGES" => { + merges = true; + } + "FUNCTION" | "PROCEDURE" => { + self.expect_token(&Token::Eq)?; + let func_name = self.parse_object_name(false)?; + function = Some(func_name); + is_procedure = param_name_upper == "PROCEDURE"; + } + "LEFTARG" => { + self.expect_token(&Token::Eq)?; + let data_type = self.parse_data_type()?; + left_arg = Some(data_type); + } + "RIGHTARG" => { + self.expect_token(&Token::Eq)?; + let data_type = self.parse_data_type()?; + right_arg = Some(data_type); + } + "COMMUTATOR" => { + self.expect_token(&Token::Eq)?; + let op_name = if self.parse_keyword(Keyword::OPERATOR) { + self.expect_token(&Token::LParen)?; + let op = self.parse_operator_name()?; + self.expect_token(&Token::RParen)?; + op + } else { + self.parse_operator_name()? + }; + commutator = Some(op_name); + } + "NEGATOR" => { + self.expect_token(&Token::Eq)?; + let op_name = if self.parse_keyword(Keyword::OPERATOR) { + self.expect_token(&Token::LParen)?; + let op = self.parse_operator_name()?; + self.expect_token(&Token::RParen)?; + op + } else { + self.parse_operator_name()? + }; + negator = Some(op_name); + } + "RESTRICT" => { + self.expect_token(&Token::Eq)?; + let func_name = self.parse_object_name(false)?; + restrict = Some(func_name); + } + "JOIN" => { + self.expect_token(&Token::Eq)?; + let func_name = self.parse_object_name(false)?; + join = Some(func_name); + } + _ => { + return Err(ParserError::ParserError(format!( + "Unknown CREATE OPERATOR parameter: {}", + param_name_upper + ))) + } + } + + // Check for comma or closing parenthesis + if !self.consume_token(&Token::Comma) { + break; + } + } + + // Expect closing parenthesis + self.expect_token(&Token::RParen)?; + + // FUNCTION is required + let function = function.ok_or_else(|| { + ParserError::ParserError("CREATE OPERATOR requires FUNCTION parameter".to_string()) + })?; + + Ok(Statement::CreateOperator(CreateOperator { + name, + function, + is_procedure, + left_arg, + right_arg, + commutator, + negator, + restrict, + join, + hashes, + merges, + })) + } + + /// Parse a CREATE OPERATOR FAMILY statement + /// + /// [PostgreSQL Documentation](https://www.postgresql.org/docs/current/sql-createopfamily.html) + pub fn parse_create_operator_family(&mut self) -> Result { + let name = self.parse_object_name(false)?; + self.expect_keyword(Keyword::USING)?; + let using = self.parse_identifier()?; + + Ok(Statement::CreateOperatorFamily(CreateOperatorFamily { + name, + using, + })) + } + + /// Parse a CREATE OPERATOR CLASS statement + /// + /// [PostgreSQL Documentation](https://www.postgresql.org/docs/current/sql-createopclass.html) + pub fn parse_create_operator_class(&mut self) -> Result { + let name = self.parse_object_name(false)?; + let default = self.parse_keyword(Keyword::DEFAULT); + self.expect_keywords(&[Keyword::FOR, Keyword::TYPE])?; + let for_type = self.parse_data_type()?; + self.expect_keyword(Keyword::USING)?; + let using = self.parse_identifier()?; + + let family = if self.parse_keyword(Keyword::FAMILY) { + Some(self.parse_object_name(false)?) + } else { + None + }; + + self.expect_keyword(Keyword::AS)?; + + let mut items = vec![]; + loop { + if self.parse_keyword(Keyword::OPERATOR) { + let strategy_number = self.parse_literal_uint()? as u32; + let operator_name = self.parse_operator_name()?; + + // Optional operator argument types + let op_types = if self.consume_token(&Token::LParen) { + let left = self.parse_data_type()?; + self.expect_token(&Token::Comma)?; + let right = self.parse_data_type()?; + self.expect_token(&Token::RParen)?; + Some(OperatorArgTypes { left, right }) + } else { + None + }; + + // Optional purpose + let purpose = if self.parse_keyword(Keyword::FOR) { + if self.parse_keyword(Keyword::SEARCH) { + Some(OperatorPurpose::ForSearch) + } else if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { + let sort_family = self.parse_object_name(false)?; + Some(OperatorPurpose::ForOrderBy { sort_family }) + } else { + return self.expected("SEARCH or ORDER BY after FOR", self.peek_token()); + } + } else { + None + }; + + items.push(OperatorClassItem::Operator { + strategy_number, + operator_name, + op_types, + purpose, + }); + } else if self.parse_keyword(Keyword::FUNCTION) { + let support_number = self.parse_literal_uint()? as u32; + + // Optional operator types + let op_types = + if self.consume_token(&Token::LParen) && self.peek_token() != Token::RParen { + let mut types = vec![]; + loop { + types.push(self.parse_data_type()?); + if !self.consume_token(&Token::Comma) { + break; + } + } + self.expect_token(&Token::RParen)?; + Some(types) + } else if self.consume_token(&Token::LParen) { + self.expect_token(&Token::RParen)?; + Some(vec![]) + } else { + None + }; + + let function_name = self.parse_object_name(false)?; + + // Function argument types + let argument_types = if self.consume_token(&Token::LParen) { + let mut types = vec![]; + loop { + if self.peek_token() == Token::RParen { + break; + } + types.push(self.parse_data_type()?); + if !self.consume_token(&Token::Comma) { + break; + } + } + self.expect_token(&Token::RParen)?; + types + } else { + vec![] + }; + + items.push(OperatorClassItem::Function { + support_number, + op_types, + function_name, + argument_types, + }); + } else if self.parse_keyword(Keyword::STORAGE) { + let storage_type = self.parse_data_type()?; + items.push(OperatorClassItem::Storage { storage_type }); + } else { + break; + } + + // Check for comma separator + if !self.consume_token(&Token::Comma) { + break; + } + } + + Ok(Statement::CreateOperatorClass(CreateOperatorClass { + name, + default, + for_type, + using, + family, + items, + })) + } + pub fn parse_drop(&mut self) -> Result { // MySQL dialect supports `TEMPORARY` let temporary = dialect_of!(self is MySqlDialect | GenericDialect | DuckDbDialect) diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 9ba0fb978..74fc3f631 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -6650,3 +6650,119 @@ fn parse_foreign_key_match_with_actions() { pg_and_generic().verified_stmt(sql); } + +#[test] +fn parse_create_operator() { + // Test a basic CREATE OPERATOR statement + let sql = "CREATE OPERATOR < (PROCEDURE = \"cas_lt\", LEFTARG = CAS, RIGHTARG = CAS, COMMUTATOR = >, NEGATOR = >=, RESTRICT = scalarltsel, JOIN = scalarltjoinsel)"; + let statement = pg().verified_stmt(sql); + + // Verify the parsed statement + match statement { + Statement::CreateOperator(CreateOperator { + name, + function, + is_procedure, + left_arg, + right_arg, + commutator, + negator, + restrict, + join, + hashes, + merges, + }) => { + assert_eq!(name.to_string(), "<"); + assert_eq!(function.to_string(), "\"cas_lt\""); + assert!(is_procedure); + assert_eq!(left_arg.as_ref().unwrap().to_string(), "CAS"); + assert_eq!(right_arg.as_ref().unwrap().to_string(), "CAS"); + assert_eq!(commutator.as_ref().unwrap().to_string(), ">"); + assert_eq!(negator.as_ref().unwrap().to_string(), ">="); + assert_eq!(restrict.as_ref().unwrap().to_string(), "scalarltsel"); + assert_eq!(join.as_ref().unwrap().to_string(), "scalarltjoinsel"); + assert!(!hashes); + assert!(!merges); + } + _ => panic!("Expected Statement::CreateOperator, got {:?}", statement), + } +} + +#[test] +fn parse_create_operator_family() { + let sql = "CREATE OPERATOR FAMILY CAS_btree_ops USING btree"; + let statement = pg().verified_stmt(sql); + + match statement { + Statement::CreateOperatorFamily(CreateOperatorFamily { name, using }) => { + assert_eq!(name.to_string(), "CAS_btree_ops"); + assert_eq!(using.to_string(), "btree"); + } + _ => panic!( + "Expected Statement::CreateOperatorFamily, got {:?}", + statement + ), + } +} + +#[test] +fn parse_create_operator_class() { + let sql = "CREATE OPERATOR CLASS CAS_btree_ops DEFAULT FOR TYPE CAS USING btree FAMILY CAS_btree_ops AS OPERATOR 1 <, OPERATOR 2 <=, OPERATOR 3 =, OPERATOR 4 >=, OPERATOR 5 >, FUNCTION 1 cas_cmp(CAS, CAS)"; + let statement = pg().verified_stmt(sql); + + match statement { + Statement::CreateOperatorClass(CreateOperatorClass { + name, + default, + for_type, + using, + family, + items, + }) => { + assert_eq!(name.to_string(), "CAS_btree_ops"); + assert!(default); + assert_eq!(for_type.to_string(), "CAS"); + assert_eq!(using.to_string(), "btree"); + assert_eq!(family.unwrap().to_string(), "CAS_btree_ops"); + assert_eq!(items.len(), 6); + + // Check first operator + match &items[0] { + OperatorClassItem::Operator { + strategy_number, + operator_name, + op_types, + purpose, + } => { + assert_eq!(*strategy_number, 1); + assert_eq!(operator_name.to_string(), "<"); + assert!(op_types.is_none()); + assert!(purpose.is_none()); + } + _ => panic!("Expected Operator"), + } + + // Check function + match &items[5] { + OperatorClassItem::Function { + support_number, + op_types, + function_name, + argument_types, + } => { + assert_eq!(*support_number, 1); + assert!(op_types.is_none()); + assert_eq!(function_name.to_string(), "cas_cmp"); + assert_eq!(argument_types.len(), 2); + assert_eq!(argument_types[0].to_string(), "CAS"); + assert_eq!(argument_types[1].to_string(), "CAS"); + } + _ => panic!("Expected Function"), + } + } + _ => panic!( + "Expected Statement::CreateOperatorClass, got {:?}", + statement + ), + } +} From 535870b14e97fc2bd239c4d5a851da5bb88ff988 Mon Sep 17 00:00:00 2001 From: Luca Date: Sun, 9 Nov 2025 16:26:17 +0100 Subject: [PATCH 02/10] Refactored parsing to using keywords instead of strings --- src/keywords.rs | 8 ++++++++ src/parser/mod.rs | 45 +++++++++++++++++++++++++-------------------- 2 files changed, 33 insertions(+), 20 deletions(-) diff --git a/src/keywords.rs b/src/keywords.rs index d69848944..0426f9f25 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -214,6 +214,7 @@ define_keywords!( COMMENT, COMMIT, COMMITTED, + COMMUTATOR, COMPATIBLE, COMPRESSION, COMPUPDATE, @@ -444,6 +445,7 @@ define_keywords!( GROUPS, GZIP, HASH, + HASHES, HAVING, HEADER, HEAP, @@ -536,7 +538,10 @@ define_keywords!( LATERAL, LEAD, LEADING, + LEAKPROOF, + LEAST, LEFT, + LEFTARG, LEVEL, LIKE, LIKE_REGEX, @@ -590,6 +595,7 @@ define_keywords!( MEDIUMTEXT, MEMBER, MERGE, + MERGES, MESSAGE, METADATA, METHOD, @@ -627,6 +633,7 @@ define_keywords!( NATURAL, NCHAR, NCLOB, + NEGATOR, NEST, NESTED, NETWORK, @@ -834,6 +841,7 @@ define_keywords!( RETURNS, REVOKE, RIGHT, + RIGHTARG, RLIKE, RM, ROLE, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 6b86b9b91..2831d1461 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -6470,35 +6470,45 @@ impl<'a> Parser<'a> { let mut merges = false; loop { - // Parse parameter name - let param_name = self.parse_identifier()?; - let param_name_upper = param_name.value.to_uppercase(); + // Parse parameter name as keyword + let keyword = self.expect_one_of_keywords(&[ + Keyword::FUNCTION, + Keyword::PROCEDURE, + Keyword::LEFTARG, + Keyword::RIGHTARG, + Keyword::COMMUTATOR, + Keyword::NEGATOR, + Keyword::RESTRICT, + Keyword::JOIN, + Keyword::HASHES, + Keyword::MERGES, + ])?; // Check if this is a flag (HASHES or MERGES) - no '=' expected - match param_name_upper.as_str() { - "HASHES" => { + match keyword { + Keyword::HASHES => { hashes = true; } - "MERGES" => { + Keyword::MERGES => { merges = true; } - "FUNCTION" | "PROCEDURE" => { + Keyword::FUNCTION | Keyword::PROCEDURE => { self.expect_token(&Token::Eq)?; let func_name = self.parse_object_name(false)?; function = Some(func_name); - is_procedure = param_name_upper == "PROCEDURE"; + is_procedure = keyword == Keyword::PROCEDURE; } - "LEFTARG" => { + Keyword::LEFTARG => { self.expect_token(&Token::Eq)?; let data_type = self.parse_data_type()?; left_arg = Some(data_type); } - "RIGHTARG" => { + Keyword::RIGHTARG => { self.expect_token(&Token::Eq)?; let data_type = self.parse_data_type()?; right_arg = Some(data_type); } - "COMMUTATOR" => { + Keyword::COMMUTATOR => { self.expect_token(&Token::Eq)?; let op_name = if self.parse_keyword(Keyword::OPERATOR) { self.expect_token(&Token::LParen)?; @@ -6510,7 +6520,7 @@ impl<'a> Parser<'a> { }; commutator = Some(op_name); } - "NEGATOR" => { + Keyword::NEGATOR => { self.expect_token(&Token::Eq)?; let op_name = if self.parse_keyword(Keyword::OPERATOR) { self.expect_token(&Token::LParen)?; @@ -6522,22 +6532,17 @@ impl<'a> Parser<'a> { }; negator = Some(op_name); } - "RESTRICT" => { + Keyword::RESTRICT => { self.expect_token(&Token::Eq)?; let func_name = self.parse_object_name(false)?; restrict = Some(func_name); } - "JOIN" => { + Keyword::JOIN => { self.expect_token(&Token::Eq)?; let func_name = self.parse_object_name(false)?; join = Some(func_name); } - _ => { - return Err(ParserError::ParserError(format!( - "Unknown CREATE OPERATOR parameter: {}", - param_name_upper - ))) - } + _ => unreachable!("unexpected keyword in CREATE OPERATOR"), } // Check for comma or closing parenthesis From c317547f7a8e224168ef73cf2667b21dc36ed685 Mon Sep 17 00:00:00 2001 From: Luca Date: Sun, 9 Nov 2025 16:29:15 +0100 Subject: [PATCH 03/10] Fixed no-std error --- src/ast/ddl.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index f9554dce0..bab375022 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -19,7 +19,13 @@ //! (commonly referred to as Data Definition Language, or DDL) #[cfg(not(feature = "std"))] -use alloc::{boxed::Box, format, string::String, vec, vec::Vec}; +use alloc::{ + boxed::Box, + format, + string::{String, ToString}, + vec, + vec::Vec, +}; use core::fmt::{self, Display, Write}; #[cfg(feature = "serde")] From 994344dcf6f2e43807882a0ce577380bdf5d2f21 Mon Sep 17 00:00:00 2001 From: Luca Date: Sun, 9 Nov 2025 16:31:06 +0100 Subject: [PATCH 04/10] Fixed clippy code smell --- src/ast/spans.rs | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/src/ast/spans.rs b/src/ast/spans.rs index 5f95e7513..749aeac4c 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -2351,6 +2351,24 @@ impl Spanned for AlterTable { } } +impl Spanned for CreateOperator { + fn span(&self) -> Span { + Span::empty() + } +} + +impl Spanned for CreateOperatorFamily { + fn span(&self) -> Span { + Span::empty() + } +} + +impl Spanned for CreateOperatorClass { + fn span(&self) -> Span { + Span::empty() + } +} + #[cfg(test)] pub mod tests { use crate::dialect::{Dialect, GenericDialect, SnowflakeDialect}; @@ -2542,21 +2560,3 @@ ALTER TABLE users assert_eq!(stmt_span.end, (4, 11).into()); } } - -impl Spanned for CreateOperator { - fn span(&self) -> Span { - Span::empty() - } -} - -impl Spanned for CreateOperatorFamily { - fn span(&self) -> Span { - Span::empty() - } -} - -impl Spanned for CreateOperatorClass { - fn span(&self) -> Span { - Span::empty() - } -} From 6bf05f021a6d584cf1d3f566782522b37d5a8f61 Mon Sep 17 00:00:00 2001 From: Luca Cappelletti Date: Thu, 13 Nov 2025 15:27:46 +0100 Subject: [PATCH 05/10] Update src/parser/mod.rs Co-authored-by: Ifeanyi Ubah --- src/parser/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 166eb4dd3..d078cecdc 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -6447,7 +6447,7 @@ impl<'a> Parser<'a> { Ok(ObjectName(name_parts)) } - /// Parse a CREATE OPERATOR statement + /// Parse a `CREATE OPERATOR` statement /// /// [PostgreSQL Documentation](https://www.postgresql.org/docs/current/sql-createoperator.html) pub fn parse_create_operator(&mut self) -> Result { From 3d677e13f35eca911019621170ed7d6ae7e4bee9 Mon Sep 17 00:00:00 2001 From: Luca Cappelletti Date: Thu, 13 Nov 2025 15:28:10 +0100 Subject: [PATCH 06/10] Update src/parser/mod.rs Co-authored-by: Ifeanyi Ubah --- src/parser/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index d078cecdc..23ae260a3 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -6575,7 +6575,7 @@ impl<'a> Parser<'a> { })) } - /// Parse a CREATE OPERATOR FAMILY statement + /// Parse a `CREATE OPERATOR FAMILY` statement /// /// [PostgreSQL Documentation](https://www.postgresql.org/docs/current/sql-createopfamily.html) pub fn parse_create_operator_family(&mut self) -> Result { From 4d4573900e33cbe6b9a916e7ba0a6f804dff2aec Mon Sep 17 00:00:00 2001 From: Luca Date: Thu, 13 Nov 2025 20:48:54 +0100 Subject: [PATCH 07/10] Tried to simplify and extend test coverage --- src/parser/mod.rs | 93 ++++---- tests/sqlparser_postgres.rs | 436 +++++++++++++++++++++++++----------- 2 files changed, 352 insertions(+), 177 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 23ae260a3..d0414bab6 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -6431,32 +6431,33 @@ impl<'a> Parser<'a> { })) } - /// Helper function to parse an operator name (which can contain special characters) - /// Operator names can be schema-qualified (e.g., schema.operator) - fn parse_operator_name(&mut self) -> Result { - let mut name_parts = vec![]; + /// Parse an operator name, which can contain special characters like +, -, <, >, = + /// that are tokenized as operator tokens rather than identifiers. + /// This is used for PostgreSQL CREATE OPERATOR statements. + /// + /// Examples: `+`, `myschema.+`, `pg_catalog.<=` + pub fn parse_operator_name(&mut self) -> Result { + let mut parts = vec![]; loop { - let token = self.next_token(); - let part = ObjectNamePart::Identifier(Ident::new(token.to_string())); - name_parts.push(part); - + parts.push(ObjectNamePart::Identifier(Ident::new(self.next_token().to_string()))); if !self.consume_token(&Token::Period) { break; } } - Ok(ObjectName(name_parts)) + Ok(ObjectName(parts)) } /// Parse a `CREATE OPERATOR` statement /// /// [PostgreSQL Documentation](https://www.postgresql.org/docs/current/sql-createoperator.html) pub fn parse_create_operator(&mut self) -> Result { - // Parse the operator name (can be schema-qualified) - // Operators can contain special characters like +, -, *, /, <, >, =, ~, !, @, #, %, ^, &, |, `, ? - // See https://www.postgresql.org/docs/current/sql-createoperator.html - let name = self.parse_operator_name()?; + macro_rules! dup_err { + ($name:expr) => { + ParserError::ParserError(format!("Duplicate {} clause in CREATE OPERATOR", $name)) + }; + } - // Expect opening parenthesis + let name = self.parse_operator_name()?; self.expect_token(&Token::LParen)?; let mut function: Option = None; @@ -6471,82 +6472,74 @@ impl<'a> Parser<'a> { let mut merges = false; loop { - // Parse parameter name as keyword let keyword = self.expect_one_of_keywords(&[ - Keyword::FUNCTION, - Keyword::PROCEDURE, - Keyword::LEFTARG, - Keyword::RIGHTARG, - Keyword::COMMUTATOR, - Keyword::NEGATOR, - Keyword::RESTRICT, - Keyword::JOIN, - Keyword::HASHES, - Keyword::MERGES, + Keyword::FUNCTION, Keyword::PROCEDURE, Keyword::LEFTARG, Keyword::RIGHTARG, + Keyword::COMMUTATOR, Keyword::NEGATOR, Keyword::RESTRICT, Keyword::JOIN, + Keyword::HASHES, Keyword::MERGES, ])?; - // Check if this is a flag (HASHES or MERGES) - no '=' expected match keyword { Keyword::HASHES => { + if hashes { return Err(dup_err!("HASHES")); } hashes = true; } Keyword::MERGES => { + if merges { return Err(dup_err!("MERGES")); } merges = true; } Keyword::FUNCTION | Keyword::PROCEDURE => { + if function.is_some() { return Err(dup_err!("FUNCTION/PROCEDURE")); } self.expect_token(&Token::Eq)?; - let func_name = self.parse_object_name(false)?; - function = Some(func_name); + function = Some(self.parse_object_name(false)?); is_procedure = keyword == Keyword::PROCEDURE; } Keyword::LEFTARG => { + if left_arg.is_some() { return Err(dup_err!("LEFTARG")); } self.expect_token(&Token::Eq)?; - let data_type = self.parse_data_type()?; - left_arg = Some(data_type); + left_arg = Some(self.parse_data_type()?); } Keyword::RIGHTARG => { + if right_arg.is_some() { return Err(dup_err!("RIGHTARG")); } self.expect_token(&Token::Eq)?; - let data_type = self.parse_data_type()?; - right_arg = Some(data_type); + right_arg = Some(self.parse_data_type()?); } Keyword::COMMUTATOR => { + if commutator.is_some() { return Err(dup_err!("COMMUTATOR")); } self.expect_token(&Token::Eq)?; - let op_name = if self.parse_keyword(Keyword::OPERATOR) { + if self.parse_keyword(Keyword::OPERATOR) { self.expect_token(&Token::LParen)?; - let op = self.parse_operator_name()?; + commutator = Some(self.parse_operator_name()?); self.expect_token(&Token::RParen)?; - op } else { - self.parse_operator_name()? - }; - commutator = Some(op_name); + commutator = Some(self.parse_operator_name()?); + } } Keyword::NEGATOR => { + if negator.is_some() { return Err(dup_err!("NEGATOR")); } self.expect_token(&Token::Eq)?; - let op_name = if self.parse_keyword(Keyword::OPERATOR) { + if self.parse_keyword(Keyword::OPERATOR) { self.expect_token(&Token::LParen)?; - let op = self.parse_operator_name()?; + negator = Some(self.parse_operator_name()?); self.expect_token(&Token::RParen)?; - op } else { - self.parse_operator_name()? - }; - negator = Some(op_name); + negator = Some(self.parse_operator_name()?); + } } Keyword::RESTRICT => { + if restrict.is_some() { return Err(dup_err!("RESTRICT")); } self.expect_token(&Token::Eq)?; - let func_name = self.parse_object_name(false)?; - restrict = Some(func_name); + restrict = Some(self.parse_object_name(false)?); } Keyword::JOIN => { + if join.is_some() { return Err(dup_err!("JOIN")); } self.expect_token(&Token::Eq)?; - let func_name = self.parse_object_name(false)?; - join = Some(func_name); + join = Some(self.parse_object_name(false)?); } - _ => unreachable!("unexpected keyword in CREATE OPERATOR"), + _ => return Err(ParserError::ParserError(format!( + "Unexpected keyword {:?} in CREATE OPERATOR", keyword + ))), } - // Check for comma or closing parenthesis if !self.consume_token(&Token::Comma) { break; } diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 06b47b229..6b03cbb91 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -6548,7 +6548,9 @@ fn parse_create_server() { #[test] fn parse_alter_schema() { - match pg_and_generic().verified_stmt("ALTER SCHEMA foo RENAME TO bar") { + // Test RENAME operation + let stmt = pg_and_generic().verified_stmt("ALTER SCHEMA foo RENAME TO bar"); + match stmt { Statement::AlterSchema(AlterSchema { operations, .. }) => { assert_eq!( operations, @@ -6560,52 +6562,26 @@ fn parse_alter_schema() { _ => unreachable!(), } - match pg_and_generic().verified_stmt("ALTER SCHEMA foo OWNER TO bar") { - Statement::AlterSchema(AlterSchema { operations, .. }) => { - assert_eq!( - operations, - vec![AlterSchemaOperation::OwnerTo { - owner: Owner::Ident("bar".into()) - }] - ); - } - _ => unreachable!(), - } - - match pg_and_generic().verified_stmt("ALTER SCHEMA foo OWNER TO CURRENT_ROLE") { - Statement::AlterSchema(AlterSchema { operations, .. }) => { - assert_eq!( - operations, - vec![AlterSchemaOperation::OwnerTo { - owner: Owner::CurrentRole - }] - ); - } - _ => unreachable!(), - } - - match pg_and_generic().verified_stmt("ALTER SCHEMA foo OWNER TO CURRENT_USER") { - Statement::AlterSchema(AlterSchema { operations, .. }) => { - assert_eq!( - operations, - vec![AlterSchemaOperation::OwnerTo { - owner: Owner::CurrentUser - }] - ); - } - _ => unreachable!(), - } - - match pg_and_generic().verified_stmt("ALTER SCHEMA foo OWNER TO SESSION_USER") { - Statement::AlterSchema(AlterSchema { operations, .. }) => { - assert_eq!( - operations, - vec![AlterSchemaOperation::OwnerTo { - owner: Owner::SessionUser - }] - ); + // Test OWNER TO operations with different owner types + for (owner_clause, expected_owner) in &[ + ("bar", Owner::Ident("bar".into())), + ("CURRENT_ROLE", Owner::CurrentRole), + ("CURRENT_USER", Owner::CurrentUser), + ("SESSION_USER", Owner::SessionUser), + ] { + let sql = format!("ALTER SCHEMA foo OWNER TO {}", owner_clause); + let stmt = pg_and_generic().verified_stmt(&sql); + match stmt { + Statement::AlterSchema(AlterSchema { operations, .. }) => { + assert_eq!( + operations, + vec![AlterSchemaOperation::OwnerTo { + owner: expected_owner.clone() + }] + ); + } + _ => unreachable!(), } - _ => unreachable!(), } } @@ -6659,116 +6635,322 @@ fn parse_foreign_key_match_with_actions() { #[test] fn parse_create_operator() { - // Test a basic CREATE OPERATOR statement - let sql = "CREATE OPERATOR < (PROCEDURE = \"cas_lt\", LEFTARG = CAS, RIGHTARG = CAS, COMMUTATOR = >, NEGATOR = >=, RESTRICT = scalarltsel, JOIN = scalarltjoinsel)"; - let statement = pg().verified_stmt(sql); - - // Verify the parsed statement - match statement { + let sql = "CREATE OPERATOR myschema.@@ (PROCEDURE = myschema.my_proc, LEFTARG = TIMESTAMP WITH TIME ZONE, RIGHTARG = VARCHAR(255), COMMUTATOR = schema.>, NEGATOR = schema.<=, RESTRICT = myschema.sel_func, JOIN = myschema.join_func, HASHES, MERGES)"; + assert_eq!( + pg().verified_stmt(sql), Statement::CreateOperator(CreateOperator { - name, - function, - is_procedure, - left_arg, - right_arg, - commutator, - negator, - restrict, - join, - hashes, - merges, - }) => { - assert_eq!(name.to_string(), "<"); - assert_eq!(function.to_string(), "\"cas_lt\""); - assert!(is_procedure); - assert_eq!(left_arg.as_ref().unwrap().to_string(), "CAS"); - assert_eq!(right_arg.as_ref().unwrap().to_string(), "CAS"); - assert_eq!(commutator.as_ref().unwrap().to_string(), ">"); - assert_eq!(negator.as_ref().unwrap().to_string(), ">="); - assert_eq!(restrict.as_ref().unwrap().to_string(), "scalarltsel"); - assert_eq!(join.as_ref().unwrap().to_string(), "scalarltjoinsel"); - assert!(!hashes); - assert!(!merges); + name: ObjectName::from(vec![Ident::new("myschema"), Ident::new("@@")]), + function: ObjectName::from(vec![Ident::new("myschema"), Ident::new("my_proc")]), + is_procedure: true, + left_arg: Some(DataType::Timestamp(None, TimezoneInfo::WithTimeZone)), + right_arg: Some(DataType::Varchar(Some(CharacterLength::IntegerLength { + length: 255, + unit: None + }))), + commutator: Some(ObjectName::from(vec![ + Ident::new("schema"), + Ident::new(">") + ])), + negator: Some(ObjectName::from(vec![ + Ident::new("schema"), + Ident::new("<=") + ])), + restrict: Some(ObjectName::from(vec![ + Ident::new("myschema"), + Ident::new("sel_func") + ])), + join: Some(ObjectName::from(vec![ + Ident::new("myschema"), + Ident::new("join_func") + ])), + hashes: true, + merges: true, + }) + ); + + for op_symbol in &[ + "-", "*", "/", "<", ">", "=", "<=", ">=", "<>", "~", "!", "@", "#", "%", "^", "&", "|", + "<<", ">>", "&&", + ] { + assert_eq!( + pg().verified_stmt(&format!("CREATE OPERATOR {op_symbol} (FUNCTION = f)")), + Statement::CreateOperator(CreateOperator { + name: ObjectName::from(vec![Ident::new(*op_symbol)]), + function: ObjectName::from(vec![Ident::new("f")]), + is_procedure: false, + left_arg: None, + right_arg: None, + commutator: None, + negator: None, + restrict: None, + join: None, + hashes: false, + merges: false, + }) + ); + } + + pg().one_statement_parses_to( + "CREATE OPERATOR != (FUNCTION = func)", + "CREATE OPERATOR <> (FUNCTION = func)", + ); + + for (name, expected_name) in [ + ( + "s1.+", + ObjectName::from(vec![Ident::new("s1"), Ident::new("+")]), + ), + ( + "s2.-", + ObjectName::from(vec![Ident::new("s2"), Ident::new("-")]), + ), + ( + "s1.s3.*", + ObjectName::from(vec![Ident::new("s1"), Ident::new("s3"), Ident::new("*")]), + ), + ] { + match pg().verified_stmt(&format!("CREATE OPERATOR {name} (FUNCTION = f)")) { + Statement::CreateOperator(CreateOperator { name, hashes: false, merges: false, .. }) => { + assert_eq!(name, expected_name); + } + _ => unreachable!(), } - _ => panic!("Expected Statement::CreateOperator, got {:?}", statement), } + + pg().one_statement_parses_to( + "CREATE OPERATOR + (FUNCTION = f, COMMUTATOR = OPERATOR(>), NEGATOR = OPERATOR(>=))", + "CREATE OPERATOR + (FUNCTION = f, COMMUTATOR = >, NEGATOR = >=)", + ); + + // Test all duplicate clause errors + for field in &[ + "FUNCTION = f2", + "PROCEDURE = p", + "LEFTARG = INT4, LEFTARG = INT4", + "RIGHTARG = INT4, RIGHTARG = INT4", + "COMMUTATOR = -, COMMUTATOR = *", + "NEGATOR = -, NEGATOR = *", + "RESTRICT = f1, RESTRICT = f2", + "JOIN = f1, JOIN = f2", + "HASHES, HASHES", + "MERGES, MERGES", + ] { + assert!(pg() + .parse_sql_statements(&format!("CREATE OPERATOR + (FUNCTION = f, {field})")) + .is_err()); + } + + // Test missing FUNCTION/PROCEDURE error + assert!(pg() + .parse_sql_statements("CREATE OPERATOR + (LEFTARG = INT4)") + .is_err()); } #[test] fn parse_create_operator_family() { - let sql = "CREATE OPERATOR FAMILY CAS_btree_ops USING btree"; - let statement = pg().verified_stmt(sql); - - match statement { - Statement::CreateOperatorFamily(CreateOperatorFamily { name, using }) => { - assert_eq!(name.to_string(), "CAS_btree_ops"); - assert_eq!(using.to_string(), "btree"); - } - _ => panic!( - "Expected Statement::CreateOperatorFamily, got {:?}", - statement - ), + for index_method in &["btree", "hash", "gist", "gin", "spgist", "brin"] { + assert_eq!( + pg().verified_stmt(&format!("CREATE OPERATOR FAMILY my_family USING {index_method}")), + Statement::CreateOperatorFamily(CreateOperatorFamily { + name: ObjectName::from(vec![Ident::new("my_family")]), + using: Ident::new(*index_method), + }) + ); + assert_eq!( + pg().verified_stmt(&format!("CREATE OPERATOR FAMILY myschema.test_family USING {index_method}")), + Statement::CreateOperatorFamily(CreateOperatorFamily { + name: ObjectName::from(vec![Ident::new("myschema"), Ident::new("test_family")]), + using: Ident::new(*index_method), + }) + ); } } #[test] fn parse_create_operator_class() { - let sql = "CREATE OPERATOR CLASS CAS_btree_ops DEFAULT FOR TYPE CAS USING btree FAMILY CAS_btree_ops AS OPERATOR 1 <, OPERATOR 2 <=, OPERATOR 3 =, OPERATOR 4 >=, OPERATOR 5 >, FUNCTION 1 cas_cmp(CAS, CAS)"; - let statement = pg().verified_stmt(sql); + // Test all combinations of DEFAULT flag and FAMILY clause with different name qualifications + for (is_default, default_clause) in [(false, ""), (true, "DEFAULT ")] { + for (has_family, family_clause) in [(false, ""), (true, " FAMILY int4_family")] { + for (class_name, expected_name) in [ + ("int4_ops", ObjectName::from(vec![Ident::new("int4_ops")])), + ("myschema.test_ops", ObjectName::from(vec![Ident::new("myschema"), Ident::new("test_ops")])), + ] { + let sql = format!( + "CREATE OPERATOR CLASS {class_name} {default_clause}FOR TYPE INT4 USING btree{family_clause} AS OPERATOR 1 <" + ); + match pg().verified_stmt(&sql) { + Statement::CreateOperatorClass(CreateOperatorClass { + name, + default, + ref for_type, + ref using, + ref family, + ref items, + }) => { + assert_eq!(name, expected_name); + assert_eq!(default, is_default); + assert_eq!(for_type, &DataType::Int4(None)); + assert_eq!(using, &Ident::new("btree")); + assert_eq!( + family, + &if has_family { Some(ObjectName::from(vec![Ident::new("int4_family")])) } else { None } + ); + assert_eq!(items.len(), 1); + } + _ => panic!("Expected CreateOperatorClass statement"), + } + } + } + } - match statement { + // Test comprehensive operator class with all fields + match pg().verified_stmt("CREATE OPERATOR CLASS CAS_btree_ops DEFAULT FOR TYPE CAS USING btree FAMILY CAS_btree_ops AS OPERATOR 1 <, OPERATOR 2 <=, OPERATOR 3 =, OPERATOR 4 >=, OPERATOR 5 >, FUNCTION 1 cas_cmp(CAS, CAS)") { Statement::CreateOperatorClass(CreateOperatorClass { name, - default, - for_type, - using, - family, - items, + default: true, + ref for_type, + ref using, + ref family, + ref items, }) => { - assert_eq!(name.to_string(), "CAS_btree_ops"); - assert!(default); - assert_eq!(for_type.to_string(), "CAS"); - assert_eq!(using.to_string(), "btree"); - assert_eq!(family.unwrap().to_string(), "CAS_btree_ops"); + assert_eq!(name, ObjectName::from(vec![Ident::new("CAS_btree_ops")])); + assert_eq!(for_type, &DataType::Custom(ObjectName::from(vec![Ident::new("CAS")]), vec![])); + assert_eq!(using, &Ident::new("btree")); + assert_eq!(family, &Some(ObjectName::from(vec![Ident::new("CAS_btree_ops")]))); assert_eq!(items.len(), 6); + } + _ => panic!("Expected CreateOperatorClass statement"), + } + + // Test operator with argument types + match pg().verified_stmt("CREATE OPERATOR CLASS test_ops FOR TYPE INT4 USING gist AS OPERATOR 1 < (INT4, INT4)") { + Statement::CreateOperatorClass(CreateOperatorClass { + ref items, + .. + }) => { + assert_eq!(items.len(), 1); + match &items[0] { + OperatorClassItem::Operator { + strategy_number: 1, + ref operator_name, + op_types: Some(_), + purpose: None, + } => { + assert_eq!(operator_name, &ObjectName::from(vec![Ident::new("<")])); + } + _ => panic!("Expected Operator item with arg types"), + } + } + _ => panic!("Expected CreateOperatorClass statement"), + } + + // Test operator FOR SEARCH + match pg().verified_stmt("CREATE OPERATOR CLASS test_ops FOR TYPE INT4 USING gist AS OPERATOR 1 < FOR SEARCH") { + Statement::CreateOperatorClass(CreateOperatorClass { + ref items, + .. + }) => { + assert_eq!(items.len(), 1); + match &items[0] { + OperatorClassItem::Operator { + strategy_number: 1, + ref operator_name, + op_types: None, + purpose: Some(OperatorPurpose::ForSearch), + } => { + assert_eq!(operator_name, &ObjectName::from(vec![Ident::new("<")])); + } + _ => panic!("Expected Operator item FOR SEARCH"), + } + } + _ => panic!("Expected CreateOperatorClass statement"), + } - // Check first operator + // Test operator FOR ORDER BY + match pg().verified_stmt("CREATE OPERATOR CLASS test_ops FOR TYPE INT4 USING gist AS OPERATOR 2 <<-> FOR ORDER BY float_ops") { + Statement::CreateOperatorClass(CreateOperatorClass { + ref items, + .. + }) => { + assert_eq!(items.len(), 1); match &items[0] { OperatorClassItem::Operator { - strategy_number, - operator_name, - op_types, - purpose, + strategy_number: 2, + ref operator_name, + op_types: None, + purpose: Some(OperatorPurpose::ForOrderBy { ref sort_family }), } => { - assert_eq!(*strategy_number, 1); - assert_eq!(operator_name.to_string(), "<"); - assert!(op_types.is_none()); - assert!(purpose.is_none()); + assert_eq!(operator_name, &ObjectName::from(vec![Ident::new("<<->")])); + assert_eq!(sort_family, &ObjectName::from(vec![Ident::new("float_ops")])); } - _ => panic!("Expected Operator"), + _ => panic!("Expected Operator item FOR ORDER BY"), } + } + _ => panic!("Expected CreateOperatorClass statement"), + } - // Check function - match &items[5] { + // Test function with operator class arg types + match pg().verified_stmt("CREATE OPERATOR CLASS test_ops FOR TYPE INT4 USING btree AS FUNCTION 1 (INT4, INT4) btcmp(INT4, INT4)") { + Statement::CreateOperatorClass(CreateOperatorClass { + ref items, + .. + }) => { + assert_eq!(items.len(), 1); + match &items[0] { OperatorClassItem::Function { - support_number, - op_types, - function_name, - argument_types, + support_number: 1, + op_types: Some(_), + ref function_name, + ref argument_types, } => { - assert_eq!(*support_number, 1); - assert!(op_types.is_none()); - assert_eq!(function_name.to_string(), "cas_cmp"); + assert_eq!(function_name, &ObjectName::from(vec![Ident::new("btcmp")])); assert_eq!(argument_types.len(), 2); - assert_eq!(argument_types[0].to_string(), "CAS"); - assert_eq!(argument_types[1].to_string(), "CAS"); } - _ => panic!("Expected Function"), + _ => panic!("Expected Function item with op_types"), } } - _ => panic!( - "Expected Statement::CreateOperatorClass, got {:?}", - statement - ), + _ => panic!("Expected CreateOperatorClass statement"), + } + + // Test multiple items including STORAGE + match pg().verified_stmt("CREATE OPERATOR CLASS gist_ops FOR TYPE geometry USING gist AS OPERATOR 1 <<, FUNCTION 1 gist_consistent(internal, geometry, INT4), STORAGE box") { + Statement::CreateOperatorClass(CreateOperatorClass { + ref items, + .. + }) => { + assert_eq!(items.len(), 3); + // Check operator item + match &items[0] { + OperatorClassItem::Operator { + strategy_number: 1, + ref operator_name, + .. + } => { + assert_eq!(operator_name, &ObjectName::from(vec![Ident::new("<<")])); + } + _ => panic!("Expected Operator item"), + } + // Check function item + match &items[1] { + OperatorClassItem::Function { + support_number: 1, + ref function_name, + ref argument_types, + .. + } => { + assert_eq!(function_name, &ObjectName::from(vec![Ident::new("gist_consistent")])); + assert_eq!(argument_types.len(), 3); + } + _ => panic!("Expected Function item"), + } + // Check storage item + match &items[2] { + OperatorClassItem::Storage { ref storage_type } => { + assert_eq!(storage_type, &DataType::Custom(ObjectName::from(vec![Ident::new("box")]), vec![])); + } + _ => panic!("Expected Storage item"), + } + } + _ => panic!("Expected CreateOperatorClass statement"), } } From 4a413197fca8d4305b1b7e8787989aa2cb0f2ec6 Mon Sep 17 00:00:00 2001 From: Luca Date: Thu, 13 Nov 2025 20:49:17 +0100 Subject: [PATCH 08/10] Formatted code --- src/parser/mod.rs | 64 +++++++++++++++++++++++++++---------- tests/sqlparser_postgres.rs | 44 ++++++++++++++++--------- 2 files changed, 76 insertions(+), 32 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index d0414bab6..cb4e7ff70 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -6434,12 +6434,14 @@ impl<'a> Parser<'a> { /// Parse an operator name, which can contain special characters like +, -, <, >, = /// that are tokenized as operator tokens rather than identifiers. /// This is used for PostgreSQL CREATE OPERATOR statements. - /// + /// /// Examples: `+`, `myschema.+`, `pg_catalog.<=` pub fn parse_operator_name(&mut self) -> Result { let mut parts = vec![]; loop { - parts.push(ObjectNamePart::Identifier(Ident::new(self.next_token().to_string()))); + parts.push(ObjectNamePart::Identifier(Ident::new( + self.next_token().to_string(), + ))); if !self.consume_token(&Token::Period) { break; } @@ -6473,38 +6475,57 @@ impl<'a> Parser<'a> { loop { let keyword = self.expect_one_of_keywords(&[ - Keyword::FUNCTION, Keyword::PROCEDURE, Keyword::LEFTARG, Keyword::RIGHTARG, - Keyword::COMMUTATOR, Keyword::NEGATOR, Keyword::RESTRICT, Keyword::JOIN, - Keyword::HASHES, Keyword::MERGES, + Keyword::FUNCTION, + Keyword::PROCEDURE, + Keyword::LEFTARG, + Keyword::RIGHTARG, + Keyword::COMMUTATOR, + Keyword::NEGATOR, + Keyword::RESTRICT, + Keyword::JOIN, + Keyword::HASHES, + Keyword::MERGES, ])?; match keyword { Keyword::HASHES => { - if hashes { return Err(dup_err!("HASHES")); } + if hashes { + return Err(dup_err!("HASHES")); + } hashes = true; } Keyword::MERGES => { - if merges { return Err(dup_err!("MERGES")); } + if merges { + return Err(dup_err!("MERGES")); + } merges = true; } Keyword::FUNCTION | Keyword::PROCEDURE => { - if function.is_some() { return Err(dup_err!("FUNCTION/PROCEDURE")); } + if function.is_some() { + return Err(dup_err!("FUNCTION/PROCEDURE")); + } self.expect_token(&Token::Eq)?; function = Some(self.parse_object_name(false)?); is_procedure = keyword == Keyword::PROCEDURE; } Keyword::LEFTARG => { - if left_arg.is_some() { return Err(dup_err!("LEFTARG")); } + if left_arg.is_some() { + return Err(dup_err!("LEFTARG")); + } self.expect_token(&Token::Eq)?; left_arg = Some(self.parse_data_type()?); } Keyword::RIGHTARG => { - if right_arg.is_some() { return Err(dup_err!("RIGHTARG")); } + if right_arg.is_some() { + return Err(dup_err!("RIGHTARG")); + } self.expect_token(&Token::Eq)?; right_arg = Some(self.parse_data_type()?); } Keyword::COMMUTATOR => { - if commutator.is_some() { return Err(dup_err!("COMMUTATOR")); } + if commutator.is_some() { + return Err(dup_err!("COMMUTATOR")); + } self.expect_token(&Token::Eq)?; if self.parse_keyword(Keyword::OPERATOR) { self.expect_token(&Token::LParen)?; @@ -6515,7 +6536,9 @@ impl<'a> Parser<'a> { } } Keyword::NEGATOR => { - if negator.is_some() { return Err(dup_err!("NEGATOR")); } + if negator.is_some() { + return Err(dup_err!("NEGATOR")); + } self.expect_token(&Token::Eq)?; if self.parse_keyword(Keyword::OPERATOR) { self.expect_token(&Token::LParen)?; @@ -6526,18 +6549,25 @@ impl<'a> Parser<'a> { } } Keyword::RESTRICT => { - if restrict.is_some() { return Err(dup_err!("RESTRICT")); } + if restrict.is_some() { + return Err(dup_err!("RESTRICT")); + } self.expect_token(&Token::Eq)?; restrict = Some(self.parse_object_name(false)?); } Keyword::JOIN => { - if join.is_some() { return Err(dup_err!("JOIN")); } + if join.is_some() { + return Err(dup_err!("JOIN")); + } self.expect_token(&Token::Eq)?; join = Some(self.parse_object_name(false)?); } - _ => return Err(ParserError::ParserError(format!( - "Unexpected keyword {:?} in CREATE OPERATOR", keyword - ))), + _ => { + return Err(ParserError::ParserError(format!( + "Unexpected keyword {:?} in CREATE OPERATOR", + keyword + ))) + } } if !self.consume_token(&Token::Comma) { diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 6b03cbb91..326638bfb 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -6710,7 +6710,12 @@ fn parse_create_operator() { ), ] { match pg().verified_stmt(&format!("CREATE OPERATOR {name} (FUNCTION = f)")) { - Statement::CreateOperator(CreateOperator { name, hashes: false, merges: false, .. }) => { + Statement::CreateOperator(CreateOperator { + name, + hashes: false, + merges: false, + .. + }) => { assert_eq!(name, expected_name); } _ => unreachable!(), @@ -6750,14 +6755,18 @@ fn parse_create_operator() { fn parse_create_operator_family() { for index_method in &["btree", "hash", "gist", "gin", "spgist", "brin"] { assert_eq!( - pg().verified_stmt(&format!("CREATE OPERATOR FAMILY my_family USING {index_method}")), + pg().verified_stmt(&format!( + "CREATE OPERATOR FAMILY my_family USING {index_method}" + )), Statement::CreateOperatorFamily(CreateOperatorFamily { name: ObjectName::from(vec![Ident::new("my_family")]), using: Ident::new(*index_method), }) ); assert_eq!( - pg().verified_stmt(&format!("CREATE OPERATOR FAMILY myschema.test_family USING {index_method}")), + pg().verified_stmt(&format!( + "CREATE OPERATOR FAMILY myschema.test_family USING {index_method}" + )), Statement::CreateOperatorFamily(CreateOperatorFamily { name: ObjectName::from(vec![Ident::new("myschema"), Ident::new("test_family")]), using: Ident::new(*index_method), @@ -6773,7 +6782,10 @@ fn parse_create_operator_class() { for (has_family, family_clause) in [(false, ""), (true, " FAMILY int4_family")] { for (class_name, expected_name) in [ ("int4_ops", ObjectName::from(vec![Ident::new("int4_ops")])), - ("myschema.test_ops", ObjectName::from(vec![Ident::new("myschema"), Ident::new("test_ops")])), + ( + "myschema.test_ops", + ObjectName::from(vec![Ident::new("myschema"), Ident::new("test_ops")]), + ), ] { let sql = format!( "CREATE OPERATOR CLASS {class_name} {default_clause}FOR TYPE INT4 USING btree{family_clause} AS OPERATOR 1 <" @@ -6793,7 +6805,11 @@ fn parse_create_operator_class() { assert_eq!(using, &Ident::new("btree")); assert_eq!( family, - &if has_family { Some(ObjectName::from(vec![Ident::new("int4_family")])) } else { None } + &if has_family { + Some(ObjectName::from(vec![Ident::new("int4_family")])) + } else { + None + } ); assert_eq!(items.len(), 1); } @@ -6823,11 +6839,10 @@ fn parse_create_operator_class() { } // Test operator with argument types - match pg().verified_stmt("CREATE OPERATOR CLASS test_ops FOR TYPE INT4 USING gist AS OPERATOR 1 < (INT4, INT4)") { - Statement::CreateOperatorClass(CreateOperatorClass { - ref items, - .. - }) => { + match pg().verified_stmt( + "CREATE OPERATOR CLASS test_ops FOR TYPE INT4 USING gist AS OPERATOR 1 < (INT4, INT4)", + ) { + Statement::CreateOperatorClass(CreateOperatorClass { ref items, .. }) => { assert_eq!(items.len(), 1); match &items[0] { OperatorClassItem::Operator { @@ -6845,11 +6860,10 @@ fn parse_create_operator_class() { } // Test operator FOR SEARCH - match pg().verified_stmt("CREATE OPERATOR CLASS test_ops FOR TYPE INT4 USING gist AS OPERATOR 1 < FOR SEARCH") { - Statement::CreateOperatorClass(CreateOperatorClass { - ref items, - .. - }) => { + match pg().verified_stmt( + "CREATE OPERATOR CLASS test_ops FOR TYPE INT4 USING gist AS OPERATOR 1 < FOR SEARCH", + ) { + Statement::CreateOperatorClass(CreateOperatorClass { ref items, .. }) => { assert_eq!(items.len(), 1); match &items[0] { OperatorClassItem::Operator { From 48f1cd08a32603f649a5850c7b40975c6ceee9ef Mon Sep 17 00:00:00 2001 From: Luca Date: Thu, 13 Nov 2025 21:20:56 +0100 Subject: [PATCH 09/10] Tested more corner cases --- tests/sqlparser_postgres.rs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 326638bfb..87b9b7b10 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -6749,6 +6749,13 @@ fn parse_create_operator() { assert!(pg() .parse_sql_statements("CREATE OPERATOR + (LEFTARG = INT4)") .is_err()); + + // Test empty parameter list error + assert!(pg().parse_sql_statements("CREATE OPERATOR + ()").is_err()); + + // Test nested empty parentheses error + assert!(pg().parse_sql_statements("CREATE OPERATOR > (()").is_err()); + assert!(pg().parse_sql_statements("CREATE OPERATOR > ())").is_err()); } #[test] @@ -6848,7 +6855,11 @@ fn parse_create_operator_class() { OperatorClassItem::Operator { strategy_number: 1, ref operator_name, - op_types: Some(_), + op_types: + Some(OperatorArgTypes { + left: DataType::Int4(None), + right: DataType::Int4(None), + }), purpose: None, } => { assert_eq!(operator_name, &ObjectName::from(vec![Ident::new("<")])); From 1812929a4cd2cbdb15572017511fab2b74a6cfae Mon Sep 17 00:00:00 2001 From: Luca Date: Thu, 13 Nov 2025 21:25:45 +0100 Subject: [PATCH 10/10] Ensured coverage of more corner cases --- tests/sqlparser_postgres.rs | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 87b9b7b10..74ec1af69 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -6937,6 +6937,35 @@ fn parse_create_operator_class() { _ => panic!("Expected CreateOperatorClass statement"), } + // Test function with no arguments (empty parentheses normalizes to no parentheses) + pg().one_statement_parses_to( + "CREATE OPERATOR CLASS test_ops FOR TYPE INT4 USING btree AS FUNCTION 1 my_func()", + "CREATE OPERATOR CLASS test_ops FOR TYPE INT4 USING btree AS FUNCTION 1 my_func", + ); + match pg().verified_stmt( + "CREATE OPERATOR CLASS test_ops FOR TYPE INT4 USING btree AS FUNCTION 1 my_func", + ) { + Statement::CreateOperatorClass(CreateOperatorClass { ref items, .. }) => { + assert_eq!(items.len(), 1); + match &items[0] { + OperatorClassItem::Function { + support_number: 1, + op_types: None, + ref function_name, + ref argument_types, + } => { + assert_eq!( + function_name, + &ObjectName::from(vec![Ident::new("my_func")]) + ); + assert_eq!(argument_types.len(), 0); + } + _ => panic!("Expected Function item without op_types and no arguments"), + } + } + _ => panic!("Expected CreateOperatorClass statement"), + } + // Test multiple items including STORAGE match pg().verified_stmt("CREATE OPERATOR CLASS gist_ops FOR TYPE geometry USING gist AS OPERATOR 1 <<, FUNCTION 1 gist_consistent(internal, geometry, INT4), STORAGE box") { Statement::CreateOperatorClass(CreateOperatorClass { @@ -6978,4 +7007,11 @@ fn parse_create_operator_class() { } _ => panic!("Expected CreateOperatorClass statement"), } + + // Test nested empty parentheses error in function arguments + assert!(pg() + .parse_sql_statements( + "CREATE OPERATOR CLASS test_ops FOR TYPE INT4 USING btree AS FUNCTION 1 cas_cmp(()" + ) + .is_err()); }