diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index b6c606ff467f9..8d6ef70f1a801 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -2161,7 +2161,9 @@ mod tests { use crate::test; use crate::test_util::{plan_and_collect, populate_csv_partitions}; use arrow::datatypes::{DataType, TimeUnit}; + use arrow_schema::FieldRef; use datafusion_common::DataFusionError; + use datafusion_common::datatype::DataTypeExt; use std::error::Error; use std::path::PathBuf; @@ -2678,7 +2680,7 @@ mod tests { struct MyTypePlanner {} impl TypePlanner for MyTypePlanner { - fn plan_type(&self, sql_type: &ast::DataType) -> Result<Option<DataType>> { + fn plan_type_field(&self, sql_type: &ast::DataType) -> Result<Option<FieldRef>> { match sql_type { ast::DataType::Datetime(precision) => { let precision = match precision { @@ -2688,7 +2690,9 @@ mod tests { None | Some(9) => TimeUnit::Nanosecond, _ => unreachable!(), }; - Ok(Some(DataType::Timestamp(precision, None))) + Ok(Some( + DataType::Timestamp(precision, None).into_nullable_field_ref(), + )) } _ => Ok(None), } diff --git a/datafusion/expr/src/planner.rs b/datafusion/expr/src/planner.rs index 837a9eefe289f..197ac8c035712 100644 --- a/datafusion/expr/src/planner.rs +++ b/datafusion/expr/src/planner.rs @@ -434,10 +434,28 @@ pub trait TypePlanner: Debug + Send + Sync { /// Plan SQL [`sqlparser::ast::DataType`] to DataFusion [`DataType`] /// /// Returns None if not possible + #[deprecated(since = "53.0.0", note = "Use plan_type_field()")] fn plan_type( &self, _sql_type: &sqlparser::ast::DataType, ) -> Result<Option<DataType>> { Ok(None) } + + /// Plan SQL [`sqlparser::ast::DataType`] to DataFusion [`FieldRef`] + /// + /// Returns None if not possible. Unlike [`Self::plan_type`], `plan_type_field()` + /// makes it possible to express extension types (e.g., `arrow.uuid`) or otherwise + /// insert metadata into the DataFusion type representation.
The default implementation + /// falls back on [`Self::plan_type`] for backward compatibility and wraps the result + /// in a nullable field reference. + fn plan_type_field( + &self, + sql_type: &sqlparser::ast::DataType, + ) -> Result<Option<FieldRef>> { + #[expect(deprecated)] + Ok(self + .plan_type(sql_type)? + .map(|data_type| data_type.into_nullable_field_ref())) + } } diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index 9aa5be8131dcb..f33144761a808 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -292,15 +292,13 @@ impl<S: ContextProvider> SqlToRel<'_, S> { return not_impl_err!("CAST with format is not supported: {format}"); } - Ok(Expr::TryCast(TryCast::new( + Ok(Expr::TryCast(TryCast::new_from_field( Box::new(self.sql_expr_to_logical_expr( *expr, schema, planner_context, )?), - self.convert_data_type_to_field(&data_type)? - .data_type() - .clone(), + self.convert_data_type_to_field(&data_type)?, ))) } @@ -308,11 +306,9 @@ impl<S: ContextProvider> SqlToRel<'_, S> { data_type, value, uses_odbc_syntax: _, - }) => Ok(Expr::Cast(Cast::new( + }) => Ok(Expr::Cast(Cast::new_from_field( Box::new(lit(value.into_string().unwrap())), - self.convert_data_type_to_field(&data_type)? - .data_type() - .clone(), + self.convert_data_type_to_field(&data_type)?, ))), SQLExpr::IsNull(expr) => Ok(Expr::IsNull(Box::new( @@ -1035,12 +1031,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> { _ => expr, }; - // Currently drops metadata attached to the type - // https://github.com/apache/datafusion/issues/18060 - Ok(Expr::Cast(Cast::new( - Box::new(expr), - dt.data_type().clone(), - ))) + Ok(Expr::Cast(Cast::new_from_field(Box::new(expr), dt))) } /// Extracts the root expression and access chain from a compound expression.
diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index 307f28e8ff9ad..b7e270e4f0570 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -621,9 +621,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { ) -> Result<FieldRef> { // First check if any of the registered type_planner can handle this type if let Some(type_planner) = self.context_provider.get_type_planner() - && let Some(data_type) = type_planner.plan_type(sql_type)? + && let Some(data_type) = type_planner.plan_type_field(sql_type)? { - return Ok(data_type.into_nullable_field_ref()); + return Ok(data_type); } // If no type_planner can handle this type, use the default conversion diff --git a/datafusion/sql/tests/common/mod.rs b/datafusion/sql/tests/common/mod.rs index 4b8667c3c0cbf..5caade300290f 100644 --- a/datafusion/sql/tests/common/mod.rs +++ b/datafusion/sql/tests/common/mod.rs @@ -23,6 +23,7 @@ use std::{sync::Arc, vec}; use arrow::datatypes::*; use datafusion_common::config::ConfigOptions; +use datafusion_common::datatype::DataTypeExt; use datafusion_common::file_options::file_type::FileType; use datafusion_common::{DFSchema, GetExt, Result, TableReference, plan_err}; use datafusion_expr::planner::{ExprPlanner, PlannerResult, TypePlanner}; @@ -341,8 +342,17 @@ impl TableSource for EmptyTable { pub struct CustomTypePlanner {} impl TypePlanner for CustomTypePlanner { - fn plan_type(&self, sql_type: &sqlparser::ast::DataType) -> Result<Option<DataType>> { + fn plan_type_field( + &self, + sql_type: &sqlparser::ast::DataType, + ) -> Result<Option<FieldRef>> { match sql_type { + sqlparser::ast::DataType::Uuid => Ok(Some(Arc::new( + Field::new("", DataType::FixedSizeBinary(16), true).with_metadata( + [("ARROW:extension:name".to_string(), "arrow.uuid".to_string())] + .into(), + ), + ))), sqlparser::ast::DataType::Datetime(precision) => { let precision = match precision { Some(0) => TimeUnit::Second, @@ -351,7 +361,9 @@ impl TypePlanner for CustomTypePlanner { None | Some(9) =>
TimeUnit::Nanosecond, _ => unreachable!(), }; - Ok(Some(DataType::Timestamp(precision, None))) + Ok(Some( + DataType::Timestamp(precision, None).into_nullable_field_ref(), + )) } _ => Ok(None), } diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index 444bdae73ac26..edf39b2b8d7fa 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -4727,6 +4727,14 @@ fn test_custom_type_plan() -> Result<()> { "# ); + let plan = plan_sql("SELECT UUID '00010203-0405-0607-0809-000102030506'"); + assert_snapshot!( + plan, + @r#" + Projection: CAST(Utf8("00010203-0405-0607-0809-000102030506") AS FixedSizeBinary(16)<{"ARROW:extension:name": "arrow.uuid"}>) + EmptyRelation: rows=1 + "# + ); Ok(()) } diff --git a/docs/source/library-user-guide/extending-sql.md b/docs/source/library-user-guide/extending-sql.md index 687d884895c8b..eea5b3b1acfc9 100644 --- a/docs/source/library-user-guide/extending-sql.md +++ b/docs/source/library-user-guide/extending-sql.md @@ -158,7 +158,7 @@ when you need to support SQL types that aren't natively recognized. 
```rust # use std::sync::Arc; -# use arrow::datatypes::{DataType, TimeUnit}; +# use arrow::datatypes::{DataType, FieldRef, TimeUnit}; # use datafusion::error::Result; # use datafusion::prelude::*; # use datafusion::execution::SessionStateBuilder; @@ -169,7 +169,7 @@ use datafusion_expr::planner::TypePlanner; struct MyTypePlanner; impl TypePlanner for MyTypePlanner { - fn plan_type(&self, sql_type: &ast::DataType) -> Result<Option<DataType>> { + fn plan_type_field(&self, sql_type: &ast::DataType) -> Result<Option<FieldRef>> { match sql_type { // Map DATETIME(precision) to Arrow Timestamp ast::DataType::Datetime(precision) => { @@ -180,7 +180,9 @@ impl TypePlanner for MyTypePlanner { None | Some(9) => TimeUnit::Nanosecond, _ => return Ok(None), // Let default handling take over }; - Ok(Some(DataType::Timestamp(time_unit, None))) + Ok(Some( + DataType::Timestamp(time_unit, None).into_nullable_field_ref() + )) } _ => Ok(None), // Return None for types we don't handle } @@ -202,6 +204,49 @@ async fn main() -> Result<()> { } ``` +#### Example: Supporting the UUID Type + +```rust +# use std::sync::Arc; +# use arrow::datatypes::{DataType, Field, FieldRef}; +# use datafusion::error::Result; +# use datafusion::prelude::*; +# use datafusion::execution::SessionStateBuilder; +use datafusion_expr::planner::TypePlanner; +# use sqlparser::ast; + +#[derive(Debug)] +struct MyTypePlanner; + +impl TypePlanner for MyTypePlanner { + fn plan_type_field(&self, sql_type: &ast::DataType) -> Result<Option<FieldRef>> { + match sql_type { + sqlparser::ast::DataType::Uuid => Ok(Some(Arc::new( + Field::new("", DataType::FixedSizeBinary(16), true).with_metadata( + [("ARROW:extension:name".to_string(), "arrow.uuid".to_string())] + .into(), + ), + ))), + _ => Ok(None), + } + } +} + +#[tokio::main] +async fn main() -> Result<()> { + let state = SessionStateBuilder::new() + .with_default_features() + .with_type_planner(Arc::new(MyTypePlanner)) + .build(); + + let ctx = SessionContext::new_with_state(state); + + // Now UUID type is recognized +
ctx.sql("CREATE TABLE idx (uuid UUID)").await?; + Ok(()) +} +``` + For more details, see the [TypePlanner API documentation]. ### RelationPlanner: Custom FROM Clause Elements