Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions datafusion/core/src/execution/context/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2161,7 +2161,9 @@ mod tests {
use crate::test;
use crate::test_util::{plan_and_collect, populate_csv_partitions};
use arrow::datatypes::{DataType, TimeUnit};
use arrow_schema::FieldRef;
use datafusion_common::DataFusionError;
use datafusion_common::datatype::DataTypeExt;
use std::error::Error;
use std::path::PathBuf;

Expand Down Expand Up @@ -2678,7 +2680,7 @@ mod tests {
struct MyTypePlanner {}

impl TypePlanner for MyTypePlanner {
fn plan_type(&self, sql_type: &ast::DataType) -> Result<Option<DataType>> {
fn plan_type_field(&self, sql_type: &ast::DataType) -> Result<Option<FieldRef>> {
match sql_type {
ast::DataType::Datetime(precision) => {
let precision = match precision {
Expand All @@ -2688,7 +2690,9 @@ mod tests {
None | Some(9) => TimeUnit::Nanosecond,
_ => unreachable!(),
};
Ok(Some(DataType::Timestamp(precision, None)))
Ok(Some(
DataType::Timestamp(precision, None).into_nullable_field_ref(),
))
}
_ => Ok(None),
}
Expand Down
18 changes: 18 additions & 0 deletions datafusion/expr/src/planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -434,10 +434,28 @@ pub trait TypePlanner: Debug + Send + Sync {
/// Plan SQL [`sqlparser::ast::DataType`] to DataFusion [`DataType`]
///
/// Returns None if not possible. The default implementation always returns
/// `Ok(None)`.
///
/// Deprecated: implement `plan_type_field()` instead.
#[deprecated(since = "53.0.0", note = "Use plan_type_field()")]
fn plan_type(
&self,
_sql_type: &sqlparser::ast::DataType,
) -> Result<Option<DataType>> {
Ok(None)
}

/// Plan SQL [`sqlparser::ast::DataType`] to DataFusion [`FieldRef`]
///
/// Returns None if not possible. Unlike [`Self::plan_type`], `plan_type_field()`
/// makes it possible to express extension types (e.g., `arrow.uuid`) or otherwise
/// insert metadata into the DataFusion type representation. The default implementation
/// falls back on [`Self::plan_type`] for backward compatibility and wraps the result
/// in a nullable field reference.
fn plan_type_field(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we can help document how this is different from plan_type (or should we be deprecating plan_type in favor of plan_type_field)?

&self,
sql_type: &sqlparser::ast::DataType,
) -> Result<Option<FieldRef>> {
#[expect(deprecated)]
Ok(self
.plan_type(sql_type)?
.map(|data_type| data_type.into_nullable_field_ref()))
}
}
19 changes: 5 additions & 14 deletions datafusion/sql/src/expr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -292,27 +292,23 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
return not_impl_err!("CAST with format is not supported: {format}");
}

Ok(Expr::TryCast(TryCast::new(
Ok(Expr::TryCast(TryCast::new_from_field(
Box::new(self.sql_expr_to_logical_expr(
*expr,
schema,
planner_context,
)?),
self.convert_data_type_to_field(&data_type)?
.data_type()
.clone(),
self.convert_data_type_to_field(&data_type)?,
)))
}

SQLExpr::TypedString(TypedString {
data_type,
value,
uses_odbc_syntax: _,
}) => Ok(Expr::Cast(Cast::new(
}) => Ok(Expr::Cast(Cast::new_from_field(
Box::new(lit(value.into_string().unwrap())),
self.convert_data_type_to_field(&data_type)?
.data_type()
.clone(),
self.convert_data_type_to_field(&data_type)?,
))),

SQLExpr::IsNull(expr) => Ok(Expr::IsNull(Box::new(
Expand Down Expand Up @@ -1035,12 +1031,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
_ => expr,
};

// Currently drops metadata attached to the type
// https://github.com/apache/datafusion/issues/18060
Ok(Expr::Cast(Cast::new(
Box::new(expr),
dt.data_type().clone(),
)))
Ok(Expr::Cast(Cast::new_from_field(Box::new(expr), dt)))
}

/// Extracts the root expression and access chain from a compound expression.
Expand Down
4 changes: 2 additions & 2 deletions datafusion/sql/src/planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -621,9 +621,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
) -> Result<FieldRef> {
// First check if any of the registered type_planner can handle this type
if let Some(type_planner) = self.context_provider.get_type_planner()
&& let Some(data_type) = type_planner.plan_type(sql_type)?
&& let Some(data_type) = type_planner.plan_type_field(sql_type)?
{
return Ok(data_type.into_nullable_field_ref());
return Ok(data_type);
}

// If no type_planner can handle this type, use the default conversion
Expand Down
16 changes: 14 additions & 2 deletions datafusion/sql/tests/common/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ use std::{sync::Arc, vec};

use arrow::datatypes::*;
use datafusion_common::config::ConfigOptions;
use datafusion_common::datatype::DataTypeExt;
use datafusion_common::file_options::file_type::FileType;
use datafusion_common::{DFSchema, GetExt, Result, TableReference, plan_err};
use datafusion_expr::planner::{ExprPlanner, PlannerResult, TypePlanner};
Expand Down Expand Up @@ -341,8 +342,17 @@ impl TableSource for EmptyTable {
pub struct CustomTypePlanner {}

impl TypePlanner for CustomTypePlanner {
fn plan_type(&self, sql_type: &sqlparser::ast::DataType) -> Result<Option<DataType>> {
fn plan_type_field(
&self,
sql_type: &sqlparser::ast::DataType,
) -> Result<Option<FieldRef>> {
match sql_type {
sqlparser::ast::DataType::Uuid => Ok(Some(Arc::new(
Field::new("", DataType::FixedSizeBinary(16), true).with_metadata(
[("ARROW:extension:name".to_string(), "arrow.uuid".to_string())]
.into(),
),
))),
sqlparser::ast::DataType::Datetime(precision) => {
let precision = match precision {
Some(0) => TimeUnit::Second,
Expand All @@ -351,7 +361,9 @@ impl TypePlanner for CustomTypePlanner {
None | Some(9) => TimeUnit::Nanosecond,
_ => unreachable!(),
};
Ok(Some(DataType::Timestamp(precision, None)))
Ok(Some(
DataType::Timestamp(precision, None).into_nullable_field_ref(),
))
}
_ => Ok(None),
}
Expand Down
8 changes: 8 additions & 0 deletions datafusion/sql/tests/sql_integration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4727,6 +4727,14 @@ fn test_custom_type_plan() -> Result<()> {
"#
);

let plan = plan_sql("SELECT UUID '00010203-0405-0607-0809-000102030506'");
assert_snapshot!(
plan,
@r#"
Projection: CAST(Utf8("00010203-0405-0607-0809-000102030506") AS FixedSizeBinary(16)<{"ARROW:extension:name": "arrow.uuid"}>)
EmptyRelation: rows=1
"#
);
Ok(())
}

Expand Down
51 changes: 48 additions & 3 deletions docs/source/library-user-guide/extending-sql.md
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ when you need to support SQL types that aren't natively recognized.

```rust
# use std::sync::Arc;
# use arrow::datatypes::{DataType, TimeUnit};
# use arrow::datatypes::{DataType, FieldRef, TimeUnit};
# use datafusion::error::Result;
# use datafusion::prelude::*;
# use datafusion::execution::SessionStateBuilder;
Expand All @@ -169,7 +169,7 @@ use datafusion_expr::planner::TypePlanner;
struct MyTypePlanner;

impl TypePlanner for MyTypePlanner {
fn plan_type(&self, sql_type: &ast::DataType) -> Result<Option<DataType>> {
fn plan_type_field(&self, sql_type: &ast::DataType) -> Result<Option<FieldRef>> {
match sql_type {
// Map DATETIME(precision) to Arrow Timestamp
ast::DataType::Datetime(precision) => {
Expand All @@ -180,7 +180,9 @@ impl TypePlanner for MyTypePlanner {
None | Some(9) => TimeUnit::Nanosecond,
_ => return Ok(None), // Let default handling take over
};
Ok(Some(DataType::Timestamp(time_unit, None)))
Ok(Some(
DataType::Timestamp(time_unit, None).into_nullable_field_ref()
))
}
_ => Ok(None), // Return None for types we don't handle
}
Expand All @@ -202,6 +204,49 @@ async fn main() -> Result<()> {
}
```

#### Example: Supporting the UUID Type

```rust
# use std::sync::Arc;
# use arrow::datatypes::{DataType, Field, FieldRef};
# use datafusion::error::Result;
# use datafusion::prelude::*;
# use datafusion::execution::SessionStateBuilder;
use datafusion_expr::planner::TypePlanner;
# use sqlparser::ast;

/// Type planner that teaches the SQL planner about the `UUID` type.
#[derive(Debug)]
struct MyTypePlanner;

impl TypePlanner for MyTypePlanner {
    fn plan_type_field(&self, sql_type: &ast::DataType) -> Result<Option<FieldRef>> {
        match sql_type {
            // Map UUID to a nullable 16-byte fixed-size binary field whose
            // metadata carries the `arrow.uuid` extension name. Returning a
            // field (rather than a bare `DataType`) is what lets the
            // extension metadata travel with the type.
            ast::DataType::Uuid => Ok(Some(Arc::new(
                Field::new("", DataType::FixedSizeBinary(16), true).with_metadata(
                    [("ARROW:extension:name".to_string(), "arrow.uuid".to_string())]
                        .into(),
                ),
            ))),
            _ => Ok(None), // Return None for types we don't handle
        }
    }
}

#[tokio::main]
async fn main() -> Result<()> {
    let state = SessionStateBuilder::new()
        .with_default_features()
        .with_type_planner(Arc::new(MyTypePlanner))
        .build();

    let ctx = SessionContext::new_with_state(state);

    // Now UUID type is recognized
    ctx.sql("CREATE TABLE idx (uuid UUID)").await?;
    Ok(())
}
```

For more details, see the [TypePlanner API documentation].

### RelationPlanner: Custom FROM Clause Elements
Expand Down
Loading