From c6077180565879345afadb8254742145e1e530f4 Mon Sep 17 00:00:00 2001 From: Jefffrey Date: Thu, 25 Dec 2025 10:55:26 +0900 Subject: [PATCH 1/2] Introduce `TypeSignatureClass::Any` --- datafusion/expr-common/src/signature.rs | 7 +++++ datafusion/functions/src/core/arrow_cast.rs | 13 +++++++-- .../functions/src/core/arrow_metadata.rs | 28 +++++++++++++------ .../sqllogictest/test_files/arrow_typeof.slt | 5 +++- 4 files changed, 41 insertions(+), 12 deletions(-) diff --git a/datafusion/expr-common/src/signature.rs b/datafusion/expr-common/src/signature.rs index 90bd1415003cd..a972584469ebf 100644 --- a/datafusion/expr-common/src/signature.rs +++ b/datafusion/expr-common/src/signature.rs @@ -328,6 +328,8 @@ impl TypeSignature { /// arguments that can be coerced to a particular class of types. #[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Hash)] pub enum TypeSignatureClass { + /// Allows an arbitrary type argument without coercing the argument. + Any, Timestamp, Time, Interval, @@ -354,6 +356,9 @@ impl TypeSignatureClass { /// documentation or error messages. fn get_example_types(&self) -> Vec { match self { + // TODO: might be too much info to return every single type here + // maybe https://github.com/apache/datafusion/issues/14761 will help here? + TypeSignatureClass::Any => vec![], TypeSignatureClass::Native(l) => get_data_types(l.native()), TypeSignatureClass::Timestamp => { vec![ @@ -396,6 +401,7 @@ impl TypeSignatureClass { } match self { + TypeSignatureClass::Any => true, TypeSignatureClass::Native(t) if t.native() == logical_type => true, TypeSignatureClass::Timestamp if logical_type.is_timestamp() => true, TypeSignatureClass::Time if logical_type.is_time() => true, @@ -417,6 +423,7 @@ impl TypeSignatureClass { origin_type: &DataType, ) -> Result { match self { + TypeSignatureClass::Any => Ok(origin_type.to_owned()), TypeSignatureClass::Native(logical_type) => { logical_type.native().default_cast_for(origin_type) } diff --git a/datafusion/functions/src/core/arrow_cast.rs b/datafusion/functions/src/core/arrow_cast.rs index a0101dc09da91..04189c0c6f363 100644 --- a/datafusion/functions/src/core/arrow_cast.rs +++ b/datafusion/functions/src/core/arrow_cast.rs @@ -19,6 +19,7 @@ use arrow::datatypes::{DataType, Field, FieldRef}; use arrow::error::ArrowError; +use datafusion_common::types::logical_string; use datafusion_common::{ Result, ScalarValue, arrow_datafusion_err, exec_err, internal_err, }; @@ -27,8 +28,8 @@ use std::any::Any; use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; use datafusion_expr::{ - ColumnarValue, Documentation, Expr, ReturnFieldArgs, ScalarFunctionArgs, - ScalarUDFImpl, Signature, Volatility, + Coercion, ColumnarValue, Documentation, Expr, ReturnFieldArgs, ScalarFunctionArgs, + ScalarUDFImpl, Signature, TypeSignatureClass, Volatility, }; use datafusion_macros::user_doc; @@ -102,7 +103,13 @@ impl Default for ArrowCastFunc { impl ArrowCastFunc { pub fn new() -> Self { Self { - signature: Signature::any(2, Volatility::Immutable), + signature: Signature::coercible( + vec![ + Coercion::new_exact(TypeSignatureClass::Any), + Coercion::new_exact(TypeSignatureClass::Native(logical_string())), + ], + Volatility::Immutable, + ), } } } diff --git a/datafusion/functions/src/core/arrow_metadata.rs b/datafusion/functions/src/core/arrow_metadata.rs index 92873889b02c1..86a6d8c21e96b 100644 --- a/datafusion/functions/src/core/arrow_metadata.rs +++ b/datafusion/functions/src/core/arrow_metadata.rs @@ -17,10 +17,11 @@ use arrow::array::{MapBuilder, StringBuilder}; use arrow::datatypes::{DataType, Field, Fields}; -use datafusion_common::{Result, ScalarValue, exec_err}; +use datafusion_common::types::logical_string; +use datafusion_common::{Result, ScalarValue, exec_err, internal_err}; use datafusion_expr::{ - ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature, - Volatility, + Coercion, ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature, + TypeSignature, TypeSignatureClass, Volatility, }; use datafusion_macros::user_doc; use std::any::Any; @@ -29,7 +30,7 @@ use std::sync::Arc; #[user_doc( doc_section(label = "Other Functions"), description = "Returns the metadata of the input expression. If a key is provided, returns the value for that key. If no key is provided, returns a Map of all metadata.", - syntax_example = "arrow_metadata(expression, [key])", + syntax_example = "arrow_metadata(expression[, key])", sql_example = r#"```sql > select arrow_metadata(col) from table; +----------------------------+ @@ -61,7 +62,18 @@ pub struct ArrowMetadataFunc { impl ArrowMetadataFunc { pub fn new() -> Self { Self { - signature: Signature::variadic_any(Volatility::Immutable), + signature: Signature::one_of( + vec![ + TypeSignature::Coercible(vec![Coercion::new_exact( + TypeSignatureClass::Any, + )]), + TypeSignature::Coercible(vec![ + Coercion::new_exact(TypeSignatureClass::Any), + Coercion::new_exact(TypeSignatureClass::Native(logical_string())), + ]), + ], + Volatility::Immutable, + ), } } } @@ -105,7 +117,7 @@ impl ScalarUDFImpl for ArrowMetadataFunc { false, )) } else { - exec_err!("arrow_metadata requires 1 or 2 arguments") + internal_err!("arrow_metadata requires 1 or 2 arguments") } } @@ -114,7 +126,7 @@ impl ScalarUDFImpl for ArrowMetadataFunc { if args.args.len() == 2 { let key = match &args.args[1] { - ColumnarValue::Scalar(ScalarValue::Utf8(Some(k))) => k, + ColumnarValue::Scalar(ScalarValue::Utf8(Some(key))) => key, _ => { return exec_err!( "Second argument to arrow_metadata must be a string literal key" @@ -142,7 +154,7 @@ impl ScalarUDFImpl for ArrowMetadataFunc { &map_array, 0, )?)) } else { - exec_err!("arrow_metadata requires 1 or 2 arguments") + internal_err!("arrow_metadata requires 1 or 2 arguments") } } } diff --git a/datafusion/sqllogictest/test_files/arrow_typeof.slt b/datafusion/sqllogictest/test_files/arrow_typeof.slt index c213f2abf7190..ee1f204664a14 100644 --- a/datafusion/sqllogictest/test_files/arrow_typeof.slt +++ b/datafusion/sqllogictest/test_files/arrow_typeof.slt @@ -95,9 +95,12 @@ SELECT arrow_cast('1', 'Int16') query error SELECT arrow_cast('1') -query error DataFusion error: Execution error: arrow_cast requires its second argument to be a non\-empty constant string +query error Expect TypeSignatureClass::Native\(LogicalType\(Native\(String\), String\)\) but received NativeType::Int64, DataType: Int64 SELECT arrow_cast('1', 43) +query error DataFusion error: Execution error: arrow_cast requires its second argument to be a non\-empty constant string +SELECT arrow_cast('1', arrow_cast('Utf8', 'Utf8')) + query error DataFusion error: Execution error: Unsupported type 'unknown'\. Must be a supported arrow type name such as 'Int32' or 'Timestamp\(ns\)'\. Error unknown token: unknown SELECT arrow_cast('1', 'unknown') From b30676f116526ed531a6fad145b344e8a6d53906 Mon Sep 17 00:00:00 2001 From: Jefffrey Date: Thu, 25 Dec 2025 10:59:05 +0900 Subject: [PATCH 2/2] update docs --- docs/source/user-guide/sql/scalar_functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index e454788003f88..0c01b32b756b5 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -5047,7 +5047,7 @@ arrow_cast(expression, datatype) Returns the metadata of the input expression. If a key is provided, returns the value for that key. If no key is provided, returns a Map of all metadata. ```sql -arrow_metadata(expression, [key]) +arrow_metadata(expression[, key]) ``` #### Arguments