Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions datafusion/expr-common/src/signature.rs
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,8 @@ impl TypeSignature {
/// arguments that can be coerced to a particular class of types.
#[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Hash)]
pub enum TypeSignatureClass {
/// Allows an arbitrary type argument without coercing the argument.
Any,
Timestamp,
Time,
Interval,
Expand All @@ -354,6 +356,9 @@ impl TypeSignatureClass {
/// documentation or error messages.
fn get_example_types(&self) -> Vec<DataType> {
match self {
// TODO: might be too much info to return every single type here
// maybe https://github.com/apache/datafusion/issues/14761 will help here?
Comment on lines +359 to +360
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍🏻

TypeSignatureClass::Any => vec![],
TypeSignatureClass::Native(l) => get_data_types(l.native()),
TypeSignatureClass::Timestamp => {
vec![
Expand Down Expand Up @@ -396,6 +401,7 @@ impl TypeSignatureClass {
}

match self {
TypeSignatureClass::Any => true,
TypeSignatureClass::Native(t) if t.native() == logical_type => true,
TypeSignatureClass::Timestamp if logical_type.is_timestamp() => true,
TypeSignatureClass::Time if logical_type.is_time() => true,
Expand All @@ -417,6 +423,7 @@ impl TypeSignatureClass {
origin_type: &DataType,
) -> Result<DataType> {
match self {
TypeSignatureClass::Any => Ok(origin_type.to_owned()),
TypeSignatureClass::Native(logical_type) => {
logical_type.native().default_cast_for(origin_type)
}
Expand Down
13 changes: 10 additions & 3 deletions datafusion/functions/src/core/arrow_cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

use arrow::datatypes::{DataType, Field, FieldRef};
use arrow::error::ArrowError;
use datafusion_common::types::logical_string;
use datafusion_common::{
Result, ScalarValue, arrow_datafusion_err, exec_err, internal_err,
};
Expand All @@ -27,8 +28,8 @@ use std::any::Any;

use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo};
use datafusion_expr::{
ColumnarValue, Documentation, Expr, ReturnFieldArgs, ScalarFunctionArgs,
ScalarUDFImpl, Signature, Volatility,
Coercion, ColumnarValue, Documentation, Expr, ReturnFieldArgs, ScalarFunctionArgs,
ScalarUDFImpl, Signature, TypeSignatureClass, Volatility,
};
use datafusion_macros::user_doc;

Expand Down Expand Up @@ -102,7 +103,13 @@ impl Default for ArrowCastFunc {
impl ArrowCastFunc {
pub fn new() -> Self {
Self {
signature: Signature::any(2, Volatility::Immutable),
signature: Signature::coercible(
vec![
Coercion::new_exact(TypeSignatureClass::Any),
Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
],
Volatility::Immutable,
),
}
}
}
Expand Down
28 changes: 20 additions & 8 deletions datafusion/functions/src/core/arrow_metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,11 @@

use arrow::array::{MapBuilder, StringBuilder};
use arrow::datatypes::{DataType, Field, Fields};
use datafusion_common::{Result, ScalarValue, exec_err};
use datafusion_common::types::logical_string;
use datafusion_common::{Result, ScalarValue, exec_err, internal_err};
use datafusion_expr::{
ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature,
Volatility,
Coercion, ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature,
TypeSignature, TypeSignatureClass, Volatility,
};
use datafusion_macros::user_doc;
use std::any::Any;
Expand All @@ -29,7 +30,7 @@ use std::sync::Arc;
#[user_doc(
doc_section(label = "Other Functions"),
description = "Returns the metadata of the input expression. If a key is provided, returns the value for that key. If no key is provided, returns a Map of all metadata.",
syntax_example = "arrow_metadata(expression, [key])",
syntax_example = "arrow_metadata(expression[, key])",
sql_example = r#"```sql
> select arrow_metadata(col) from table;
+----------------------------+
Expand Down Expand Up @@ -61,7 +62,18 @@ pub struct ArrowMetadataFunc {
impl ArrowMetadataFunc {
pub fn new() -> Self {
Self {
signature: Signature::variadic_any(Volatility::Immutable),
signature: Signature::one_of(
vec![
TypeSignature::Coercible(vec![Coercion::new_exact(
TypeSignatureClass::Any,
)]),
TypeSignature::Coercible(vec![
Coercion::new_exact(TypeSignatureClass::Any),
Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
]),
],
Volatility::Immutable,
),
}
}
}
Expand Down Expand Up @@ -105,7 +117,7 @@ impl ScalarUDFImpl for ArrowMetadataFunc {
false,
))
} else {
exec_err!("arrow_metadata requires 1 or 2 arguments")
internal_err!("arrow_metadata requires 1 or 2 arguments")
}
}

Expand All @@ -114,7 +126,7 @@ impl ScalarUDFImpl for ArrowMetadataFunc {

if args.args.len() == 2 {
let key = match &args.args[1] {
ColumnarValue::Scalar(ScalarValue::Utf8(Some(k))) => k,
ColumnarValue::Scalar(ScalarValue::Utf8(Some(key))) => key,
_ => {
return exec_err!(
"Second argument to arrow_metadata must be a string literal key"
Expand Down Expand Up @@ -142,7 +154,7 @@ impl ScalarUDFImpl for ArrowMetadataFunc {
&map_array, 0,
)?))
} else {
exec_err!("arrow_metadata requires 1 or 2 arguments")
internal_err!("arrow_metadata requires 1 or 2 arguments")
}
}
}
5 changes: 4 additions & 1 deletion datafusion/sqllogictest/test_files/arrow_typeof.slt
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,12 @@ SELECT arrow_cast('1', 'Int16')
query error
SELECT arrow_cast('1')

query error DataFusion error: Execution error: arrow_cast requires its second argument to be a non\-empty constant string
query error Expect TypeSignatureClass::Native\(LogicalType\(Native\(String\), String\)\) but received NativeType::Int64, DataType: Int64
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I should probably start looking into #19004 properly to try to pretty up these new error messages 🤔

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These are really ugly, I agree; it would be nice if they were neater. But the current status quo is not good either, so this is no better or worse IMO.

SELECT arrow_cast('1', 43)

query error DataFusion error: Execution error: arrow_cast requires its second argument to be a non\-empty constant string
SELECT arrow_cast('1', arrow_cast('Utf8', 'Utf8'))

query error DataFusion error: Execution error: Unsupported type 'unknown'\. Must be a supported arrow type name such as 'Int32' or 'Timestamp\(ns\)'\. Error unknown token: unknown
SELECT arrow_cast('1', 'unknown')

Expand Down
2 changes: 1 addition & 1 deletion docs/source/user-guide/sql/scalar_functions.md
Original file line number Diff line number Diff line change
Expand Up @@ -5047,7 +5047,7 @@ arrow_cast(expression, datatype)
Returns the metadata of the input expression. If a key is provided, returns the value for that key. If no key is provided, returns a Map of all metadata.

```sql
arrow_metadata(expression, [key])
arrow_metadata(expression[, key])
```

#### Arguments
Expand Down