Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 2 additions & 6 deletions datafusion/functions/src/string/ends_with.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ use arrow::compute::kernels::comparison::ends_with as arrow_ends_with;
use arrow::datatypes::DataType;

use datafusion_common::types::logical_string;
use datafusion_common::utils::take_function_args;
use datafusion_common::{Result, ScalarValue, exec_err};
use datafusion_expr::binary::{binary_to_string_coercion, string_coercion};
use datafusion_expr::{
Expand Down Expand Up @@ -95,12 +96,7 @@ impl ScalarUDFImpl for EndsWithFunc {
}

fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
let [str_arg, suffix_arg] = args.args.as_slice() else {
return exec_err!(
"ends_with was called with {} arguments, expected 2",
args.args.len()
);
};
let [str_arg, suffix_arg] = take_function_args(self.name(), &args.args)?;

// Determine the common type for coercion
let coercion_type = string_coercion(
Expand Down
8 changes: 2 additions & 6 deletions datafusion/functions/src/string/ltrim.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ use std::any::Any;
use std::sync::Arc;

use crate::string::common::*;
use crate::utils::{make_scalar_function, utf8_to_str_type};
use crate::utils::make_scalar_function;
use datafusion_common::types::logical_string;
use datafusion_common::{Result, exec_err};
use datafusion_expr::function::Hint;
Expand Down Expand Up @@ -115,11 +115,7 @@ impl ScalarUDFImpl for LtrimFunc {
}

fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
if arg_types[0] == DataType::Utf8View {
Ok(DataType::Utf8View)
} else {
utf8_to_str_type(&arg_types[0], "ltrim")
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`utf8_to_str_type()` returns LargeUtf8 when the input is a large type and Utf8 otherwise, and the branch above already preserves the Utf8View type. In other words, this function was simply returning the input type unchanged, so we can return the input type as-is.

}
Ok(arg_types[0].clone())
}

fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
Expand Down
8 changes: 2 additions & 6 deletions datafusion/functions/src/string/rtrim.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ use std::any::Any;
use std::sync::Arc;

use crate::string::common::*;
use crate::utils::{make_scalar_function, utf8_to_str_type};
use crate::utils::make_scalar_function;
use datafusion_common::types::logical_string;
use datafusion_common::{Result, exec_err};
use datafusion_expr::function::Hint;
Expand Down Expand Up @@ -115,11 +115,7 @@ impl ScalarUDFImpl for RtrimFunc {
}

fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
if arg_types[0] == DataType::Utf8View {
Ok(DataType::Utf8View)
} else {
utf8_to_str_type(&arg_types[0], "rtrim")
}
Ok(arg_types[0].clone())
}

fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
Expand Down
24 changes: 12 additions & 12 deletions datafusion/functions/src/string/split_part.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,11 @@ use arrow::array::{AsArray, GenericStringBuilder};
use arrow::datatypes::DataType;
use datafusion_common::ScalarValue;
use datafusion_common::cast::as_int64_array;
use datafusion_common::types::{NativeType, logical_int64, logical_string};
use datafusion_common::{DataFusionError, Result, exec_err};
use datafusion_expr::{ColumnarValue, Documentation, TypeSignature, Volatility};
use datafusion_expr::{
Coercion, ColumnarValue, Documentation, TypeSignatureClass, Volatility,
};
use datafusion_expr::{ScalarFunctionArgs, ScalarUDFImpl, Signature};
use datafusion_macros::user_doc;
use std::any::Any;
Expand Down Expand Up @@ -60,19 +63,16 @@ impl Default for SplitPartFunc {

impl SplitPartFunc {
pub fn new() -> Self {
use DataType::*;
Self {
signature: Signature::one_of(
signature: Signature::coercible(
vec![
TypeSignature::Exact(vec![Utf8View, Utf8View, Int64]),
TypeSignature::Exact(vec![Utf8View, Utf8, Int64]),
TypeSignature::Exact(vec![Utf8View, LargeUtf8, Int64]),
TypeSignature::Exact(vec![Utf8, Utf8View, Int64]),
TypeSignature::Exact(vec![Utf8, Utf8, Int64]),
TypeSignature::Exact(vec![LargeUtf8, Utf8View, Int64]),
TypeSignature::Exact(vec![LargeUtf8, Utf8, Int64]),
TypeSignature::Exact(vec![Utf8, LargeUtf8, Int64]),
TypeSignature::Exact(vec![LargeUtf8, LargeUtf8, Int64]),
Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

More compact API

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice, although it took me some time to get used to the API.

Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
Coercion::new_implicit(
TypeSignatureClass::Native(logical_int64()),
vec![TypeSignatureClass::Integer],
NativeType::Int64,
),
],
Volatility::Immutable,
),
Expand Down
8 changes: 2 additions & 6 deletions datafusion/functions/src/string/starts_with.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ use std::sync::Arc;
use arrow::array::{ArrayRef, Scalar};
use arrow::compute::kernels::comparison::starts_with as arrow_starts_with;
use arrow::datatypes::DataType;
use datafusion_common::utils::take_function_args;
use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo};
use datafusion_expr::type_coercion::binary::{
binary_to_string_coercion, string_coercion,
Expand Down Expand Up @@ -92,12 +93,7 @@ impl ScalarUDFImpl for StartsWithFunc {
}

fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
let [str_arg, prefix_arg] = args.args.as_slice() else {
return exec_err!(
"starts_with was called with {} arguments, expected 2",
args.args.len()
);
};
let [str_arg, prefix_arg] = take_function_args(self.name(), &args.args)?;

// Determine the common type for coercion
let coercion_type = string_coercion(
Expand Down
69 changes: 34 additions & 35 deletions datafusion/functions/src/string/to_hex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,20 +21,16 @@ use std::sync::Arc;
use crate::utils::make_scalar_function;
use arrow::array::{Array, ArrayRef, StringArray};
use arrow::buffer::{Buffer, OffsetBuffer};
use arrow::datatypes::DataType::{
Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64, Utf8,
};
use arrow::datatypes::{
ArrowNativeType, ArrowPrimitiveType, DataType, Int8Type, Int16Type, Int32Type,
Int64Type, UInt8Type, UInt16Type, UInt32Type, UInt64Type,
};
use datafusion_common::Result;
use datafusion_common::cast::as_primitive_array;
use datafusion_common::{exec_err, plan_err};

use datafusion_expr::{ColumnarValue, Documentation};
use datafusion_expr::{ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility};
use datafusion_expr_common::signature::TypeSignature::Exact;
use datafusion_common::{Result, ScalarValue, exec_err};
use datafusion_expr::{
Coercion, ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature,
TypeSignatureClass, Volatility,
};
use datafusion_macros::user_doc;

/// Hex lookup table for fast conversion
Expand Down Expand Up @@ -201,17 +197,8 @@ impl Default for ToHexFunc {
impl ToHexFunc {
pub fn new() -> Self {
Self {
signature: Signature::one_of(
vec![
Exact(vec![Int8]),
Exact(vec![Int16]),
Exact(vec![Int32]),
Exact(vec![Int64]),
Exact(vec![UInt8]),
Exact(vec![UInt16]),
Exact(vec![UInt32]),
Exact(vec![UInt64]),
],
signature: Signature::coercible(
vec![Coercion::new_exact(TypeSignatureClass::Integer)],
Volatility::Immutable,
),
}
Expand All @@ -231,25 +218,37 @@ impl ScalarUDFImpl for ToHexFunc {
&self.signature
}

fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
Ok(match arg_types[0] {
Int8 | Int16 | Int32 | Int64 | UInt8 | UInt16 | UInt32 | UInt64 => Utf8,
_ => {
return plan_err!("The to_hex function can only accept integers.");
}
})
fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
Ok(DataType::Utf8)
}

fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
match args.args[0].data_type() {
Int64 => make_scalar_function(to_hex::<Int64Type>, vec![])(&args.args),
UInt64 => make_scalar_function(to_hex::<UInt64Type>, vec![])(&args.args),
Int32 => make_scalar_function(to_hex::<Int32Type>, vec![])(&args.args),
UInt32 => make_scalar_function(to_hex::<UInt32Type>, vec![])(&args.args),
Int16 => make_scalar_function(to_hex::<Int16Type>, vec![])(&args.args),
UInt16 => make_scalar_function(to_hex::<UInt16Type>, vec![])(&args.args),
Int8 => make_scalar_function(to_hex::<Int8Type>, vec![])(&args.args),
UInt8 => make_scalar_function(to_hex::<UInt8Type>, vec![])(&args.args),
DataType::Null => Ok(ColumnarValue::Scalar(ScalarValue::Utf8(None))),
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I prefer having the `DataType::` prefix, but this is just a personal preference; I can revert it if it is too verbose.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm sure some future refactor will change this :)

DataType::Int64 => {
make_scalar_function(to_hex::<Int64Type>, vec![])(&args.args)
}
DataType::UInt64 => {
make_scalar_function(to_hex::<UInt64Type>, vec![])(&args.args)
}
DataType::Int32 => {
make_scalar_function(to_hex::<Int32Type>, vec![])(&args.args)
}
DataType::UInt32 => {
make_scalar_function(to_hex::<UInt32Type>, vec![])(&args.args)
}
DataType::Int16 => {
make_scalar_function(to_hex::<Int16Type>, vec![])(&args.args)
}
DataType::UInt16 => {
make_scalar_function(to_hex::<UInt16Type>, vec![])(&args.args)
}
DataType::Int8 => {
make_scalar_function(to_hex::<Int8Type>, vec![])(&args.args)
}
DataType::UInt8 => {
make_scalar_function(to_hex::<UInt8Type>, vec![])(&args.args)
}
other => exec_err!("Unsupported data type {other:?} for function to_hex"),
}
}
Expand Down
2 changes: 1 addition & 1 deletion datafusion/functions/src/string/uuid.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ impl Default for UuidFunc {
impl UuidFunc {
pub fn new() -> Self {
Self {
signature: Signature::exact(vec![], Volatility::Volatile),
signature: Signature::nullary(Volatility::Volatile),
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion datafusion/sqllogictest/test_files/encoding.slt
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ select decode('', null) from test;
query error DataFusion error: This feature is not implemented: Encoding must be a scalar; array specified encoding is not yet supported
select decode('', hex_field) from test;

query error DataFusion error: Error during planning: Failed to coerce arguments to satisfy a call to 'to_hex' function
query error DataFusion error: Error during planning: Internal error: Expect TypeSignatureClass::Integer but received NativeType::String, DataType: Utf8View
select to_hex(hex_field) from test;

query error DataFusion error: Execution error: Failed to decode value using base64
Expand Down