-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Various refactors to string functions #19402
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -24,8 +24,11 @@ use arrow::array::{AsArray, GenericStringBuilder}; | |
| use arrow::datatypes::DataType; | ||
| use datafusion_common::ScalarValue; | ||
| use datafusion_common::cast::as_int64_array; | ||
| use datafusion_common::types::{NativeType, logical_int64, logical_string}; | ||
| use datafusion_common::{DataFusionError, Result, exec_err}; | ||
| use datafusion_expr::{ColumnarValue, Documentation, TypeSignature, Volatility}; | ||
| use datafusion_expr::{ | ||
| Coercion, ColumnarValue, Documentation, TypeSignatureClass, Volatility, | ||
| }; | ||
| use datafusion_expr::{ScalarFunctionArgs, ScalarUDFImpl, Signature}; | ||
| use datafusion_macros::user_doc; | ||
| use std::any::Any; | ||
|
|
@@ -60,19 +63,16 @@ impl Default for SplitPartFunc { | |
|
|
||
| impl SplitPartFunc { | ||
| pub fn new() -> Self { | ||
| use DataType::*; | ||
| Self { | ||
| signature: Signature::one_of( | ||
| signature: Signature::coercible( | ||
| vec![ | ||
| TypeSignature::Exact(vec![Utf8View, Utf8View, Int64]), | ||
| TypeSignature::Exact(vec![Utf8View, Utf8, Int64]), | ||
| TypeSignature::Exact(vec![Utf8View, LargeUtf8, Int64]), | ||
| TypeSignature::Exact(vec![Utf8, Utf8View, Int64]), | ||
| TypeSignature::Exact(vec![Utf8, Utf8, Int64]), | ||
| TypeSignature::Exact(vec![LargeUtf8, Utf8View, Int64]), | ||
| TypeSignature::Exact(vec![LargeUtf8, Utf8, Int64]), | ||
| TypeSignature::Exact(vec![Utf8, LargeUtf8, Int64]), | ||
| TypeSignature::Exact(vec![LargeUtf8, LargeUtf8, Int64]), | ||
| Coercion::new_exact(TypeSignatureClass::Native(logical_string())), | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. More compact API
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Nice, however it took some time to get used to the API |
||
| Coercion::new_exact(TypeSignatureClass::Native(logical_string())), | ||
| Coercion::new_implicit( | ||
| TypeSignatureClass::Native(logical_int64()), | ||
| vec![TypeSignatureClass::Integer], | ||
| NativeType::Int64, | ||
| ), | ||
| ], | ||
| Volatility::Immutable, | ||
| ), | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -21,20 +21,16 @@ use std::sync::Arc; | |
| use crate::utils::make_scalar_function; | ||
| use arrow::array::{Array, ArrayRef, StringArray}; | ||
| use arrow::buffer::{Buffer, OffsetBuffer}; | ||
| use arrow::datatypes::DataType::{ | ||
| Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64, Utf8, | ||
| }; | ||
| use arrow::datatypes::{ | ||
| ArrowNativeType, ArrowPrimitiveType, DataType, Int8Type, Int16Type, Int32Type, | ||
| Int64Type, UInt8Type, UInt16Type, UInt32Type, UInt64Type, | ||
| }; | ||
| use datafusion_common::Result; | ||
| use datafusion_common::cast::as_primitive_array; | ||
| use datafusion_common::{exec_err, plan_err}; | ||
|
|
||
| use datafusion_expr::{ColumnarValue, Documentation}; | ||
| use datafusion_expr::{ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility}; | ||
| use datafusion_expr_common::signature::TypeSignature::Exact; | ||
| use datafusion_common::{Result, ScalarValue, exec_err}; | ||
| use datafusion_expr::{ | ||
| Coercion, ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature, | ||
| TypeSignatureClass, Volatility, | ||
| }; | ||
| use datafusion_macros::user_doc; | ||
|
|
||
| /// Hex lookup table for fast conversion | ||
|
|
@@ -201,17 +197,8 @@ impl Default for ToHexFunc { | |
| impl ToHexFunc { | ||
| pub fn new() -> Self { | ||
| Self { | ||
| signature: Signature::one_of( | ||
| vec![ | ||
| Exact(vec![Int8]), | ||
| Exact(vec![Int16]), | ||
| Exact(vec![Int32]), | ||
| Exact(vec![Int64]), | ||
| Exact(vec![UInt8]), | ||
| Exact(vec![UInt16]), | ||
| Exact(vec![UInt32]), | ||
| Exact(vec![UInt64]), | ||
| ], | ||
| signature: Signature::coercible( | ||
| vec![Coercion::new_exact(TypeSignatureClass::Integer)], | ||
| Volatility::Immutable, | ||
| ), | ||
| } | ||
|
|
@@ -231,25 +218,37 @@ impl ScalarUDFImpl for ToHexFunc { | |
| &self.signature | ||
| } | ||
|
|
||
| fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> { | ||
| Ok(match arg_types[0] { | ||
| Int8 | Int16 | Int32 | Int64 | UInt8 | UInt16 | UInt32 | UInt64 => Utf8, | ||
| _ => { | ||
| return plan_err!("The to_hex function can only accept integers."); | ||
| } | ||
| }) | ||
| fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> { | ||
| Ok(DataType::Utf8) | ||
| } | ||
|
|
||
| fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> { | ||
| match args.args[0].data_type() { | ||
| Int64 => make_scalar_function(to_hex::<Int64Type>, vec![])(&args.args), | ||
| UInt64 => make_scalar_function(to_hex::<UInt64Type>, vec![])(&args.args), | ||
| Int32 => make_scalar_function(to_hex::<Int32Type>, vec![])(&args.args), | ||
| UInt32 => make_scalar_function(to_hex::<UInt32Type>, vec![])(&args.args), | ||
| Int16 => make_scalar_function(to_hex::<Int16Type>, vec![])(&args.args), | ||
| UInt16 => make_scalar_function(to_hex::<UInt16Type>, vec![])(&args.args), | ||
| Int8 => make_scalar_function(to_hex::<Int8Type>, vec![])(&args.args), | ||
| UInt8 => make_scalar_function(to_hex::<UInt8Type>, vec![])(&args.args), | ||
| DataType::Null => Ok(ColumnarValue::Scalar(ScalarValue::Utf8(None))), | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I prefer having the
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm sure some future refactor will change this :) |
||
| DataType::Int64 => { | ||
| make_scalar_function(to_hex::<Int64Type>, vec![])(&args.args) | ||
| } | ||
| DataType::UInt64 => { | ||
| make_scalar_function(to_hex::<UInt64Type>, vec![])(&args.args) | ||
| } | ||
| DataType::Int32 => { | ||
| make_scalar_function(to_hex::<Int32Type>, vec![])(&args.args) | ||
| } | ||
| DataType::UInt32 => { | ||
| make_scalar_function(to_hex::<UInt32Type>, vec![])(&args.args) | ||
| } | ||
| DataType::Int16 => { | ||
| make_scalar_function(to_hex::<Int16Type>, vec![])(&args.args) | ||
| } | ||
| DataType::UInt16 => { | ||
| make_scalar_function(to_hex::<UInt16Type>, vec![])(&args.args) | ||
| } | ||
| DataType::Int8 => { | ||
| make_scalar_function(to_hex::<Int8Type>, vec![])(&args.args) | ||
| } | ||
| DataType::UInt8 => { | ||
| make_scalar_function(to_hex::<UInt8Type>, vec![])(&args.args) | ||
| } | ||
| other => exec_err!("Unsupported data type {other:?} for function to_hex"), | ||
| } | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`utf8_to_str_type()` preserves the type as LargeUtf8 if the input is large, and otherwise returns just Utf8; we can see that we also preserve the view type, so in this case we are simply preserving the input type as is.