From 5be98fed62d6288c51ff6461a786ef87a2c4c0a9 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Fri, 13 Feb 2026 16:50:38 +0000 Subject: [PATCH 01/20] break[array]: lazy compare function Signed-off-by: Joe Isaacs --- encodings/alp/src/alp/compute/compare.rs | 360 ------------------ encodings/alp/src/alp/compute/mod.rs | 1 - .../datetime-parts/src/compute/compare.rs | 327 ---------------- encodings/datetime-parts/src/compute/mod.rs | 1 - .../src/decimal_byte_parts/compute/compare.rs | 259 ------------- .../src/decimal_byte_parts/compute/mod.rs | 1 - .../fastlanes/src/for/compute/compare.rs | 217 ----------- encodings/fastlanes/src/for/compute/mod.rs | 1 - encodings/fsst/src/compute/compare.rs | 196 ---------- encodings/fsst/src/compute/mod.rs | 1 - encodings/runend/src/compute/compare.rs | 83 ---- encodings/runend/src/compute/mod.rs | 1 - encodings/sequence/src/compute/compare.rs | 57 --- .../src/arrays/chunked/compute/compare.rs | 65 ---- .../src/arrays/chunked/compute/mod.rs | 1 - .../src/arrays/constant/compute/compare.rs | 36 -- .../src/arrays/constant/compute/mod.rs | 1 - .../src/arrays/dict/compute/compare.rs | 158 -------- vortex-array/src/arrays/dict/compute/mod.rs | 1 - .../src/arrays/extension/compute/compare.rs | 45 --- .../src/arrays/extension/compute/mod.rs | 1 - .../src/arrays/masked/compute/compare.rs | 143 ------- vortex-array/src/arrays/masked/compute/mod.rs | 1 - .../src/arrays/varbin/compute/compare.rs | 232 ----------- vortex-array/src/arrays/varbin/compute/mod.rs | 1 - vortex-array/src/compute/compare.rs | 285 +------------- vortex-array/src/compute/mod.rs | 1 - vortex-array/src/expr/exprs/binary/compare.rs | 98 +++++ vortex-array/src/expr/exprs/binary/mod.rs | 38 +- vortex-array/src/expr/exprs/operators.rs | 7 + vortex-test/e2e/src/lib.rs | 2 +- 31 files changed, 153 insertions(+), 2468 deletions(-) delete mode 100644 encodings/alp/src/alp/compute/compare.rs delete mode 100644 encodings/datetime-parts/src/compute/compare.rs delete mode 100644 
encodings/decimal-byte-parts/src/decimal_byte_parts/compute/compare.rs delete mode 100644 encodings/fastlanes/src/for/compute/compare.rs delete mode 100644 encodings/fsst/src/compute/compare.rs delete mode 100644 encodings/runend/src/compute/compare.rs delete mode 100644 vortex-array/src/arrays/chunked/compute/compare.rs delete mode 100644 vortex-array/src/arrays/constant/compute/compare.rs delete mode 100644 vortex-array/src/arrays/dict/compute/compare.rs delete mode 100644 vortex-array/src/arrays/extension/compute/compare.rs delete mode 100644 vortex-array/src/arrays/masked/compute/compare.rs delete mode 100644 vortex-array/src/arrays/varbin/compute/compare.rs create mode 100644 vortex-array/src/expr/exprs/binary/compare.rs diff --git a/encodings/alp/src/alp/compute/compare.rs b/encodings/alp/src/alp/compute/compare.rs deleted file mode 100644 index 6f04b58841b..00000000000 --- a/encodings/alp/src/alp/compute/compare.rs +++ /dev/null @@ -1,360 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -use std::fmt::Debug; - -use vortex_array::Array; -use vortex_array::ArrayRef; -use vortex_array::IntoArray; -use vortex_array::arrays::ConstantArray; -use vortex_array::compute::CompareKernel; -use vortex_array::compute::CompareKernelAdapter; -use vortex_array::compute::Operator; -use vortex_array::compute::compare; -use vortex_array::register_kernel; -use vortex_dtype::NativePType; -use vortex_error::VortexResult; -use vortex_error::vortex_bail; -use vortex_error::vortex_err; -use vortex_scalar::Scalar; - -use crate::ALPArray; -use crate::ALPFloat; -use crate::ALPVTable; -use crate::match_each_alp_float_ptype; - -// TODO(joe): add fuzzing. 
- -impl CompareKernel for ALPVTable { - fn compare( - &self, - lhs: &ALPArray, - rhs: &dyn Array, - operator: Operator, - ) -> VortexResult> { - if lhs.patches().is_some() { - // TODO(joe): support patches - return Ok(None); - } - if lhs.dtype().is_nullable() || rhs.dtype().is_nullable() { - // TODO(joe): support nullability - return Ok(None); - } - - if let Some(const_scalar) = rhs.as_constant() { - let pscalar = const_scalar.as_primitive_opt().ok_or_else(|| { - vortex_err!( - "ALP Compare RHS had the wrong type {}, expected {}", - const_scalar, - const_scalar.dtype() - ) - })?; - - match_each_alp_float_ptype!(pscalar.ptype(), |T| { - match pscalar.typed_value::() { - Some(value) => return alp_scalar_compare(lhs, value, operator), - None => vortex_bail!( - "Failed to convert scalar {:?} to ALP type {:?}", - pscalar, - pscalar.ptype() - ), - } - }); - } - - Ok(None) - } -} - -register_kernel!(CompareKernelAdapter(ALPVTable).lift()); - -/// We can compare a scalar to an ALPArray by encoding the scalar into the ALP domain and comparing -/// the encoded value to the encoded values in the ALPArray. There are fixups when the value doesn't -/// encode into the ALP domain. -fn alp_scalar_compare>( - alp: &ALPArray, - value: F, - operator: Operator, -) -> VortexResult> -where - F::ALPInt: Into, - ::ALPInt: Debug, -{ - // TODO(joe): support patches, this is checked above. - if alp.patches().is_some() { - return Ok(None); - } - - let exponents = alp.exponents(); - // If the scalar doesn't fit into the ALP domain, - // it cannot be equal to any values in the encoded array. - let encoded = F::encode_single(value, alp.exponents()); - match encoded { - Some(encoded) => { - let s = ConstantArray::new(encoded, alp.len()); - Ok(Some(compare(alp.encoded(), s.as_ref(), operator)?)) - } - None => match operator { - // Since this value is not encodable it cannot be equal to any value in the encoded - // array. 
- Operator::Eq => Ok(Some(ConstantArray::new(false, alp.len()).into_array())), - // Since this value is not encodable it cannot be equal to any value in the encoded - // array, hence != to all values in the encoded array. - Operator::NotEq => Ok(Some(ConstantArray::new(true, alp.len()).into_array())), - Operator::Gt | Operator::Gte => { - // Per IEEE 754 totalOrder semantics the ordering is -Nan < -Inf < Inf < Nan. - // All values in the encoded array are definitely finite - let is_not_finite = NativePType::is_infinite(value) || NativePType::is_nan(value); - if is_not_finite { - Ok(Some( - ConstantArray::new(value.is_sign_negative(), alp.len()).into_array(), - )) - } else { - Ok(Some(compare( - alp.encoded(), - ConstantArray::new(F::encode_above(value, exponents), alp.len()).as_ref(), - // Since the encoded value is unencodable gte is equivalent to gt. - // Consider a value v, between two encodable values v_l (just less) and - // v_a (just above), then for all encodable values (u), v > u <=> v_g >= u - Operator::Gte, - )?)) - } - } - Operator::Lt | Operator::Lte => { - // Per IEEE 754 totalOrder semantics the ordering is -Nan < -Inf < Inf < Nan. - // All values in the encoded array are definitely finite - let is_not_finite = NativePType::is_infinite(value) || NativePType::is_nan(value); - if is_not_finite { - Ok(Some( - ConstantArray::new(value.is_sign_positive(), alp.len()).into_array(), - )) - } else { - Ok(Some(compare( - alp.encoded(), - ConstantArray::new(F::encode_below(value, exponents), alp.len()).as_ref(), - // Since the encoded values unencodable lt is equivalent to lte. - // See Gt | Gte for further explanation. 
- Operator::Lte, - )?)) - } - } - }, - } -} - -#[cfg(test)] -mod tests { - use rstest::rstest; - use vortex_array::ArrayRef; - use vortex_array::ToCanonical; - use vortex_array::arrays::BoolArray; - use vortex_array::arrays::ConstantArray; - use vortex_array::arrays::PrimitiveArray; - use vortex_array::assert_arrays_eq; - use vortex_array::compute::Operator; - use vortex_array::compute::compare; - use vortex_dtype::DType; - use vortex_dtype::Nullability; - use vortex_dtype::PType; - use vortex_scalar::Scalar; - - use super::*; - use crate::alp_encode; - - fn test_alp_compare>( - alp: &ALPArray, - value: F, - operator: Operator, - ) -> Option - where - F::ALPInt: Into, - ::ALPInt: Debug, - { - alp_scalar_compare(alp, value, operator).unwrap() - } - - #[test] - fn basic_comparison_test() { - let array = PrimitiveArray::from_iter([1.234f32; 1025]); - let encoded = alp_encode(&array, None).unwrap(); - assert!(encoded.patches().is_none()); - assert_eq!( - encoded.encoded().to_primitive().as_slice::(), - vec![1234; 1025] - ); - - let r = alp_scalar_compare(&encoded, 1.3_f32, Operator::Eq) - .unwrap() - .unwrap(); - let expected = BoolArray::from_iter([false; 1025]); - assert_arrays_eq!(r, expected); - - let r = alp_scalar_compare(&encoded, 1.234f32, Operator::Eq) - .unwrap() - .unwrap(); - let expected = BoolArray::from_iter([true; 1025]); - assert_arrays_eq!(r, expected); - } - - #[test] - fn comparison_with_unencodable_value() { - let array = PrimitiveArray::from_iter([1.234f32; 1025]); - let encoded = alp_encode(&array, None).unwrap(); - assert!(encoded.patches().is_none()); - assert_eq!( - encoded.encoded().to_primitive().as_slice::(), - vec![1234; 1025] - ); - - #[allow(clippy::excessive_precision)] - let r_eq = alp_scalar_compare(&encoded, 1.234444_f32, Operator::Eq) - .unwrap() - .unwrap(); - let expected = BoolArray::from_iter([false; 1025]); - assert_arrays_eq!(r_eq, expected); - - #[allow(clippy::excessive_precision)] - let r_neq = alp_scalar_compare(&encoded, 
1.234444f32, Operator::NotEq) - .unwrap() - .unwrap(); - let expected = BoolArray::from_iter([true; 1025]); - assert_arrays_eq!(r_neq, expected); - } - - #[test] - fn comparison_range() { - let array = PrimitiveArray::from_iter([0.0605_f32; 10]); - let encoded = alp_encode(&array, None).unwrap(); - assert!(encoded.patches().is_none()); - assert_eq!( - encoded.encoded().to_primitive().as_slice::(), - vec![605; 10] - ); - - // !(0.0605_f32 >= 0.06051_f32); - let r_gte = alp_scalar_compare(&encoded, 0.06051_f32, Operator::Gte) - .unwrap() - .unwrap(); - let expected = BoolArray::from_iter([false; 10]); - assert_arrays_eq!(r_gte, expected); - - // (0.0605_f32 > 0.06051_f32); - let r_gt = alp_scalar_compare(&encoded, 0.06051_f32, Operator::Gt) - .unwrap() - .unwrap(); - let expected = BoolArray::from_iter([false; 10]); - assert_arrays_eq!(r_gt, expected); - - // 0.0605_f32 <= 0.06051_f32; - let r_lte = alp_scalar_compare(&encoded, 0.06051_f32, Operator::Lte) - .unwrap() - .unwrap(); - let expected = BoolArray::from_iter([true; 10]); - assert_arrays_eq!(r_lte, expected); - - //0.0605_f32 < 0.06051_f32; - let r_lt = alp_scalar_compare(&encoded, 0.06051_f32, Operator::Lt) - .unwrap() - .unwrap(); - let expected = BoolArray::from_iter([true; 10]); - assert_arrays_eq!(r_lt, expected); - } - - #[test] - fn comparison_zeroes() { - let array = PrimitiveArray::from_iter([0.0_f32; 10]); - let encoded = alp_encode(&array, None).unwrap(); - assert!(encoded.patches().is_none()); - assert_eq!( - encoded.encoded().to_primitive().as_slice::(), - vec![0; 10] - ); - - let r_gte = test_alp_compare(&encoded, -0.00000001_f32, Operator::Gte).unwrap(); - let expected = BoolArray::from_iter([true; 10]); - assert_arrays_eq!(r_gte, expected); - - let r_gte = test_alp_compare(&encoded, -0.0_f32, Operator::Gte).unwrap(); - let expected = BoolArray::from_iter([true; 10]); - assert_arrays_eq!(r_gte, expected); - - let r_gt = test_alp_compare(&encoded, -0.0000000001f32, Operator::Gt).unwrap(); - let 
expected = BoolArray::from_iter([true; 10]); - assert_arrays_eq!(r_gt, expected); - - let r_gte = test_alp_compare(&encoded, -0.0_f32, Operator::Gt).unwrap(); - let expected = BoolArray::from_iter([true; 10]); - assert_arrays_eq!(r_gte, expected); - - let r_lte = test_alp_compare(&encoded, 0.06051_f32, Operator::Lte).unwrap(); - let expected = BoolArray::from_iter([true; 10]); - assert_arrays_eq!(r_lte, expected); - - let r_lt = test_alp_compare(&encoded, 0.06051_f32, Operator::Lt).unwrap(); - let expected = BoolArray::from_iter([true; 10]); - assert_arrays_eq!(r_lt, expected); - - let r_lt = test_alp_compare(&encoded, -0.00001_f32, Operator::Lt).unwrap(); - let expected = BoolArray::from_iter([false; 10]); - assert_arrays_eq!(r_lt, expected); - } - - #[test] - fn compare_with_patches() { - let array = - PrimitiveArray::from_iter([1.234f32, 1.5, 19.0, std::f32::consts::E, 1_000_000.9]); - let encoded = alp_encode(&array, None).unwrap(); - assert!(encoded.patches().is_some()); - - // Not supported! 
- assert!( - alp_scalar_compare(&encoded, 1_000_000.9_f32, Operator::Eq) - .unwrap() - .is_none() - ) - } - - #[test] - fn compare_to_null() { - let array = PrimitiveArray::from_iter([1.234f32; 10]); - let encoded = alp_encode(&array, None).unwrap(); - - let other = ConstantArray::new( - Scalar::null(DType::Primitive(PType::F32, Nullability::Nullable)), - array.len(), - ); - - let r = compare(encoded.as_ref(), other.as_ref(), Operator::Eq).unwrap(); - // Comparing to null yields null results - let expected = BoolArray::from_iter([None::; 10]); - assert_arrays_eq!(r, expected); - } - - #[rstest] - #[case(f32::NAN, false)] - #[case(-1.0f32 / 0.0f32, true)] - #[case(f32::INFINITY, false)] - #[case(f32::NEG_INFINITY, true)] - fn compare_to_non_finite_gt(#[case] value: f32, #[case] result: bool) { - let array = PrimitiveArray::from_iter([1.234f32; 10]); - let encoded = alp_encode(&array, None).unwrap(); - - let r = test_alp_compare(&encoded, value, Operator::Gt).unwrap(); - let expected = BoolArray::from_iter([result; 10]); - assert_arrays_eq!(r, expected); - } - - #[rstest] - #[case(f32::NAN, true)] - #[case(-1.0f32 / 0.0f32, false)] - #[case(f32::INFINITY, true)] - #[case(f32::NEG_INFINITY, false)] - fn compare_to_non_finite_lt(#[case] value: f32, #[case] result: bool) { - let array = PrimitiveArray::from_iter([1.234f32; 10]); - let encoded = alp_encode(&array, None).unwrap(); - - let r = test_alp_compare(&encoded, value, Operator::Lt).unwrap(); - let expected = BoolArray::from_iter([result; 10]); - assert_arrays_eq!(r, expected); - } -} diff --git a/encodings/alp/src/alp/compute/mod.rs b/encodings/alp/src/alp/compute/mod.rs index af435684496..a3f1de38efc 100644 --- a/encodings/alp/src/alp/compute/mod.rs +++ b/encodings/alp/src/alp/compute/mod.rs @@ -3,7 +3,6 @@ mod between; mod cast; -mod compare; mod filter; mod mask; mod nan_count; diff --git a/encodings/datetime-parts/src/compute/compare.rs b/encodings/datetime-parts/src/compute/compare.rs deleted file mode 100644 
index 2885b35de26..00000000000 --- a/encodings/datetime-parts/src/compute/compare.rs +++ /dev/null @@ -1,327 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -use vortex_array::Array; -use vortex_array::ArrayRef; -use vortex_array::IntoArray; -use vortex_array::arrays::ConstantArray; -use vortex_array::builtins::ArrayBuiltins; -use vortex_array::compute::CompareKernel; -use vortex_array::compute::CompareKernelAdapter; -use vortex_array::compute::Operator; -use vortex_array::compute::and_kleene; -use vortex_array::compute::compare; -use vortex_array::compute::or_kleene; -use vortex_array::register_kernel; -use vortex_dtype::DType; -use vortex_dtype::Nullability; -use vortex_dtype::datetime::Timestamp; -use vortex_error::VortexResult; -use vortex_scalar::Scalar; - -use crate::array::DateTimePartsArray; -use crate::array::DateTimePartsVTable; -use crate::timestamp; - -impl CompareKernel for DateTimePartsVTable { - /// Compares two arrays and returns a new boolean array with the result of the comparison. - /// Or, returns None if comparison is not supported. 
- fn compare( - &self, - lhs: &DateTimePartsArray, - rhs: &dyn Array, - operator: Operator, - ) -> VortexResult> { - let Some(rhs_const) = rhs.as_constant() else { - return Ok(None); - }; - let Some(timestamp) = rhs_const - .as_extension() - .to_storage_scalar() - .as_primitive() - .as_::() - else { - return Ok(None); - }; - - let DType::Extension(ext_dtype) = rhs_const.dtype() else { - return Ok(None); - }; - - let nullability = lhs.dtype().nullability() | rhs.dtype().nullability(); - - let Some(options) = ext_dtype.metadata_opt::() else { - return Ok(None); - }; - let ts_parts = timestamp::split(timestamp, options.unit)?; - - match operator { - Operator::Eq => compare_eq(lhs, &ts_parts, nullability), - Operator::NotEq => compare_ne(lhs, &ts_parts, nullability), - // lt and lte have identical behavior, as we optimize - // for the case that all days on the lhs are smaller. - // If that special case is not hit, we return `Ok(None)` to - // signal that the comparison wasn't handled within dtp. - Operator::Lt => compare_lt(lhs, &ts_parts, nullability), - Operator::Lte => compare_lt(lhs, &ts_parts, nullability), - // (Like for lt, lte) - Operator::Gt => compare_gt(lhs, &ts_parts, nullability), - Operator::Gte => compare_gt(lhs, &ts_parts, nullability), - } - } -} - -register_kernel!(CompareKernelAdapter(DateTimePartsVTable).lift()); - -fn compare_eq( - lhs: &DateTimePartsArray, - ts_parts: ×tamp::TimestampParts, - nullability: Nullability, -) -> VortexResult> { - let mut comparison = compare_dtp(lhs.days(), ts_parts.days, Operator::Eq, nullability)?; - if comparison.statistics().compute_max::() == Some(false) { - // All values are different. - return Ok(Some(comparison)); - } - - comparison = and_kleene( - &compare_dtp(lhs.seconds(), ts_parts.seconds, Operator::Eq, nullability)?, - &comparison, - )?; - - if comparison.statistics().compute_max::() == Some(false) { - // All values are different. 
- return Ok(Some(comparison)); - } - - comparison = and_kleene( - &compare_dtp( - lhs.subseconds(), - ts_parts.subseconds, - Operator::Eq, - nullability, - )?, - &comparison, - )?; - - Ok(Some(comparison)) -} - -fn compare_ne( - lhs: &DateTimePartsArray, - ts_parts: ×tamp::TimestampParts, - nullability: Nullability, -) -> VortexResult> { - let mut comparison = compare_dtp(lhs.days(), ts_parts.days, Operator::NotEq, nullability)?; - if comparison.statistics().compute_min::() == Some(true) { - // All values are different. - return Ok(Some(comparison)); - } - - comparison = or_kleene( - &compare_dtp( - lhs.seconds(), - ts_parts.seconds, - Operator::NotEq, - nullability, - )?, - &comparison, - )?; - - if comparison.statistics().compute_min::() == Some(true) { - // All values are different. - return Ok(Some(comparison)); - } - - comparison = or_kleene( - &compare_dtp( - lhs.subseconds(), - ts_parts.subseconds, - Operator::NotEq, - nullability, - )?, - &comparison, - )?; - - Ok(Some(comparison)) -} - -fn compare_lt( - lhs: &DateTimePartsArray, - ts_parts: ×tamp::TimestampParts, - nullability: Nullability, -) -> VortexResult> { - let days_lt = compare_dtp(lhs.days(), ts_parts.days, Operator::Lt, nullability)?; - if days_lt.statistics().compute_min::() == Some(true) { - // All values on the lhs are smaller. - return Ok(Some(days_lt)); - } - - Ok(None) -} - -fn compare_gt( - lhs: &DateTimePartsArray, - ts_parts: ×tamp::TimestampParts, - nullability: Nullability, -) -> VortexResult> { - let days_gt = compare_dtp(lhs.days(), ts_parts.days, Operator::Gt, nullability)?; - if days_gt.statistics().compute_min::() == Some(true) { - // All values on the lhs are larger. 
- return Ok(Some(days_gt)); - } - - Ok(None) -} - -fn compare_dtp( - lhs: &dyn Array, - rhs: i64, - operator: Operator, - nullability: Nullability, -) -> VortexResult { - // Since nullability is stripped from RHS and carried forward through nullability argument we want to incorporate it into lhs.dtype() that we cast rhs into - match ConstantArray::new(rhs, lhs.len()) - .into_array() - .cast(lhs.dtype().with_nullability(nullability)) - { - Ok(casted) => compare(lhs, &casted, operator), - // The narrowing cast failed. Therefore, we know lhs < rhs. - _ => { - let constant_value = match operator { - Operator::Eq | Operator::Gte | Operator::Gt => false, - Operator::NotEq | Operator::Lte | Operator::Lt => true, - }; - Ok( - ConstantArray::new(Scalar::bool(constant_value, nullability), lhs.len()) - .into_array(), - ) - } - } -} - -#[cfg(test)] -mod test { - use rstest::rstest; - use vortex_array::arrays::PrimitiveArray; - use vortex_array::arrays::TemporalArray; - use vortex_array::compute::Operator; - use vortex_array::validity::Validity; - use vortex_buffer::buffer; - use vortex_dtype::IntegerPType; - use vortex_dtype::datetime::TimeUnit; - - use super::*; - - fn dtp_array_from_timestamp( - value: T, - validity: Validity, - ) -> DateTimePartsArray { - DateTimePartsArray::try_from(TemporalArray::new_timestamp( - PrimitiveArray::new(buffer![value], validity).into_array(), - TimeUnit::Seconds, - Some("UTC".into()), - )) - .expect("Failed to construct DateTimePartsArray from TemporalArray") - } - - #[rstest] - #[case(Validity::NonNullable, Validity::NonNullable)] - #[case(Validity::NonNullable, Validity::AllValid)] - #[case(Validity::AllValid, Validity::NonNullable)] - #[case(Validity::AllValid, Validity::AllValid)] - fn compare_date_time_parts_eq(#[case] lhs_validity: Validity, #[case] rhs_validity: Validity) { - let lhs = dtp_array_from_timestamp(86400i64, lhs_validity); // January 2, 1970, 00:00:00 UTC - let rhs = dtp_array_from_timestamp(86400i64, rhs_validity.clone()); 
// January 2, 1970, 00:00:00 UTC - let comparison = compare(lhs.as_ref(), rhs.as_ref(), Operator::Eq).unwrap(); - assert_eq!(comparison.as_bool_typed().true_count().unwrap(), 1); - - let rhs = dtp_array_from_timestamp(0i64, rhs_validity); // January 1, 1970, 00:00:00 UTC - let comparison = compare(lhs.as_ref(), rhs.as_ref(), Operator::Eq).unwrap(); - assert_eq!(comparison.as_bool_typed().true_count().unwrap(), 0); - } - - #[rstest] - #[case(Validity::NonNullable, Validity::NonNullable)] - #[case(Validity::NonNullable, Validity::AllValid)] - #[case(Validity::AllValid, Validity::NonNullable)] - #[case(Validity::AllValid, Validity::AllValid)] - fn compare_date_time_parts_ne(#[case] lhs_validity: Validity, #[case] rhs_validity: Validity) { - let lhs = dtp_array_from_timestamp(86400i64, lhs_validity); // January 2, 1970, 00:00:00 UTC - let rhs = dtp_array_from_timestamp(86401i64, rhs_validity.clone()); // January 2, 1970, 00:00:01 UTC - let comparison = compare(lhs.as_ref(), rhs.as_ref(), Operator::NotEq).unwrap(); - assert_eq!(comparison.as_bool_typed().true_count().unwrap(), 1); - - let rhs = dtp_array_from_timestamp(86400i64, rhs_validity); // January 2, 1970, 00:00:00 UTC - let comparison = compare(lhs.as_ref(), rhs.as_ref(), Operator::NotEq).unwrap(); - assert_eq!(comparison.as_bool_typed().true_count().unwrap(), 0); - } - - #[rstest] - #[case(Validity::NonNullable, Validity::NonNullable)] - #[case(Validity::NonNullable, Validity::AllValid)] - #[case(Validity::AllValid, Validity::NonNullable)] - #[case(Validity::AllValid, Validity::AllValid)] - fn compare_date_time_parts_lt(#[case] lhs_validity: Validity, #[case] rhs_validity: Validity) { - let lhs = dtp_array_from_timestamp(0i64, lhs_validity); // January 1, 1970, 01:00:00 UTC - let rhs = dtp_array_from_timestamp(86400i64, rhs_validity); // January 2, 1970, 00:00:00 UTC - - let comparison = compare(lhs.as_ref(), rhs.as_ref(), Operator::Lt).unwrap(); - assert_eq!(comparison.as_bool_typed().true_count().unwrap(), 
1); - } - - #[rstest] - #[case(Validity::NonNullable, Validity::NonNullable)] - #[case(Validity::NonNullable, Validity::AllValid)] - #[case(Validity::AllValid, Validity::NonNullable)] - #[case(Validity::AllValid, Validity::AllValid)] - fn compare_date_time_parts_gt(#[case] lhs_validity: Validity, #[case] rhs_validity: Validity) { - let lhs = dtp_array_from_timestamp(86400i64, lhs_validity); // January 2, 1970, 02:00:00 UTC - let rhs = dtp_array_from_timestamp(0i64, rhs_validity); // January 1, 1970, 01:00:00 UTC - - let comparison = compare(lhs.as_ref(), rhs.as_ref(), Operator::Gt).unwrap(); - assert_eq!(comparison.as_bool_typed().true_count().unwrap(), 1); - } - - #[rstest] - #[case(Validity::NonNullable, Validity::NonNullable)] - #[case(Validity::NonNullable, Validity::AllValid)] - #[case(Validity::AllValid, Validity::NonNullable)] - #[case(Validity::AllValid, Validity::AllValid)] - fn compare_date_time_parts_narrowing( - #[case] lhs_validity: Validity, - #[case] rhs_validity: Validity, - ) { - let temporal_array = TemporalArray::new_timestamp( - PrimitiveArray::new(buffer![0i64], lhs_validity.clone()).into_array(), - TimeUnit::Seconds, - Some("UTC".into()), - ); - - let lhs = DateTimePartsArray::try_new( - DType::Extension(temporal_array.ext_dtype()), - PrimitiveArray::new(buffer![0i32], lhs_validity).into_array(), - PrimitiveArray::new(buffer![0u32], Validity::NonNullable).into_array(), - PrimitiveArray::new(buffer![0i64], Validity::NonNullable).into_array(), - ) - .unwrap(); - - // Timestamp with a value larger than i32::MAX. 
- let rhs = dtp_array_from_timestamp(i64::MAX, rhs_validity); - - let comparison = compare(lhs.as_ref(), rhs.as_ref(), Operator::Eq).unwrap(); - assert_eq!(comparison.as_bool_typed().true_count().unwrap(), 0); - - let comparison = compare(lhs.as_ref(), rhs.as_ref(), Operator::NotEq).unwrap(); - assert_eq!(comparison.as_bool_typed().true_count().unwrap(), 1); - - let comparison = compare(lhs.as_ref(), rhs.as_ref(), Operator::Lt).unwrap(); - assert_eq!(comparison.as_bool_typed().true_count().unwrap(), 1); - - let comparison = compare(lhs.as_ref(), rhs.as_ref(), Operator::Lte).unwrap(); - assert_eq!(comparison.as_bool_typed().true_count().unwrap(), 1); - - // `Operator::Gt` and `Operator::Gte` only cover the case of all lhs values - // being larger. Therefore, these cases are not covered by unit tests. - } -} diff --git a/encodings/datetime-parts/src/compute/mod.rs b/encodings/datetime-parts/src/compute/mod.rs index d606daccb59..4c9d87765ef 100644 --- a/encodings/datetime-parts/src/compute/mod.rs +++ b/encodings/datetime-parts/src/compute/mod.rs @@ -2,7 +2,6 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors mod cast; -mod compare; mod filter; mod is_constant; pub(crate) mod kernel; diff --git a/encodings/decimal-byte-parts/src/decimal_byte_parts/compute/compare.rs b/encodings/decimal-byte-parts/src/decimal_byte_parts/compute/compare.rs deleted file mode 100644 index 2ff776c36b3..00000000000 --- a/encodings/decimal-byte-parts/src/decimal_byte_parts/compute/compare.rs +++ /dev/null @@ -1,259 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -use Sign::Negative; -use num_traits::NumCast; -use vortex_array::Array; -use vortex_array::ArrayRef; -use vortex_array::arrays::ConstantArray; -use vortex_array::compute::CompareKernel; -use vortex_array::compute::CompareKernelAdapter; -use vortex_array::compute::Operator; -use vortex_array::compute::compare; -use vortex_array::register_kernel; -use 
vortex_dtype::IntegerPType; -use vortex_dtype::Nullability; -use vortex_dtype::PType; -use vortex_dtype::ToI256; -use vortex_dtype::match_each_decimal_value; -use vortex_dtype::match_each_integer_ptype; -use vortex_error::VortexExpect; -use vortex_error::VortexResult; -use vortex_scalar::DecimalValue; -use vortex_scalar::Scalar; -use vortex_scalar::ScalarValue; - -use crate::DecimalBytePartsVTable; -use crate::decimal_byte_parts::compute::compare::Sign::Positive; - -impl CompareKernel for DecimalBytePartsVTable { - fn compare( - &self, - lhs: &Self::Array, - rhs: &dyn Array, - operator: Operator, - ) -> VortexResult> { - let Some(rhs_const) = rhs.as_constant() else { - return Ok(None); - }; - - let nullability = lhs.dtype.nullability() | rhs.dtype().nullability(); - let scalar_type = lhs.msp.dtype().with_nullability(nullability); - - let rhs_decimal = rhs_const - .as_decimal() - .decimal_value() - .vortex_expect("checked for null in entry func"); - - match decimal_value_wrapper_to_primitive(rhs_decimal, lhs.msp.as_primitive_typed().ptype()) - { - Ok(value) => { - let encoded_scalar = Scalar::try_new(scalar_type, Some(value))?; - let encoded_const = ConstantArray::new(encoded_scalar, rhs.len()); - compare(&lhs.msp, &encoded_const.to_array(), operator).map(Some) - } - - Err(sign) => { - // If the MSP and the constant are non-null, we know that failing to coerce the - // constant into the MSP bit-width means that it is larger/smaller - // (depending on the `sign`) than all values in MSP. - // If the LHS or the RHS contain nulls, then we must fallback to the canonicalized - // implementation which does null-checking instead. - if lhs.all_valid()? && rhs.all_valid()? { - Ok(Some( - ConstantArray::new( - unconvertible_value(sign, operator, nullability), - lhs.len(), - ) - .to_array(), - )) - } else { - Ok(None) - } - } - } - } -} - -// Used to represent the overflow direction when trying to -// convert into the scalar type. 
-#[derive(Debug)] -enum Sign { - Positive, - Negative, -} - -fn unconvertible_value(sign: Sign, operator: Operator, nullability: Nullability) -> Scalar { - match operator { - Operator::Eq => Scalar::bool(false, nullability), - Operator::NotEq => Scalar::bool(true, nullability), - Operator::Gt | Operator::Gte => Scalar::bool(matches!(sign, Negative), nullability), - Operator::Lt | Operator::Lte => Scalar::bool(matches!(sign, Positive), nullability), - } -} - -// this value return None is the decimal scalar cannot be cast the ptype. -fn decimal_value_wrapper_to_primitive( - decimal_value: DecimalValue, - ptype: PType, -) -> Result { - match_each_integer_ptype!(ptype, |P| { - decimal_value_to_primitive::

(decimal_value) - }) -} - -fn decimal_value_to_primitive

(decimal_value: DecimalValue) -> Result -where - P: IntegerPType + ToI256, - ScalarValue: From

, -{ - match_each_decimal_value!(decimal_value, |decimal_v| { - let Some(encoded) =

::from(decimal_v) else { - let decimal_i256 = decimal_v - .to_i256() - .vortex_expect("i256 is big enough for any DecimalValue"); - return if decimal_i256 - > P::max_value() - .to_i256() - .vortex_expect("i256 is big enough for any PType") - { - Err(Positive) - } else { - assert!( - decimal_i256 - < P::min_value() - .to_i256() - .vortex_expect("i256 is big enough for any PType") - ); - Err(Negative) - }; - }; - Ok(ScalarValue::from(encoded)) - }) -} - -register_kernel!(CompareKernelAdapter(DecimalBytePartsVTable).lift()); - -#[cfg(test)] -mod tests { - use vortex_array::Array; - use vortex_array::IntoArray; - use vortex_array::arrays::BoolArray; - use vortex_array::arrays::ConstantArray; - use vortex_array::arrays::PrimitiveArray; - use vortex_array::assert_arrays_eq; - use vortex_array::compute::Operator; - use vortex_array::compute::compare; - use vortex_array::validity::Validity; - use vortex_buffer::buffer; - use vortex_dtype::DType; - use vortex_dtype::DecimalDType; - use vortex_dtype::Nullability; - use vortex_error::VortexResult; - use vortex_scalar::DecimalValue; - use vortex_scalar::Scalar; - - use crate::DecimalBytePartsArray; - - #[test] - fn compare_decimal_const() { - let decimal_dtype = DecimalDType::new(8, 2); - let dtype = DType::Decimal(decimal_dtype, Nullability::Nullable); - let lhs = DecimalBytePartsArray::try_new( - PrimitiveArray::new(buffer![100i32, 200i32, 400i32], Validity::AllValid).to_array(), - decimal_dtype, - ) - .unwrap() - .to_array(); - let rhs = ConstantArray::new( - Scalar::try_new(dtype, Some(DecimalValue::I64(400).into())).unwrap(), - lhs.len(), - ); - - let res = compare(lhs.as_ref(), rhs.as_ref(), Operator::Eq).unwrap(); - - let expected = BoolArray::from_iter([Some(false), Some(false), Some(true)]).into_array(); - assert_arrays_eq!(res, expected); - } - - #[test] - fn test_byteparts_compare_nullable() -> VortexResult<()> { - let decimal_type = DecimalDType::new(19, -11); - let lhs = DecimalBytePartsArray::try_new( - 
PrimitiveArray::new( - buffer![1i64, 2i64, 3i64, 4i64], - Validity::Array(BoolArray::from_iter([false, true, true, true]).into_array()), - ) - .into_array(), - decimal_type, - )?; - - let rhs = ConstantArray::new( - Scalar::decimal( - DecimalValue::I128(289888198), - decimal_type, - Nullability::NonNullable, - ), - 4, - ) - .into_array(); - - let res = compare(lhs.as_ref(), rhs.as_ref(), Operator::Lte)?; - let expected = - BoolArray::from_iter([None, Some(true), Some(true), Some(true)]).into_array(); - assert_arrays_eq!(res, expected); - - Ok(()) - } - - #[test] - fn compare_decimal_const_unconvertible_comparison() { - let decimal_dtype = DecimalDType::new(40, 2); - let dtype = DType::Decimal(decimal_dtype, Nullability::Nullable); - let lhs = DecimalBytePartsArray::try_new( - PrimitiveArray::new(buffer![100i32, 200i32, 400i32], Validity::AllValid).to_array(), - decimal_dtype, - ) - .unwrap() - .to_array(); - // This cannot be converted to a i32. - let rhs = ConstantArray::new( - Scalar::try_new( - dtype.clone(), - Some(DecimalValue::I128(-9999999999999965304).into()), - ) - .unwrap(), - lhs.len(), - ); - - let res = compare(lhs.as_ref(), rhs.as_ref(), Operator::Eq).unwrap(); - let expected = BoolArray::from_iter([Some(false), Some(false), Some(false)]).into_array(); - assert_arrays_eq!(res, expected); - - let res = compare(lhs.as_ref(), rhs.as_ref(), Operator::Gt).unwrap(); - let expected = BoolArray::from_iter([Some(true), Some(true), Some(true)]).into_array(); - assert_arrays_eq!(res, expected); - - let res = compare(lhs.as_ref(), rhs.as_ref(), Operator::Lt).unwrap(); - let expected = BoolArray::from_iter([Some(false), Some(false), Some(false)]).into_array(); - assert_arrays_eq!(res, expected); - - // This cannot be converted to a i32. 
- let rhs = ConstantArray::new( - Scalar::try_new(dtype, Some(DecimalValue::I128(9999999999999965304).into())).unwrap(), - lhs.len(), - ); - - let res = compare(lhs.as_ref(), rhs.as_ref(), Operator::Eq).unwrap(); - let expected = BoolArray::from_iter([Some(false), Some(false), Some(false)]).into_array(); - assert_arrays_eq!(res, expected); - - let res = compare(lhs.as_ref(), rhs.as_ref(), Operator::Gt).unwrap(); - let expected = BoolArray::from_iter([Some(false), Some(false), Some(false)]).into_array(); - assert_arrays_eq!(res, expected); - - let res = compare(lhs.as_ref(), rhs.as_ref(), Operator::Lt).unwrap(); - let expected = BoolArray::from_iter([Some(true), Some(true), Some(true)]).into_array(); - assert_arrays_eq!(res, expected); - } -} diff --git a/encodings/decimal-byte-parts/src/decimal_byte_parts/compute/mod.rs b/encodings/decimal-byte-parts/src/decimal_byte_parts/compute/mod.rs index 2e798106a7e..a12bebf3b1e 100644 --- a/encodings/decimal-byte-parts/src/decimal_byte_parts/compute/mod.rs +++ b/encodings/decimal-byte-parts/src/decimal_byte_parts/compute/mod.rs @@ -2,7 +2,6 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors mod cast; -mod compare; mod filter; mod is_constant; pub(crate) mod kernel; diff --git a/encodings/fastlanes/src/for/compute/compare.rs b/encodings/fastlanes/src/for/compute/compare.rs deleted file mode 100644 index 6ae51867ad8..00000000000 --- a/encodings/fastlanes/src/for/compute/compare.rs +++ /dev/null @@ -1,217 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -use std::ops::Shr; - -use num_traits::WrappingSub; -use vortex_array::Array; -use vortex_array::ArrayRef; -use vortex_array::arrays::ConstantArray; -use vortex_array::compute::CompareKernel; -use vortex_array::compute::CompareKernelAdapter; -use vortex_array::compute::Operator; -use vortex_array::compute::compare; -use vortex_array::register_kernel; -use vortex_dtype::NativePType; -use 
vortex_dtype::Nullability; -use vortex_dtype::match_each_integer_ptype; -use vortex_error::VortexError; -use vortex_error::VortexExpect as _; -use vortex_error::VortexResult; -use vortex_scalar::PValue; -use vortex_scalar::Scalar; - -use crate::FoRArray; -use crate::FoRVTable; - -impl CompareKernel for FoRVTable { - fn compare( - &self, - lhs: &FoRArray, - rhs: &dyn Array, - operator: Operator, - ) -> VortexResult> { - if let Some(constant) = rhs.as_constant() - && let Some(constant) = constant.as_primitive_opt() - { - match_each_integer_ptype!(constant.ptype(), |T| { - return compare_constant( - lhs, - constant - .typed_value::() - .vortex_expect("null scalar handled in top-level"), - rhs.dtype().nullability(), - operator, - ); - }) - } - - Ok(None) - } -} - -register_kernel!(CompareKernelAdapter(FoRVTable).lift()); - -fn compare_constant( - lhs: &FoRArray, - mut rhs: T, - nullability: Nullability, - operator: Operator, -) -> VortexResult> -where - T: NativePType + WrappingSub + Shr, - T: TryFrom, - PValue: From, -{ - // For now, we only support equals and not equals. Comparisons are a little more fiddly to - // get right regarding how to handle overflow and the wrapping subtraction. - if !matches!(operator, Operator::Eq | Operator::NotEq) { - return Ok(None); - } - - let reference = lhs.reference_scalar(); - let reference = reference.as_primitive().typed_value::(); - - // We encode the RHS into the FoR domain. - if let Some(reference) = reference { - rhs = rhs.wrapping_sub(&reference); - } - - // Wrap up the RHS into a scalar and cast to the encoded DType (this will be the equivalent - // unsigned integer type). 
- let rhs = Scalar::primitive(rhs, nullability); - - compare( - lhs.encoded(), - ConstantArray::new(rhs, lhs.len()).as_ref(), - operator, - ) - .map(Some) -} - -#[cfg(test)] -mod tests { - use vortex_array::IntoArray; - use vortex_array::arrays::BoolArray; - use vortex_array::arrays::PrimitiveArray; - use vortex_array::assert_arrays_eq; - use vortex_array::validity::Validity; - use vortex_buffer::buffer; - use vortex_dtype::DType; - - use super::*; - - #[test] - fn test_compare_constant() { - let reference = Scalar::from(10); - // 10, 30, 12 - let lhs = FoRArray::try_new( - PrimitiveArray::new(buffer!(0i32, 20, 2), Validity::AllValid).into_array(), - reference, - ) - .unwrap(); - - let result = compare_constant(&lhs, 30i32, Nullability::NonNullable, Operator::Eq) - .unwrap() - .unwrap(); - assert_arrays_eq!(result, BoolArray::from_iter([false, true, false].map(Some))); - - let result = compare_constant(&lhs, 12i32, Nullability::NonNullable, Operator::NotEq) - .unwrap() - .unwrap(); - assert_arrays_eq!(result, BoolArray::from_iter([true, true, false].map(Some))); - - for op in [Operator::Lt, Operator::Lte, Operator::Gt, Operator::Gte] { - assert!( - compare_constant(&lhs, 30i32, Nullability::NonNullable, op) - .unwrap() - .is_none() - ); - } - } - - #[test] - fn test_compare_nullable_constant() { - let reference = Scalar::from(0); - // 10, 30, 12 - let lhs = FoRArray::try_new( - PrimitiveArray::new(buffer!(0i32, 20, 2), Validity::NonNullable).into_array(), - reference, - ) - .unwrap(); - - assert_eq!( - compare_constant(&lhs, 30i32, Nullability::Nullable, Operator::Eq) - .unwrap() - .unwrap() - .dtype(), - &DType::Bool(Nullability::Nullable) - ); - assert_eq!( - compare_constant(&lhs, 30i32, Nullability::NonNullable, Operator::Eq) - .unwrap() - .unwrap() - .dtype(), - &DType::Bool(Nullability::NonNullable) - ); - } - - #[test] - fn compare_non_encodable_constant() { - let reference = Scalar::from(10); - // 10, 30, 12 - let lhs = FoRArray::try_new( - 
PrimitiveArray::new(buffer!(0i32, 10, 1), Validity::AllValid).into_array(), - reference, - ) - .unwrap(); - - let result = compare_constant(&lhs, -1i32, Nullability::NonNullable, Operator::Eq) - .unwrap() - .unwrap(); - assert_arrays_eq!( - result, - BoolArray::from_iter([false, false, false].map(Some)) - ); - - let result = compare_constant(&lhs, -1i32, Nullability::NonNullable, Operator::NotEq) - .unwrap() - .unwrap(); - assert_arrays_eq!(result, BoolArray::from_iter([true, true, true].map(Some))); - } - - #[test] - fn compare_large_constant() { - let reference = Scalar::from(-9219218377546224477i64); - #[allow(clippy::cast_possible_truncation)] - let lhs = FoRArray::try_new( - PrimitiveArray::new( - buffer![0i64, 9654309310445864926u64 as i64], - Validity::AllValid, - ) - .into_array(), - reference, - ) - .unwrap(); - - let result = compare_constant( - &lhs, - 435090932899640449i64, - Nullability::Nullable, - Operator::Eq, - ) - .unwrap() - .unwrap(); - assert_arrays_eq!(result, BoolArray::from_iter([Some(false), Some(true)])); - - let result = compare_constant( - &lhs, - 435090932899640449i64, - Nullability::Nullable, - Operator::NotEq, - ) - .unwrap() - .unwrap(); - assert_arrays_eq!(result, BoolArray::from_iter([Some(true), Some(false)])); - } -} diff --git a/encodings/fastlanes/src/for/compute/mod.rs b/encodings/fastlanes/src/for/compute/mod.rs index a8efc731793..95103cbee4a 100644 --- a/encodings/fastlanes/src/for/compute/mod.rs +++ b/encodings/fastlanes/src/for/compute/mod.rs @@ -2,7 +2,6 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors mod cast; -mod compare; mod is_constant; mod is_sorted; diff --git a/encodings/fsst/src/compute/compare.rs b/encodings/fsst/src/compute/compare.rs deleted file mode 100644 index 24ff9e9f3a1..00000000000 --- a/encodings/fsst/src/compute/compare.rs +++ /dev/null @@ -1,196 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -use vortex_array::Array; 
-use vortex_array::ArrayRef; -use vortex_array::IntoArray; -use vortex_array::ToCanonical; -use vortex_array::arrays::BoolArray; -use vortex_array::arrays::ConstantArray; -use vortex_array::compute::CompareKernel; -use vortex_array::compute::CompareKernelAdapter; -use vortex_array::compute::Operator; -use vortex_array::compute::compare; -use vortex_array::compute::compare_lengths_to_empty; -use vortex_array::register_kernel; -use vortex_array::validity::Validity; -use vortex_buffer::BitBuffer; -use vortex_buffer::ByteBuffer; -use vortex_dtype::DType; -use vortex_dtype::match_each_integer_ptype; -use vortex_error::VortexExpect; -use vortex_error::VortexResult; -use vortex_error::vortex_bail; -use vortex_scalar::Scalar; - -use crate::FSSTArray; -use crate::FSSTVTable; - -impl CompareKernel for FSSTVTable { - fn compare( - &self, - lhs: &FSSTArray, - rhs: &dyn Array, - operator: Operator, - ) -> VortexResult> { - match rhs.as_constant() { - Some(constant) => compare_fsst_constant(lhs, &constant, operator), - // Otherwise, fall back to the default comparison behavior. 
- _ => Ok(None), - } - } -} - -register_kernel!(CompareKernelAdapter(FSSTVTable).lift()); - -/// Specialized compare function implementation used when performing against a constant -fn compare_fsst_constant( - left: &FSSTArray, - right: &Scalar, - operator: Operator, -) -> VortexResult> { - let is_rhs_empty = match right.dtype() { - DType::Binary(_) => right - .as_binary() - .is_empty() - .vortex_expect("RHS should not be null"), - DType::Utf8(_) => right - .as_utf8() - .is_empty() - .vortex_expect("RHS should not be null"), - _ => vortex_bail!("VarBinArray can only have type of Binary or Utf8"), - }; - if is_rhs_empty { - let buffer = match operator { - // Every possible value is gte "" - Operator::Gte => BitBuffer::new_set(left.len()), - // No value is lt "" - Operator::Lt => BitBuffer::new_unset(left.len()), - _ => { - let uncompressed_lengths = left.uncompressed_lengths().to_primitive(); - match_each_integer_ptype!(uncompressed_lengths.ptype(), |P| { - compare_lengths_to_empty( - uncompressed_lengths.as_slice::

().iter().copied(), - operator, - ) - }) - } - }; - - return Ok(Some( - BoolArray::new( - buffer, - Validity::copy_from_array(left.as_ref())? - .union_nullability(right.dtype().nullability()), - ) - .into_array(), - )); - } - - // The following section only supports Eq/NotEq - if !matches!(operator, Operator::Eq | Operator::NotEq) { - return Ok(None); - } - - let compressor = left.compressor(); - let encoded_buffer = match left.dtype() { - DType::Utf8(_) => { - let value = right - .as_utf8() - .value() - .vortex_expect("Expected non-null scalar"); - ByteBuffer::from(compressor.compress(value.as_bytes())) - } - DType::Binary(_) => { - let value = right - .as_binary() - .value() - .vortex_expect("Expected non-null scalar"); - ByteBuffer::from(compressor.compress(value.as_slice())) - } - _ => unreachable!("FSSTArray can only have string or binary data type"), - }; - - let encoded_scalar = Scalar::binary( - encoded_buffer, - left.dtype().nullability() | right.dtype().nullability(), - ); - - let rhs = ConstantArray::new(encoded_scalar, left.len()); - compare(left.codes().as_ref(), rhs.as_ref(), operator).map(Some) -} - -#[cfg(test)] -mod tests { - use vortex_array::Array; - use vortex_array::ToCanonical; - use vortex_array::arrays::BoolArray; - use vortex_array::arrays::ConstantArray; - use vortex_array::arrays::VarBinArray; - use vortex_array::assert_arrays_eq; - use vortex_array::compute::Operator; - use vortex_array::compute::compare; - use vortex_dtype::DType; - use vortex_dtype::Nullability; - use vortex_scalar::Scalar; - - use crate::fsst_compress; - use crate::fsst_train_compressor; - - #[test] - #[cfg_attr(miri, ignore)] - fn test_compare_fsst() { - let lhs = VarBinArray::from_iter( - [ - Some("hello"), - None, - Some("world"), - None, - Some("this is a very long string"), - ], - DType::Utf8(Nullability::Nullable), - ); - let compressor = fsst_train_compressor(&lhs); - let lhs = fsst_compress(lhs, &compressor); - - let rhs = ConstantArray::new("world", 
lhs.len()); - - // Ensure fastpath for Eq exists, and returns correct answer - let equals = compare(lhs.as_ref(), rhs.as_ref(), Operator::Eq) - .unwrap() - .to_bool(); - - assert_eq!(equals.dtype(), &DType::Bool(Nullability::Nullable)); - - assert_arrays_eq!( - &equals, - &BoolArray::from_iter([Some(false), None, Some(true), None, Some(false)]) - ); - - // Ensure fastpath for Eq exists, and returns correct answer - let not_equals = compare(lhs.as_ref(), rhs.as_ref(), Operator::NotEq) - .unwrap() - .to_bool(); - - assert_eq!(not_equals.dtype(), &DType::Bool(Nullability::Nullable)); - assert_arrays_eq!( - ¬_equals, - &BoolArray::from_iter([Some(true), None, Some(false), None, Some(true)]) - ); - - // Ensure null constants are handled correctly. - let null_rhs = - ConstantArray::new(Scalar::null(DType::Utf8(Nullability::Nullable)), lhs.len()); - let equals_null = compare(lhs.as_ref(), null_rhs.as_ref(), Operator::Eq).unwrap(); - assert_arrays_eq!( - &equals_null, - &BoolArray::from_iter([None::, None, None, None, None]) - ); - - let noteq_null = compare(lhs.as_ref(), null_rhs.as_ref(), Operator::NotEq).unwrap(); - assert_arrays_eq!( - ¬eq_null, - &BoolArray::from_iter([None::, None, None, None, None]) - ); - } -} diff --git a/encodings/fsst/src/compute/mod.rs b/encodings/fsst/src/compute/mod.rs index 02f6663722c..d657bffb807 100644 --- a/encodings/fsst/src/compute/mod.rs +++ b/encodings/fsst/src/compute/mod.rs @@ -2,7 +2,6 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors mod cast; -mod compare; mod filter; use vortex_array::Array; diff --git a/encodings/runend/src/compute/compare.rs b/encodings/runend/src/compute/compare.rs deleted file mode 100644 index 4161898af61..00000000000 --- a/encodings/runend/src/compute/compare.rs +++ /dev/null @@ -1,83 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -use vortex_array::Array; -use vortex_array::ArrayRef; -use vortex_array::IntoArray; -use 
vortex_array::ToCanonical; -use vortex_array::arrays::ConstantArray; -use vortex_array::compute::CompareKernel; -use vortex_array::compute::CompareKernelAdapter; -use vortex_array::compute::Operator; -use vortex_array::compute::compare; -use vortex_array::register_kernel; -use vortex_error::VortexResult; - -use crate::RunEndArray; -use crate::RunEndVTable; -use crate::compress::runend_decode_bools; - -impl CompareKernel for RunEndVTable { - fn compare( - &self, - lhs: &RunEndArray, - rhs: &dyn Array, - operator: Operator, - ) -> VortexResult> { - // If the RHS is constant, then we just need to compare against our encoded values. - if let Some(const_scalar) = rhs.as_constant() { - let values = compare( - lhs.values(), - ConstantArray::new(const_scalar, lhs.values().len()).as_ref(), - operator, - )?; - let decoded = runend_decode_bools( - lhs.ends().to_primitive(), - values.to_bool(), - lhs.offset(), - lhs.len(), - )?; - return Ok(Some(decoded.into_array())); - } - - // Otherwise, fall back - Ok(None) - } -} - -register_kernel!(CompareKernelAdapter(RunEndVTable).lift()); - -#[cfg(test)] -mod test { - use vortex_array::IntoArray; - use vortex_array::arrays::BoolArray; - use vortex_array::arrays::ConstantArray; - use vortex_array::arrays::PrimitiveArray; - use vortex_array::assert_arrays_eq; - use vortex_array::compute::Operator; - use vortex_array::compute::compare; - - use crate::RunEndArray; - - fn ree_array() -> RunEndArray { - RunEndArray::encode( - PrimitiveArray::from_iter([1, 1, 1, 4, 4, 4, 2, 2, 5, 5, 5, 5]).into_array(), - ) - .unwrap() - } - - #[test] - fn compare_run_end() { - let arr = ree_array(); - let res = compare( - arr.as_ref(), - ConstantArray::new(5, 12).as_ref(), - Operator::Eq, - ) - .unwrap(); - let expected = BoolArray::from_iter([ - false, false, false, false, false, false, false, false, true, true, true, true, - ]); - assert_arrays_eq!(res, expected); - } -} diff --git a/encodings/runend/src/compute/mod.rs 
b/encodings/runend/src/compute/mod.rs index 8b390bef750..9dfdfc82455 100644 --- a/encodings/runend/src/compute/mod.rs +++ b/encodings/runend/src/compute/mod.rs @@ -2,7 +2,6 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors mod cast; -mod compare; mod fill_null; pub(crate) mod filter; mod is_constant; diff --git a/encodings/sequence/src/compute/compare.rs b/encodings/sequence/src/compute/compare.rs index fc652216a15..67e64fe8e93 100644 --- a/encodings/sequence/src/compute/compare.rs +++ b/encodings/sequence/src/compute/compare.rs @@ -1,67 +1,10 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use vortex_array::Array; -use vortex_array::ArrayRef; -use vortex_array::arrays::BoolArray; -use vortex_array::arrays::ConstantArray; -use vortex_array::compute::CompareKernel; -use vortex_array::compute::Operator; -use vortex_array::validity::Validity; -use vortex_buffer::BitBuffer; use vortex_dtype::NativePType; -use vortex_dtype::Nullability; use vortex_dtype::match_each_integer_ptype; use vortex_error::VortexExpect; -use vortex_error::VortexResult; use vortex_scalar::PValue; -use vortex_scalar::Scalar; - -use crate::SequenceArray; -use crate::array::SequenceVTable; - -impl CompareKernel for SequenceVTable { - fn compare( - &self, - lhs: &SequenceArray, - rhs: &dyn Array, - operator: Operator, - ) -> VortexResult> { - if operator != Operator::Eq { - return Ok(None); - }; - - let Some(constant) = rhs.as_constant() else { - return Ok(None); - }; - - // Check if there exists an integer solution to const = base + (0..len) * multiplier. 
- let set_idx = find_intersection_scalar( - lhs.base(), - lhs.multiplier(), - lhs.len(), - constant - .as_primitive() - .pvalue() - .vortex_expect("non-null constant"), - ); - - let nullability = lhs.dtype().nullability() | rhs.dtype().nullability(); - let validity = match nullability { - Nullability::NonNullable => Validity::NonNullable, - Nullability::Nullable => Validity::AllValid, - }; - - if let Some(set_idx) = set_idx { - let buffer = BitBuffer::from_iter((0..lhs.len()).map(|idx| idx == set_idx)); - Ok(Some(BoolArray::new(buffer, validity).to_array())) - } else { - Ok(Some( - ConstantArray::new(Scalar::bool(false, nullability), lhs.len()).to_array(), - )) - } - } -} /// Find the index where `base + idx * multiplier == intercept`, if one exists. /// diff --git a/vortex-array/src/arrays/chunked/compute/compare.rs b/vortex-array/src/arrays/chunked/compute/compare.rs deleted file mode 100644 index d005527e5cb..00000000000 --- a/vortex-array/src/arrays/chunked/compute/compare.rs +++ /dev/null @@ -1,65 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -use vortex_error::VortexResult; - -use crate::Array; -use crate::ArrayRef; -use crate::arrays::ChunkedArray; -use crate::arrays::ChunkedVTable; -use crate::builders::ArrayBuilder; -use crate::builders::BoolBuilder; -use crate::compute::CompareKernel; -use crate::compute::CompareKernelAdapter; -use crate::compute::Operator; -use crate::compute::compare; -use crate::register_kernel; - -impl CompareKernel for ChunkedVTable { - fn compare( - &self, - lhs: &ChunkedArray, - rhs: &dyn Array, - operator: Operator, - ) -> VortexResult> { - let mut idx = 0; - - let mut bool_builder = BoolBuilder::with_capacity( - // nullable <= non-nullable - (lhs.dtype().is_nullable() || rhs.dtype().is_nullable()).into(), - lhs.len(), - ); - - for chunk in lhs.non_empty_chunks() { - let sliced = rhs.slice(idx..idx + chunk.len())?; - let cmp_result = compare(chunk, &sliced, 
operator)?; - - bool_builder.extend_from_array(&cmp_result); - idx += chunk.len(); - } - - Ok(Some(bool_builder.finish())) - } -} - -register_kernel!(CompareKernelAdapter(ChunkedVTable).lift()); - -#[cfg(test)] -mod tests { - use vortex_buffer::Buffer; - - use super::*; - use crate::IntoArray; - - #[test] - fn empty_compare() { - let base = Buffer::::empty().into_array(); - let chunked = - ChunkedArray::try_new(vec![base.clone(), base.clone()], base.dtype().clone()).unwrap(); - let chunked_empty = ChunkedArray::try_new(vec![], base.dtype().clone()).unwrap(); - - let r = compare(chunked.as_ref(), chunked_empty.as_ref(), Operator::Eq).unwrap(); - - assert!(r.is_empty()); - } -} diff --git a/vortex-array/src/arrays/chunked/compute/mod.rs b/vortex-array/src/arrays/chunked/compute/mod.rs index 4212e0a2025..f4e954c88cc 100644 --- a/vortex-array/src/arrays/chunked/compute/mod.rs +++ b/vortex-array/src/arrays/chunked/compute/mod.rs @@ -2,7 +2,6 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors mod cast; -mod compare; mod fill_null; mod filter; mod is_constant; diff --git a/vortex-array/src/arrays/constant/compute/compare.rs b/vortex-array/src/arrays/constant/compute/compare.rs deleted file mode 100644 index 078fb27ef75..00000000000 --- a/vortex-array/src/arrays/constant/compute/compare.rs +++ /dev/null @@ -1,36 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -use vortex_error::VortexResult; - -use crate::Array; -use crate::ArrayRef; -use crate::IntoArray; -use crate::arrays::ConstantArray; -use crate::arrays::ConstantVTable; -use crate::compute::CompareKernel; -use crate::compute::CompareKernelAdapter; -use crate::compute::Operator; -use crate::compute::scalar_cmp; -use crate::register_kernel; - -impl CompareKernel for ConstantVTable { - fn compare( - &self, - lhs: &ConstantArray, - rhs: &dyn Array, - operator: Operator, - ) -> VortexResult> { - // We only support comparing a constant array to 
another constant array. - // For all other encodings, we assume the constant is on the RHS. - if let Some(const_scalar) = rhs.as_constant() { - let lhs_scalar = lhs.scalar(); - let scalar = scalar_cmp(lhs_scalar, &const_scalar, operator); - return Ok(Some(ConstantArray::new(scalar, lhs.len()).into_array())); - } - - Ok(None) - } -} - -register_kernel!(CompareKernelAdapter(ConstantVTable).lift()); diff --git a/vortex-array/src/arrays/constant/compute/mod.rs b/vortex-array/src/arrays/constant/compute/mod.rs index 96e21b0efd1..2384df41a88 100644 --- a/vortex-array/src/arrays/constant/compute/mod.rs +++ b/vortex-array/src/arrays/constant/compute/mod.rs @@ -2,7 +2,6 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors mod cast; -mod compare; mod fill_null; mod filter; mod mask; diff --git a/vortex-array/src/arrays/dict/compute/compare.rs b/vortex-array/src/arrays/dict/compute/compare.rs deleted file mode 100644 index ed5b3dd45f7..00000000000 --- a/vortex-array/src/arrays/dict/compute/compare.rs +++ /dev/null @@ -1,158 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -use vortex_error::VortexResult; - -use super::DictArray; -use super::DictVTable; -use crate::Array; -use crate::ArrayRef; -use crate::IntoArray; -use crate::arrays::ConstantArray; -use crate::compute::CompareKernel; -use crate::compute::CompareKernelAdapter; -use crate::compute::Operator; -use crate::compute::compare; -use crate::register_kernel; - -impl CompareKernel for DictVTable { - fn compare( - &self, - lhs: &DictArray, - rhs: &dyn Array, - operator: Operator, - ) -> VortexResult> { - // if we have more values than codes, it is faster to canonicalise first. - if lhs.values().len() > lhs.codes().len() { - return Ok(None); - } - - // If the RHS is constant, then we just need to compare against our encoded values. 
- if let Some(rhs) = rhs.as_constant() { - let compare_result = compare( - lhs.values(), - ConstantArray::new(rhs, lhs.values().len()).as_ref(), - operator, - )?; - - // SAFETY: values len preserved, codes all still point to valid values - let result = unsafe { - DictArray::new_unchecked(lhs.codes().clone(), compare_result) - .set_all_values_referenced(lhs.has_all_values_referenced()) - .into_array() - }; - - // We canonicalize the result because dictionary-encoded bools is dumb. - return Ok(Some(result.to_canonical()?.into_array())); - } - - // It's a little more complex, but we could perform a comparison against the dictionary - // values in the future. - Ok(None) - } -} - -register_kernel!(CompareKernelAdapter(DictVTable).lift()); -#[cfg(test)] -mod tests { - use vortex_buffer::buffer; - use vortex_dtype::Nullability; - use vortex_mask::Mask; - use vortex_scalar::Scalar; - - use crate::IntoArray; - use crate::arrays::BoolArray; - use crate::arrays::ConstantArray; - use crate::arrays::PrimitiveArray; - use crate::arrays::dict::DictArray; - use crate::assert_arrays_eq; - use crate::compute::Operator; - use crate::compute::compare; - use crate::validity::Validity; - - #[test] - fn test_compare_value() { - let dict = DictArray::try_new( - buffer![0u32, 1, 2].into_array(), - buffer![1i32, 2, 3].into_array(), - ) - .unwrap(); - - let res = compare( - dict.as_ref(), - ConstantArray::new(Scalar::from(1i32), 3).as_ref(), - Operator::Eq, - ) - .unwrap(); - assert_arrays_eq!(res, BoolArray::from_iter([true, false, false])); - } - - #[test] - fn test_compare_non_eq() { - let dict = DictArray::try_new( - buffer![0u32, 1, 2].into_array(), - buffer![1i32, 2, 3].into_array(), - ) - .unwrap(); - - let res = compare( - dict.as_ref(), - ConstantArray::new(Scalar::from(1i32), 3).as_ref(), - Operator::Gt, - ) - .unwrap(); - assert_arrays_eq!(res, BoolArray::from_iter([false, true, true])); - } - - #[test] - fn test_compare_nullable() { - let dict = DictArray::try_new( - 
PrimitiveArray::new( - buffer![0u32, 1, 2], - Validity::from_iter([false, true, false]), - ) - .into_array(), - PrimitiveArray::new(buffer![1i32, 2, 3], Validity::AllValid).into_array(), - ) - .unwrap(); - - let res = compare( - dict.as_ref(), - ConstantArray::new(Scalar::primitive(4i32, Nullability::Nullable), 3).as_ref(), - Operator::Eq, - ) - .unwrap(); - assert_arrays_eq!(res, BoolArray::from_iter([None, Some(false), None])); - assert_eq!(res.dtype().nullability(), Nullability::Nullable); - assert_eq!( - res.validity_mask().unwrap(), - Mask::from_iter([false, true, false]) - ); - } - - #[test] - fn test_compare_null_values() { - let dict = DictArray::try_new( - buffer![0u32, 1, 2].into_array(), - PrimitiveArray::new( - buffer![1i32, 2, 0], - Validity::from_iter([true, true, false]), - ) - .into_array(), - ) - .unwrap(); - - let res = compare( - dict.as_ref(), - ConstantArray::new(Scalar::primitive(4i32, Nullability::NonNullable), 3).as_ref(), - Operator::Eq, - ) - .unwrap(); - assert_arrays_eq!(res, BoolArray::from_iter([Some(false), Some(false), None])); - assert_eq!(res.dtype().nullability(), Nullability::Nullable); - assert_eq!( - res.validity_mask().unwrap(), - Mask::from_iter([true, true, false]) - ); - } -} diff --git a/vortex-array/src/arrays/dict/compute/mod.rs b/vortex-array/src/arrays/dict/compute/mod.rs index 056b151ec06..ac9742303cb 100644 --- a/vortex-array/src/arrays/dict/compute/mod.rs +++ b/vortex-array/src/arrays/dict/compute/mod.rs @@ -2,7 +2,6 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors mod cast; -mod compare; mod fill_null; mod is_constant; mod is_sorted; diff --git a/vortex-array/src/arrays/extension/compute/compare.rs b/vortex-array/src/arrays/extension/compute/compare.rs deleted file mode 100644 index fd3425937e0..00000000000 --- a/vortex-array/src/arrays/extension/compute/compare.rs +++ /dev/null @@ -1,45 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - 
-use vortex_error::VortexResult; - -use crate::Array; -use crate::ArrayRef; -use crate::arrays::ConstantArray; -use crate::arrays::ExtensionArray; -use crate::arrays::ExtensionVTable; -use crate::compute::CompareKernel; -use crate::compute::CompareKernelAdapter; -use crate::compute::Operator; -use crate::compute::{self}; -use crate::register_kernel; - -impl CompareKernel for ExtensionVTable { - fn compare( - &self, - lhs: &ExtensionArray, - rhs: &dyn Array, - operator: Operator, - ) -> VortexResult> { - // If the RHS is a constant, we can extract the storage scalar. - if let Some(const_ext) = rhs.as_constant() { - let storage_scalar = const_ext.as_extension().to_storage_scalar(); - return compute::compare( - lhs.storage(), - ConstantArray::new(storage_scalar, lhs.len()).as_ref(), - operator, - ) - .map(Some); - } - - // If the RHS is an extension array matching ours, we can extract the storage. - if let Some(rhs_ext) = rhs.as_opt::() { - return compute::compare(lhs.storage(), rhs_ext.storage(), operator).map(Some); - } - - // Otherwise, we need the RHS to handle this comparison. 
- Ok(None) - } -} - -register_kernel!(CompareKernelAdapter(ExtensionVTable).lift()); diff --git a/vortex-array/src/arrays/extension/compute/mod.rs b/vortex-array/src/arrays/extension/compute/mod.rs index 71770541b56..90dba24e2fa 100644 --- a/vortex-array/src/arrays/extension/compute/mod.rs +++ b/vortex-array/src/arrays/extension/compute/mod.rs @@ -2,7 +2,6 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors mod cast; -mod compare; mod filter; mod is_constant; mod is_sorted; diff --git a/vortex-array/src/arrays/masked/compute/compare.rs b/vortex-array/src/arrays/masked/compute/compare.rs deleted file mode 100644 index 0a9f86aa245..00000000000 --- a/vortex-array/src/arrays/masked/compute/compare.rs +++ /dev/null @@ -1,143 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -use vortex_error::VortexResult; - -use crate::Array; -use crate::ArrayRef; -use crate::IntoArray; -use crate::arrays::BoolArray; -use crate::arrays::MaskedArray; -use crate::arrays::MaskedVTable; -use crate::canonical::ToCanonical; -use crate::compute::CompareKernel; -use crate::compute::CompareKernelAdapter; -use crate::compute::Operator; -use crate::compute::compare; -use crate::register_kernel; -use crate::vtable::ValidityHelper; - -impl CompareKernel for MaskedVTable { - fn compare( - &self, - lhs: &MaskedArray, - rhs: &dyn Array, - operator: Operator, - ) -> VortexResult> { - // Compare the child arrays - let compare_result = compare(&lhs.child, rhs, operator)?; - - // Get the boolean buffer from the comparison result - let bool_array = compare_result.to_bool(); - let combined_validity = bool_array.validity().clone().and(lhs.validity().clone()); - - // Return a plain BoolArray with the combined validity - Ok(Some( - BoolArray::new(bool_array.to_bit_buffer(), combined_validity).into_array(), - )) - } -} - -register_kernel!(CompareKernelAdapter(MaskedVTable).lift()); - -#[cfg(test)] -mod tests { - use 
vortex_dtype::Nullability; - use vortex_mask::Mask; - use vortex_scalar::Scalar; - - use crate::IntoArray; - use crate::arrays::BoolArray; - use crate::arrays::ConstantArray; - use crate::arrays::MaskedArray; - use crate::arrays::PrimitiveArray; - use crate::assert_arrays_eq; - use crate::compute::Operator; - use crate::compute::compare; - use crate::validity::Validity; - - #[test] - fn test_compare_value() { - let masked = MaskedArray::try_new( - PrimitiveArray::from_iter([1i32, 2, 3]).into_array(), - Validity::AllValid, - ) - .unwrap(); - - let res = compare( - masked.as_ref(), - ConstantArray::new(Scalar::from(2i32), 3).as_ref(), - Operator::Eq, - ) - .unwrap(); - assert_arrays_eq!( - res, - BoolArray::from_iter([Some(false), Some(true), Some(false)]) - ); - } - - #[test] - fn test_compare_non_eq() { - let masked = MaskedArray::try_new( - PrimitiveArray::from_iter([1i32, 2, 3]).into_array(), - Validity::AllValid, - ) - .unwrap(); - - let res = compare( - masked.as_ref(), - ConstantArray::new(Scalar::from(2i32), 3).as_ref(), - Operator::Gt, - ) - .unwrap(); - assert_arrays_eq!( - res, - BoolArray::from_iter([Some(false), Some(false), Some(true)]) - ); - } - - #[test] - fn test_compare_nullable() { - // MaskedArray with nulls - let masked = MaskedArray::try_new( - PrimitiveArray::from_iter([1i32, 2, 3]).into_array(), - Validity::from_iter([false, true, false]), - ) - .unwrap(); - - let res = compare( - masked.as_ref(), - ConstantArray::new(Scalar::primitive(2i32, Nullability::Nullable), 3).as_ref(), - Operator::Eq, - ) - .unwrap(); - assert_arrays_eq!(res, BoolArray::from_iter([None, Some(true), None])); - assert_eq!(res.dtype().nullability(), Nullability::Nullable); - assert_eq!( - res.validity_mask().unwrap(), - Mask::from_iter([false, true, false]) - ); - } - - #[test] - fn test_compare_with_null_rhs() { - // MaskedArray with some nulls - let masked = MaskedArray::try_new( - PrimitiveArray::from_iter([1i32, 2, 3]).into_array(), - Validity::from_iter([true, 
true, false]), - ) - .unwrap(); - - // RHS has a null value - let rhs = PrimitiveArray::from_option_iter([Some(1i32), None, Some(3)]); - - let res = compare(masked.as_ref(), rhs.as_ref(), Operator::Eq).unwrap(); - assert_arrays_eq!(res, BoolArray::from_iter([Some(true), None, None])); - assert_eq!(res.dtype().nullability(), Nullability::Nullable); - // Validity is union of both: lhs=[T,T,F], rhs=[T,F,T] => result=[T,F,F] - assert_eq!( - res.validity_mask().unwrap(), - Mask::from_iter([true, false, false]) - ); - } -} diff --git a/vortex-array/src/arrays/masked/compute/mod.rs b/vortex-array/src/arrays/masked/compute/mod.rs index 86840ca4e2a..404ced01ea3 100644 --- a/vortex-array/src/arrays/masked/compute/mod.rs +++ b/vortex-array/src/arrays/masked/compute/mod.rs @@ -1,7 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -mod compare; mod filter; mod mask; pub(crate) mod rules; diff --git a/vortex-array/src/arrays/varbin/compute/compare.rs b/vortex-array/src/arrays/varbin/compute/compare.rs deleted file mode 100644 index 7dd9b9d48e1..00000000000 --- a/vortex-array/src/arrays/varbin/compute/compare.rs +++ /dev/null @@ -1,232 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -use arrow_array::BinaryArray; -use arrow_array::StringArray; -use arrow_ord::cmp; -use itertools::Itertools; -use vortex_buffer::BitBuffer; -use vortex_dtype::DType; -use vortex_dtype::IntegerPType; -use vortex_dtype::match_each_integer_ptype; -use vortex_error::VortexExpect as _; -use vortex_error::VortexResult; -use vortex_error::vortex_bail; -use vortex_error::vortex_err; - -use crate::Array; -use crate::ArrayRef; -use crate::IntoArray; -use crate::ToCanonical; -use crate::arrays::BoolArray; -use crate::arrays::PrimitiveArray; -use crate::arrays::VarBinArray; -use crate::arrays::VarBinVTable; -use crate::arrow::Datum; -use crate::arrow::from_arrow_array_with_len; -use 
crate::compute::CompareKernel; -use crate::compute::CompareKernelAdapter; -use crate::compute::Operator; -use crate::compute::compare; -use crate::compute::compare_lengths_to_empty; -use crate::register_kernel; -use crate::vtable::ValidityHelper; - -// This implementation exists so we can have custom translation of RHS to arrow that's not the same as IntoCanonical -impl CompareKernel for VarBinVTable { - fn compare( - &self, - lhs: &VarBinArray, - rhs: &dyn Array, - operator: Operator, - ) -> VortexResult> { - if let Some(rhs_const) = rhs.as_constant() { - let nullable = lhs.dtype().is_nullable() || rhs_const.dtype().is_nullable(); - let len = lhs.len(); - - let rhs_is_empty = match rhs_const.dtype() { - DType::Binary(_) => rhs_const - .as_binary() - .is_empty() - .vortex_expect("RHS should not be null"), - DType::Utf8(_) => rhs_const - .as_utf8() - .is_empty() - .vortex_expect("RHS should not be null"), - _ => vortex_bail!("VarBinArray can only have type of Binary or Utf8"), - }; - - if rhs_is_empty { - let buffer = match operator { - Operator::Gte => BitBuffer::new_set(len), // Every possible value is >= "" - Operator::Lt => BitBuffer::new_unset(len), // No value is < "" - Operator::Eq | Operator::NotEq | Operator::Gt | Operator::Lte => { - let lhs_offsets = lhs.offsets().to_primitive(); - match_each_integer_ptype!(lhs_offsets.ptype(), |P| { - compare_offsets_to_empty::

(lhs_offsets, operator) - }) - } - }; - - return Ok(Some( - BoolArray::new( - buffer, - lhs.validity() - .clone() - .union_nullability(rhs.dtype().nullability()), - ) - .into_array(), - )); - } - - let lhs = Datum::try_new(lhs.as_ref())?; - - // Use StringViewArray/BinaryViewArray to match the Utf8View/BinaryView types - // produced by Datum::try_new (which uses into_arrow_preferred()) - let arrow_rhs: &dyn arrow_array::Datum = match rhs_const.dtype() { - DType::Utf8(_) => &rhs_const - .as_utf8() - .value() - .map(StringArray::new_scalar) - .unwrap_or_else(|| arrow_array::Scalar::new(StringArray::new_null(1))), - DType::Binary(_) => &rhs_const - .as_binary() - .value() - .map(BinaryArray::new_scalar) - .unwrap_or_else(|| arrow_array::Scalar::new(BinaryArray::new_null(1))), - _ => vortex_bail!( - "VarBin array RHS can only be Utf8 or Binary, given {}", - rhs_const.dtype() - ), - }; - - let array = match operator { - Operator::Eq => cmp::eq(&lhs, arrow_rhs), - Operator::NotEq => cmp::neq(&lhs, arrow_rhs), - Operator::Gt => cmp::gt(&lhs, arrow_rhs), - Operator::Gte => cmp::gt_eq(&lhs, arrow_rhs), - Operator::Lt => cmp::lt(&lhs, arrow_rhs), - Operator::Lte => cmp::lt_eq(&lhs, arrow_rhs), - } - .map_err(|err| vortex_err!("Failed to compare VarBin array: {}", err))?; - - Ok(Some(from_arrow_array_with_len(&array, len, nullable)?)) - } else if !rhs.is::() { - // NOTE: If the rhs is not a VarBin array it will be canonicalized to a VarBinView - // Arrow doesn't support comparing VarBin to VarBinView arrays, so we convert ourselves - // to VarBinView and re-invoke. - return Ok(Some(compare(lhs.to_varbinview().as_ref(), rhs, operator)?)); - } else { - Ok(None) - } - } -} - -register_kernel!(CompareKernelAdapter(VarBinVTable).lift()); - -fn compare_offsets_to_empty( - offsets: PrimitiveArray, - operator: Operator, -) -> BitBuffer { - let lengths_iter = offsets - .as_slice::

() - .iter() - .tuple_windows() - .map(|(&s, &e)| e - s); - compare_lengths_to_empty(lengths_iter, operator) -} - -#[cfg(test)] -mod test { - use vortex_buffer::BitBuffer; - use vortex_buffer::ByteBuffer; - use vortex_dtype::DType; - use vortex_dtype::Nullability; - use vortex_scalar::Scalar; - - use crate::ToCanonical; - use crate::arrays::ConstantArray; - use crate::arrays::VarBinArray; - use crate::arrays::VarBinViewArray; - use crate::compute::Operator; - use crate::compute::compare; - - #[test] - fn test_binary_compare() { - let array = VarBinArray::from_iter( - [Some(b"abc".to_vec()), None, Some(b"def".to_vec())], - DType::Binary(Nullability::Nullable), - ); - let result = compare( - array.as_ref(), - ConstantArray::new( - Scalar::binary(ByteBuffer::copy_from(b"abc"), Nullability::Nullable), - 3, - ) - .as_ref(), - Operator::Eq, - ) - .unwrap() - .to_bool(); - - assert_eq!( - &result.validity_mask().unwrap().to_bit_buffer(), - &BitBuffer::from_iter([true, false, true]) - ); - assert_eq!( - result.to_bit_buffer(), - BitBuffer::from_iter([true, false, false]) - ); - } - - #[test] - fn varbinview_compare() { - let array = VarBinArray::from_iter( - [Some(b"abc".to_vec()), None, Some(b"def".to_vec())], - DType::Binary(Nullability::Nullable), - ); - let vbv = VarBinViewArray::from_iter( - [None, None, Some(b"def".to_vec())], - DType::Binary(Nullability::Nullable), - ); - let result = compare(array.as_ref(), vbv.as_ref(), Operator::Eq) - .unwrap() - .to_bool(); - - assert_eq!( - result.validity_mask().unwrap().to_bit_buffer(), - BitBuffer::from_iter([false, false, true]) - ); - assert_eq!( - result.to_bit_buffer(), - BitBuffer::from_iter([false, true, true]) - ); - } -} - -#[cfg(test)] -mod tests { - use vortex_dtype::DType; - use vortex_dtype::Nullability; - use vortex_scalar::Scalar; - - use crate::Array; - use crate::arrays::ConstantArray; - use crate::arrays::VarBinArray; - use crate::compute::Operator; - use crate::compute::compare; - - #[test] - fn 
test_null_compare() { - let arr = VarBinArray::from_iter([Some("h")], DType::Utf8(Nullability::NonNullable)); - - let const_ = ConstantArray::new(Scalar::utf8("", Nullability::Nullable), 1); - - assert_eq!( - compare(arr.as_ref(), const_.as_ref(), Operator::Eq) - .unwrap() - .dtype(), - &DType::Bool(Nullability::Nullable) - ); - } -} diff --git a/vortex-array/src/arrays/varbin/compute/mod.rs b/vortex-array/src/arrays/varbin/compute/mod.rs index 4af8bd61d13..6d2eb68044c 100644 --- a/vortex-array/src/arrays/varbin/compute/mod.rs +++ b/vortex-array/src/arrays/varbin/compute/mod.rs @@ -6,7 +6,6 @@ mod slice; pub(crate) use min_max::varbin_compute_min_max; mod cast; -mod compare; mod filter; mod is_constant; mod is_sorted; diff --git a/vortex-array/src/compute/compare.rs b/vortex-array/src/compute/compare.rs index 44c7282f7f2..66ba9bc1ed3 100644 --- a/vortex-array/src/compute/compare.rs +++ b/vortex-array/src/compute/compare.rs @@ -2,68 +2,39 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors use core::fmt; -use std::any::Any; use std::cmp::Ordering; use std::fmt::Display; use std::fmt::Formatter; -use std::sync::LazyLock; -use arcref::ArcRef; -use arrow_array::Array as ArrowArray; use arrow_array::BooleanArray; use arrow_buffer::NullBuffer; -use arrow_ord::cmp; use arrow_ord::ord::make_comparator; use arrow_schema::SortOptions; use vortex_buffer::BitBuffer; use vortex_dtype::DType; use vortex_dtype::IntegerPType; use vortex_dtype::Nullability; -use vortex_error::VortexError; -use vortex_error::VortexExpect; use vortex_error::VortexResult; -use vortex_error::vortex_bail; -use vortex_error::vortex_err; use vortex_scalar::Scalar; use crate::Array; use crate::ArrayRef; -use crate::Canonical; use crate::IntoArray; -use crate::arrays::ConstantArray; -use crate::arrays::ConstantVTable; -use crate::arrow::Datum; -use crate::arrow::IntoArrowArray; -use crate::arrow::from_arrow_array_with_len; -use crate::compute::ComputeFn; -use crate::compute::ComputeFnVTable; 
-use crate::compute::InvocationArgs; -use crate::compute::Kernel; -use crate::compute::Options; -use crate::compute::Output; -use crate::vtable::VTable; - -static COMPARE_FN: LazyLock = LazyLock::new(|| { - let compute = ComputeFn::new("compare".into(), ArcRef::new_ref(&Compare)); - for kernel in inventory::iter:: { - compute.register_kernel(kernel.0.clone()); - } - compute -}); - -pub(crate) fn warm_up_vtable() -> usize { - COMPARE_FN.kernels().len() -} +use crate::arrays::ScalarFnArray; +use crate::expr::Binary; +use crate::expr::ScalarFn; /// Compares two arrays and returns a new boolean array with the result of the comparison. -/// Or, returns None if comparison is not supported for these arrays. +/// +/// The returned array is lazy (a [`ScalarFnArray`]) and will be evaluated on demand. pub fn compare(left: &dyn Array, right: &dyn Array, operator: Operator) -> VortexResult { - COMPARE_FN - .invoke(&InvocationArgs { - inputs: &[left.into(), right.into()], - options: &operator, - })? - .unwrap_array() + let expr_op: crate::expr::operators::Operator = operator.into(); + Ok(ScalarFnArray::try_new( + ScalarFn::new(Binary, expr_op), + vec![left.to_array(), right.to_array()], + left.len(), + )? 
+ .into_array()) } #[derive(Copy, Clone, Debug, Eq, PartialEq, PartialOrd, Hash)] @@ -121,183 +92,6 @@ impl Operator { } } -pub struct CompareKernelRef(ArcRef); -inventory::collect!(CompareKernelRef); - -pub trait CompareKernel: VTable { - fn compare( - &self, - lhs: &Self::Array, - rhs: &dyn Array, - operator: Operator, - ) -> VortexResult>; -} - -#[derive(Debug)] -pub struct CompareKernelAdapter(pub V); - -impl CompareKernelAdapter { - pub const fn lift(&'static self) -> CompareKernelRef { - CompareKernelRef(ArcRef::new_ref(self)) - } -} - -impl Kernel for CompareKernelAdapter { - fn invoke(&self, args: &InvocationArgs) -> VortexResult> { - let inputs = CompareArgs::try_from(args)?; - let Some(array) = inputs.lhs.as_opt::() else { - return Ok(None); - }; - Ok(V::compare(&self.0, array, inputs.rhs, inputs.operator)?.map(|array| array.into())) - } -} - -struct Compare; - -impl ComputeFnVTable for Compare { - fn invoke( - &self, - args: &InvocationArgs, - kernels: &[ArcRef], - ) -> VortexResult { - let CompareArgs { lhs, rhs, operator } = CompareArgs::try_from(args)?; - - let return_dtype = self.return_dtype(args)?; - - if lhs.is_empty() { - return Ok(Canonical::empty(&return_dtype).into_array().into()); - } - - let left_constant_null = lhs.as_constant().map(|l| l.is_null()).unwrap_or(false); - let right_constant_null = rhs.as_constant().map(|r| r.is_null()).unwrap_or(false); - if left_constant_null || right_constant_null { - return Ok(ConstantArray::new(Scalar::null(return_dtype), lhs.len()) - .into_array() - .into()); - } - - let right_is_constant = rhs.is::(); - - // Always try to put constants on the right-hand side so encodings can optimise themselves. - if lhs.is::() && !right_is_constant { - return Ok(compare(rhs, lhs, operator.swap())?.into()); - } - - // First try lhs op rhs, then invert and try again. - for kernel in kernels { - if let Some(output) = kernel.invoke(args)? 
{ - return Ok(output); - } - } - - // Try inverting the operator and swapping the arguments - let inverted_args = InvocationArgs { - inputs: &[rhs.into(), lhs.into()], - options: &operator.swap(), - }; - for kernel in kernels { - if let Some(output) = kernel.invoke(&inverted_args)? { - return Ok(output); - } - } - - // Only log missing compare implementation if there's possibly better one than arrow, - // i.e. lhs isn't arrow or rhs isn't arrow or constant - if !(lhs.is_arrow() && (rhs.is_arrow() || right_is_constant)) { - tracing::debug!( - "No compare implementation found for LHS {}, RHS {}, and operator {} (or inverse)", - lhs.encoding_id(), - rhs.encoding_id(), - operator, - ); - } - - // Fallback to arrow on canonical types - Ok(arrow_compare(lhs, rhs, operator)?.into()) - } - - fn return_dtype(&self, args: &InvocationArgs) -> VortexResult { - let CompareArgs { lhs, rhs, .. } = CompareArgs::try_from(args)?; - - if !lhs.dtype().eq_ignore_nullability(rhs.dtype()) { - if lhs.dtype().is_float() && rhs.dtype().is_float() { - vortex_bail!( - "Cannot compare different floating-point types ({}, {}). Consider using cast.", - lhs.dtype(), - rhs.dtype(), - ); - } - if lhs.dtype().is_int() && rhs.dtype().is_int() { - vortex_bail!( - "Cannot compare different fixed-width types ({}, {}). Consider using cast.", - lhs.dtype(), - rhs.dtype() - ); - } - vortex_bail!( - "Cannot compare different DTypes {} and {}", - lhs.dtype(), - rhs.dtype() - ); - } - - Ok(DType::Bool( - lhs.dtype().nullability() | rhs.dtype().nullability(), - )) - } - - fn return_len(&self, args: &InvocationArgs) -> VortexResult { - let CompareArgs { lhs, rhs, .. 
} = CompareArgs::try_from(args)?; - if lhs.len() != rhs.len() { - vortex_bail!( - "Compare operations only support arrays of the same length, got {} and {}", - lhs.len(), - rhs.len() - ); - } - Ok(lhs.len()) - } - - fn is_elementwise(&self) -> bool { - true - } -} - -struct CompareArgs<'a> { - lhs: &'a dyn Array, - rhs: &'a dyn Array, - operator: Operator, -} - -impl Options for Operator { - fn as_any(&self) -> &dyn Any { - self - } -} - -impl<'a> TryFrom<&InvocationArgs<'a>> for CompareArgs<'a> { - type Error = VortexError; - - fn try_from(value: &InvocationArgs<'a>) -> Result { - if value.inputs.len() != 2 { - vortex_bail!("Expected 2 inputs, found {}", value.inputs.len()); - } - let lhs = value.inputs[0] - .array() - .ok_or_else(|| vortex_err!("Expected first input to be an array"))?; - let rhs = value.inputs[1] - .array() - .ok_or_else(|| vortex_err!("Expected second input to be an array"))?; - let operator = *value - .options - .as_any() - .downcast_ref::() - .vortex_expect("Expected options to be an operator"); - - Ok(CompareArgs { lhs, rhs, operator }) - } -} - /// Helper function to compare empty values with arrays that have external value length information /// like `VarBin`. pub fn compare_lengths_to_empty(lengths: I, op: Operator) -> BitBuffer @@ -347,48 +141,6 @@ pub(crate) fn compare_nested_arrow_arrays( Ok(BooleanArray::new(values, nulls)) } -/// Implementation of `CompareFn` using the Arrow crate. -fn arrow_compare( - left: &dyn Array, - right: &dyn Array, - operator: Operator, -) -> VortexResult { - assert_eq!(left.len(), right.len()); - - let nullable = left.dtype().is_nullable() || right.dtype().is_nullable(); - - // Arrow's vectorized comparison kernels (`cmp::eq`, etc.) are faster but don't support nested - // types. For nested types, we fall back to `make_comparator` which does element-wise - // comparison. 
- let array = if left.dtype().is_nested() || right.dtype().is_nested() { - let rhs = right.to_array().into_arrow_preferred()?; - let lhs = left.to_array().into_arrow(rhs.data_type())?; - - assert!( - lhs.data_type().equals_datatype(rhs.data_type()), - "lhs data_type: {}, rhs data_type: {}", - lhs.data_type(), - rhs.data_type() - ); - - compare_nested_arrow_arrays(lhs.as_ref(), rhs.as_ref(), operator)? - } else { - // Fast path: use vectorized kernels for primitive types. - let lhs = Datum::try_new(left)?; - let rhs = Datum::try_new_with_target_datatype(right, lhs.data_type())?; - - match operator { - Operator::Eq => cmp::eq(&lhs, &rhs)?, - Operator::NotEq => cmp::neq(&lhs, &rhs)?, - Operator::Gt => cmp::gt(&lhs, &rhs)?, - Operator::Gte => cmp::gt_eq(&lhs, &rhs)?, - Operator::Lt => cmp::lt(&lhs, &rhs)?, - Operator::Lte => cmp::lt_eq(&lhs, &rhs)?, - } - }; - from_arrow_array_with_len(&array, left.len(), nullable) -} - pub fn scalar_cmp(lhs: &Scalar, rhs: &Scalar, operator: Operator) -> Scalar { if lhs.is_null() | rhs.is_null() { Scalar::null(DType::Bool(Nullability::Nullable)) @@ -480,15 +232,10 @@ mod tests { let left = ConstantArray::new(Scalar::from(2u32), 10); let right = ConstantArray::new(Scalar::from(10u32), 10); - let compare = compare(left.as_ref(), right.as_ref(), Operator::Gt).unwrap(); - let res = compare.as_constant().unwrap(); - assert_eq!(res.as_bool().value(), Some(false)); - assert_eq!(compare.len(), 10); - - let compare = arrow_compare(&left.into_array(), &right.into_array(), Operator::Gt).unwrap(); - let res = compare.as_constant().unwrap(); - assert_eq!(res.as_bool().value(), Some(false)); - assert_eq!(compare.len(), 10); + let result = compare(left.as_ref(), right.as_ref(), Operator::Gt).unwrap(); + assert_eq!(result.len(), 10); + let scalar = result.scalar_at(0).unwrap(); + assert_eq!(scalar.as_bool().value(), Some(false)); } #[rstest] diff --git a/vortex-array/src/compute/mod.rs b/vortex-array/src/compute/mod.rs index 8d603de9a24..ff594b75926 
100644 --- a/vortex-array/src/compute/mod.rs +++ b/vortex-array/src/compute/mod.rs @@ -96,7 +96,6 @@ pub struct ComputeFn { pub fn warm_up_vtables() { #[allow(unused_qualifications)] between::warm_up_vtable(); - compare::warm_up_vtable(); is_constant::warm_up_vtable(); is_sorted::warm_up_vtable(); like::warm_up_vtable(); diff --git a/vortex-array/src/expr/exprs/binary/compare.rs b/vortex-array/src/expr/exprs/binary/compare.rs new file mode 100644 index 00000000000..1fcb7d9bb88 --- /dev/null +++ b/vortex-array/src/expr/exprs/binary/compare.rs @@ -0,0 +1,98 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use arrow_array::BooleanArray; +use arrow_ord::cmp; +use vortex_error::VortexResult; +use vortex_scalar::Scalar; + +use crate::Array; +use crate::ArrayRef; +use crate::Canonical; +use crate::IntoArray; +use crate::arrays::ConstantArray; +use crate::arrays::ConstantVTable; +use crate::arrow::Datum; +use crate::arrow::IntoArrowArray; +use crate::arrow::from_arrow_array_with_len; +use crate::compute::Operator; +use crate::compute::compare_nested_arrow_arrays; +use crate::compute::scalar_cmp; + +/// Execute a compare operation between two arrays. +/// +/// This is the entry point for compare operations from the binary expression. +/// Handles empty, constant-null, and constant-constant directly, otherwise falls back to Arrow. 
+pub(crate) fn execute_compare( + lhs: &dyn Array, + rhs: &dyn Array, + op: Operator, +) -> VortexResult { + let nullable = lhs.dtype().is_nullable() || rhs.dtype().is_nullable(); + + if lhs.is_empty() { + return Ok(Canonical::empty(&vortex_dtype::DType::Bool(nullable.into())).into_array()); + } + + let left_constant_null = lhs.as_constant().map(|l| l.is_null()).unwrap_or(false); + let right_constant_null = rhs.as_constant().map(|r| r.is_null()).unwrap_or(false); + if left_constant_null || right_constant_null { + return Ok(ConstantArray::new( + Scalar::null(vortex_dtype::DType::Bool(nullable.into())), + lhs.len(), + ) + .into_array()); + } + + // Constant-constant fast path + if let (Some(lhs_const), Some(rhs_const)) = ( + lhs.as_opt::(), + rhs.as_opt::(), + ) { + let result = scalar_cmp(lhs_const.scalar(), rhs_const.scalar(), op); + return Ok(ConstantArray::new(result, lhs.len()).into_array()); + } + + arrow_compare_arrays(lhs, rhs, op) +} + +/// Fall back to Arrow for comparison. +fn arrow_compare_arrays( + left: &dyn Array, + right: &dyn Array, + operator: Operator, +) -> VortexResult { + assert_eq!(left.len(), right.len()); + + let nullable = left.dtype().is_nullable() || right.dtype().is_nullable(); + + // Arrow's vectorized comparison kernels don't support nested types. + // For nested types, fall back to `make_comparator` which does element-wise comparison. + let array: BooleanArray = if left.dtype().is_nested() || right.dtype().is_nested() { + let rhs = right.to_array().into_arrow_preferred()?; + let lhs = left.to_array().into_arrow(rhs.data_type())?; + + assert!( + lhs.data_type().equals_datatype(rhs.data_type()), + "lhs data_type: {}, rhs data_type: {}", + lhs.data_type(), + rhs.data_type() + ); + + compare_nested_arrow_arrays(lhs.as_ref(), rhs.as_ref(), operator)? + } else { + // Fast path: use vectorized kernels for primitive types. 
+ let lhs = Datum::try_new(left)?; + let rhs = Datum::try_new_with_target_datatype(right, lhs.data_type())?; + + match operator { + Operator::Eq => cmp::eq(&lhs, &rhs)?, + Operator::NotEq => cmp::neq(&lhs, &rhs)?, + Operator::Gt => cmp::gt(&lhs, &rhs)?, + Operator::Gte => cmp::gt_eq(&lhs, &rhs)?, + Operator::Lt => cmp::lt(&lhs, &rhs)?, + Operator::Lte => cmp::lt_eq(&lhs, &rhs)?, + } + }; + from_arrow_array_with_len(&array, left.len(), nullable) +} diff --git a/vortex-array/src/expr/exprs/binary/mod.rs b/vortex-array/src/expr/exprs/binary/mod.rs index c66eb1a3548..e01231b9ce1 100644 --- a/vortex-array/src/expr/exprs/binary/mod.rs +++ b/vortex-array/src/expr/exprs/binary/mod.rs @@ -14,7 +14,6 @@ use vortex_session::VortexSession; use crate::ArrayRef; use crate::compute; use crate::compute::BooleanOperator; -use crate::compute::compare; use crate::expr::Arity; use crate::expr::ChildName; use crate::expr::ExecutionArgs; @@ -29,6 +28,8 @@ use crate::expr::stats::Stat; mod boolean; pub(crate) use boolean::*; +mod compare; +pub(crate) use compare::*; mod numeric; pub(crate) use numeric::*; @@ -99,6 +100,28 @@ impl VTable for Binary { ); } + if operator.is_comparison() + && !lhs.eq_ignore_nullability(rhs) + && !lhs.is_extension() + && !rhs.is_extension() + { + if lhs.is_float() && rhs.is_float() { + vortex_bail!( + "Cannot compare different floating-point types ({}, {}). Consider using cast.", + lhs, + rhs, + ); + } + if lhs.is_int() && rhs.is_int() { + vortex_bail!( + "Cannot compare different fixed-width types ({}, {}). 
Consider using cast.", + lhs, + rhs, + ); + } + vortex_bail!("Cannot compare different DTypes {} and {}", lhs, rhs); + } + Ok(DType::Bool((lhs.is_nullable() || rhs.is_nullable()).into())) } @@ -108,12 +131,12 @@ impl VTable for Binary { }; match op { - Operator::Eq => compare(lhs, rhs, compute::Operator::Eq), - Operator::NotEq => compare(lhs, rhs, compute::Operator::NotEq), - Operator::Lt => compare(lhs, rhs, compute::Operator::Lt), - Operator::Lte => compare(lhs, rhs, compute::Operator::Lte), - Operator::Gt => compare(lhs, rhs, compute::Operator::Gt), - Operator::Gte => compare(lhs, rhs, compute::Operator::Gte), + Operator::Eq => execute_compare(lhs, rhs, compute::Operator::Eq), + Operator::NotEq => execute_compare(lhs, rhs, compute::Operator::NotEq), + Operator::Lt => execute_compare(lhs, rhs, compute::Operator::Lt), + Operator::Lte => execute_compare(lhs, rhs, compute::Operator::Lte), + Operator::Gt => execute_compare(lhs, rhs, compute::Operator::Gt), + Operator::Gte => execute_compare(lhs, rhs, compute::Operator::Gte), Operator::And => execute_boolean(lhs, rhs, BooleanOperator::AndKleene), Operator::Or => execute_boolean(lhs, rhs, BooleanOperator::OrKleene), Operator::Add => execute_numeric(lhs, rhs, vortex_scalar::NumericOperator::Add), @@ -554,6 +577,7 @@ mod tests { use super::*; use crate::assert_arrays_eq; + use crate::compute::compare; use crate::expr::Expression; use crate::expr::exprs::get_item::col; use crate::expr::exprs::literal::lit; diff --git a/vortex-array/src/expr/exprs/operators.rs b/vortex-array/src/expr/exprs/operators.rs index c2084c08f6b..5121c506d2c 100644 --- a/vortex-array/src/expr/exprs/operators.rs +++ b/vortex-array/src/expr/exprs/operators.rs @@ -185,6 +185,13 @@ impl Operator { pub fn is_arithmetic(&self) -> bool { matches!(self, Self::Add | Self::Sub | Self::Mul | Self::Div) } + + pub fn is_comparison(&self) -> bool { + matches!( + self, + Self::Eq | Self::NotEq | Self::Gt | Self::Gte | Self::Lt | Self::Lte + ) + } } impl From for 
Operator { diff --git a/vortex-test/e2e/src/lib.rs b/vortex-test/e2e/src/lib.rs index 2e8d3c77be0..765d4de32bd 100644 --- a/vortex-test/e2e/src/lib.rs +++ b/vortex-test/e2e/src/lib.rs @@ -28,7 +28,7 @@ mod tests { #[cfg(feature = "unstable_encodings")] const EXPECTED_SIZE: usize = 216188; #[cfg(not(feature = "unstable_encodings"))] - const EXPECTED_SIZE: usize = 216156; + const EXPECTED_SIZE: usize = 216188; let futures: Vec<_> = (0..5) .map(|_| { let array = array.clone(); From 01543ff995caf99e21dadecbef06e2108694f52e Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Mon, 16 Feb 2026 10:43:53 +0000 Subject: [PATCH 02/20] wip Signed-off-by: Joe Isaacs --- encodings/alp/src/alp/compute/compare.rs | 357 ++++++++++++++++++ encodings/alp/src/alp/compute/mod.rs | 1 + encodings/alp/src/alp/rules.rs | 2 + .../datetime-parts/src/compute/compare.rs | 322 ++++++++++++++++ .../datetime-parts/src/compute/kernel.rs | 9 +- encodings/datetime-parts/src/compute/mod.rs | 1 + .../src/decimal_byte_parts/compute/compare.rs | 262 +++++++++++++ .../src/decimal_byte_parts/compute/kernel.rs | 9 +- .../src/decimal_byte_parts/compute/mod.rs | 1 + .../fastlanes/src/for/compute/compare.rs | 215 +++++++++++ encodings/fastlanes/src/for/compute/mod.rs | 1 + encodings/fastlanes/src/for/vtable/kernels.rs | 7 +- encodings/fsst/src/compute/compare.rs | 197 ++++++++++ encodings/fsst/src/compute/mod.rs | 1 + encodings/fsst/src/kernel.rs | 2 + encodings/runend/src/compute/compare.rs | 80 ++++ encodings/runend/src/compute/mod.rs | 1 + encodings/runend/src/kernel.rs | 2 + .../src/arrays/dict/compute/compare.rs | 52 +++ vortex-array/src/arrays/dict/compute/mod.rs | 1 + vortex-array/src/arrays/dict/vtable/kernel.rs | 2 + .../src/arrays/extension/compute/compare.rs | 42 +++ .../src/arrays/extension/compute/mod.rs | 1 + .../src/arrays/extension/vtable/kernel.rs | 7 +- .../src/arrays/varbin/compute/compare.rs | 238 ++++++++++++ vortex-array/src/arrays/varbin/compute/mod.rs | 1 + 
.../src/arrays/varbin/vtable/kernel.rs | 2 + vortex-array/src/expr/exprs/binary/kernel.rs | 71 ++++ vortex-array/src/expr/exprs/binary/mod.rs | 3 + 29 files changed, 1878 insertions(+), 12 deletions(-) create mode 100644 encodings/alp/src/alp/compute/compare.rs create mode 100644 encodings/datetime-parts/src/compute/compare.rs create mode 100644 encodings/decimal-byte-parts/src/decimal_byte_parts/compute/compare.rs create mode 100644 encodings/fastlanes/src/for/compute/compare.rs create mode 100644 encodings/fsst/src/compute/compare.rs create mode 100644 encodings/runend/src/compute/compare.rs create mode 100644 vortex-array/src/arrays/dict/compute/compare.rs create mode 100644 vortex-array/src/arrays/extension/compute/compare.rs create mode 100644 vortex-array/src/arrays/varbin/compute/compare.rs create mode 100644 vortex-array/src/expr/exprs/binary/kernel.rs diff --git a/encodings/alp/src/alp/compute/compare.rs b/encodings/alp/src/alp/compute/compare.rs new file mode 100644 index 00000000000..13c0eba4d03 --- /dev/null +++ b/encodings/alp/src/alp/compute/compare.rs @@ -0,0 +1,357 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use std::fmt::Debug; + +use vortex_array::Array; +use vortex_array::ArrayRef; +use vortex_array::ExecutionCtx; +use vortex_array::IntoArray; +use vortex_array::arrays::ConstantArray; +use vortex_array::compute::Operator; +use vortex_array::compute::compare; +use vortex_array::expr::CompareKernel; +use vortex_dtype::NativePType; +use vortex_error::VortexResult; +use vortex_error::vortex_bail; +use vortex_error::vortex_err; +use vortex_scalar::Scalar; + +use crate::ALPArray; +use crate::ALPFloat; +use crate::ALPVTable; +use crate::match_each_alp_float_ptype; + +// TODO(joe): add fuzzing. 
+ +impl CompareKernel for ALPVTable { + fn compare( + array: &ALPArray, + other: &dyn Array, + operator: Operator, + _ctx: &mut ExecutionCtx, + ) -> VortexResult> { + if array.patches().is_some() { + // TODO(joe): support patches + return Ok(None); + } + if array.dtype().is_nullable() || other.dtype().is_nullable() { + // TODO(joe): support nullability + return Ok(None); + } + + if let Some(const_scalar) = other.as_constant() { + let pscalar = const_scalar.as_primitive_opt().ok_or_else(|| { + vortex_err!( + "ALP Compare RHS had the wrong type {}, expected {}", + const_scalar, + const_scalar.dtype() + ) + })?; + + match_each_alp_float_ptype!(pscalar.ptype(), |T| { + match pscalar.typed_value::() { + Some(value) => return alp_scalar_compare(array, value, operator), + None => vortex_bail!( + "Failed to convert scalar {:?} to ALP type {:?}", + pscalar, + pscalar.ptype() + ), + } + }); + } + + Ok(None) + } +} + +/// We can compare a scalar to an ALPArray by encoding the scalar into the ALP domain and comparing +/// the encoded value to the encoded values in the ALPArray. There are fixups when the value doesn't +/// encode into the ALP domain. +fn alp_scalar_compare>( + alp: &ALPArray, + value: F, + operator: Operator, +) -> VortexResult> +where + F::ALPInt: Into, + ::ALPInt: Debug, +{ + // TODO(joe): support patches, this is checked above. + if alp.patches().is_some() { + return Ok(None); + } + + let exponents = alp.exponents(); + // If the scalar doesn't fit into the ALP domain, + // it cannot be equal to any values in the encoded array. + let encoded = F::encode_single(value, alp.exponents()); + match encoded { + Some(encoded) => { + let s = ConstantArray::new(encoded, alp.len()); + Ok(Some(compare(alp.encoded(), s.as_ref(), operator)?)) + } + None => match operator { + // Since this value is not encodable it cannot be equal to any value in the encoded + // array. 
+ Operator::Eq => Ok(Some(ConstantArray::new(false, alp.len()).into_array())), + // Since this value is not encodable it cannot be equal to any value in the encoded + // array, hence != to all values in the encoded array. + Operator::NotEq => Ok(Some(ConstantArray::new(true, alp.len()).into_array())), + Operator::Gt | Operator::Gte => { + // Per IEEE 754 totalOrder semantics the ordering is -Nan < -Inf < Inf < Nan. + // All values in the encoded array are definitely finite + let is_not_finite = NativePType::is_infinite(value) || NativePType::is_nan(value); + if is_not_finite { + Ok(Some( + ConstantArray::new(value.is_sign_negative(), alp.len()).into_array(), + )) + } else { + Ok(Some(compare( + alp.encoded(), + ConstantArray::new(F::encode_above(value, exponents), alp.len()).as_ref(), + // Since the encoded value is unencodable gte is equivalent to gt. + // Consider a value v, between two encodable values v_l (just less) and + // v_a (just above), then for all encodable values (u), v > u <=> v_g >= u + Operator::Gte, + )?)) + } + } + Operator::Lt | Operator::Lte => { + // Per IEEE 754 totalOrder semantics the ordering is -Nan < -Inf < Inf < Nan. + // All values in the encoded array are definitely finite + let is_not_finite = NativePType::is_infinite(value) || NativePType::is_nan(value); + if is_not_finite { + Ok(Some( + ConstantArray::new(value.is_sign_positive(), alp.len()).into_array(), + )) + } else { + Ok(Some(compare( + alp.encoded(), + ConstantArray::new(F::encode_below(value, exponents), alp.len()).as_ref(), + // Since the encoded values unencodable lt is equivalent to lte. + // See Gt | Gte for further explanation. 
+ Operator::Lte, + )?)) + } + } + }, + } +} + +#[cfg(test)] +mod tests { + use rstest::rstest; + use vortex_array::ArrayRef; + use vortex_array::ToCanonical; + use vortex_array::arrays::BoolArray; + use vortex_array::arrays::ConstantArray; + use vortex_array::arrays::PrimitiveArray; + use vortex_array::assert_arrays_eq; + use vortex_array::compute::Operator; + use vortex_array::compute::compare; + use vortex_dtype::DType; + use vortex_dtype::Nullability; + use vortex_dtype::PType; + use vortex_scalar::Scalar; + + use super::*; + use crate::alp_encode; + + fn test_alp_compare>( + alp: &ALPArray, + value: F, + operator: Operator, + ) -> Option + where + F::ALPInt: Into, + ::ALPInt: Debug, + { + alp_scalar_compare(alp, value, operator).unwrap() + } + + #[test] + fn basic_comparison_test() { + let array = PrimitiveArray::from_iter([1.234f32; 1025]); + let encoded = alp_encode(&array, None).unwrap(); + assert!(encoded.patches().is_none()); + assert_eq!( + encoded.encoded().to_primitive().as_slice::(), + vec![1234; 1025] + ); + + let r = alp_scalar_compare(&encoded, 1.3_f32, Operator::Eq) + .unwrap() + .unwrap(); + let expected = BoolArray::from_iter([false; 1025]); + assert_arrays_eq!(r, expected); + + let r = alp_scalar_compare(&encoded, 1.234f32, Operator::Eq) + .unwrap() + .unwrap(); + let expected = BoolArray::from_iter([true; 1025]); + assert_arrays_eq!(r, expected); + } + + #[test] + fn comparison_with_unencodable_value() { + let array = PrimitiveArray::from_iter([1.234f32; 1025]); + let encoded = alp_encode(&array, None).unwrap(); + assert!(encoded.patches().is_none()); + assert_eq!( + encoded.encoded().to_primitive().as_slice::(), + vec![1234; 1025] + ); + + #[allow(clippy::excessive_precision)] + let r_eq = alp_scalar_compare(&encoded, 1.234444_f32, Operator::Eq) + .unwrap() + .unwrap(); + let expected = BoolArray::from_iter([false; 1025]); + assert_arrays_eq!(r_eq, expected); + + #[allow(clippy::excessive_precision)] + let r_neq = alp_scalar_compare(&encoded, 
1.234444f32, Operator::NotEq) + .unwrap() + .unwrap(); + let expected = BoolArray::from_iter([true; 1025]); + assert_arrays_eq!(r_neq, expected); + } + + #[test] + fn comparison_range() { + let array = PrimitiveArray::from_iter([0.0605_f32; 10]); + let encoded = alp_encode(&array, None).unwrap(); + assert!(encoded.patches().is_none()); + assert_eq!( + encoded.encoded().to_primitive().as_slice::(), + vec![605; 10] + ); + + // !(0.0605_f32 >= 0.06051_f32); + let r_gte = alp_scalar_compare(&encoded, 0.06051_f32, Operator::Gte) + .unwrap() + .unwrap(); + let expected = BoolArray::from_iter([false; 10]); + assert_arrays_eq!(r_gte, expected); + + // (0.0605_f32 > 0.06051_f32); + let r_gt = alp_scalar_compare(&encoded, 0.06051_f32, Operator::Gt) + .unwrap() + .unwrap(); + let expected = BoolArray::from_iter([false; 10]); + assert_arrays_eq!(r_gt, expected); + + // 0.0605_f32 <= 0.06051_f32; + let r_lte = alp_scalar_compare(&encoded, 0.06051_f32, Operator::Lte) + .unwrap() + .unwrap(); + let expected = BoolArray::from_iter([true; 10]); + assert_arrays_eq!(r_lte, expected); + + //0.0605_f32 < 0.06051_f32; + let r_lt = alp_scalar_compare(&encoded, 0.06051_f32, Operator::Lt) + .unwrap() + .unwrap(); + let expected = BoolArray::from_iter([true; 10]); + assert_arrays_eq!(r_lt, expected); + } + + #[test] + fn comparison_zeroes() { + let array = PrimitiveArray::from_iter([0.0_f32; 10]); + let encoded = alp_encode(&array, None).unwrap(); + assert!(encoded.patches().is_none()); + assert_eq!( + encoded.encoded().to_primitive().as_slice::(), + vec![0; 10] + ); + + let r_gte = test_alp_compare(&encoded, -0.00000001_f32, Operator::Gte).unwrap(); + let expected = BoolArray::from_iter([true; 10]); + assert_arrays_eq!(r_gte, expected); + + let r_gte = test_alp_compare(&encoded, -0.0_f32, Operator::Gte).unwrap(); + let expected = BoolArray::from_iter([true; 10]); + assert_arrays_eq!(r_gte, expected); + + let r_gt = test_alp_compare(&encoded, -0.0000000001f32, Operator::Gt).unwrap(); + let 
expected = BoolArray::from_iter([true; 10]); + assert_arrays_eq!(r_gt, expected); + + let r_gte = test_alp_compare(&encoded, -0.0_f32, Operator::Gt).unwrap(); + let expected = BoolArray::from_iter([true; 10]); + assert_arrays_eq!(r_gte, expected); + + let r_lte = test_alp_compare(&encoded, 0.06051_f32, Operator::Lte).unwrap(); + let expected = BoolArray::from_iter([true; 10]); + assert_arrays_eq!(r_lte, expected); + + let r_lt = test_alp_compare(&encoded, 0.06051_f32, Operator::Lt).unwrap(); + let expected = BoolArray::from_iter([true; 10]); + assert_arrays_eq!(r_lt, expected); + + let r_lt = test_alp_compare(&encoded, -0.00001_f32, Operator::Lt).unwrap(); + let expected = BoolArray::from_iter([false; 10]); + assert_arrays_eq!(r_lt, expected); + } + + #[test] + fn compare_with_patches() { + let array = + PrimitiveArray::from_iter([1.234f32, 1.5, 19.0, std::f32::consts::E, 1_000_000.9]); + let encoded = alp_encode(&array, None).unwrap(); + assert!(encoded.patches().is_some()); + + // Not supported! 
+ assert!( + alp_scalar_compare(&encoded, 1_000_000.9_f32, Operator::Eq) + .unwrap() + .is_none() + ) + } + + #[test] + fn compare_to_null() { + let array = PrimitiveArray::from_iter([1.234f32; 10]); + let encoded = alp_encode(&array, None).unwrap(); + + let other = ConstantArray::new( + Scalar::null(DType::Primitive(PType::F32, Nullability::Nullable)), + array.len(), + ); + + let r = compare(encoded.as_ref(), other.as_ref(), Operator::Eq).unwrap(); + // Comparing to null yields null results + let expected = BoolArray::from_iter([None::; 10]); + assert_arrays_eq!(r, expected); + } + + #[rstest] + #[case(f32::NAN, false)] + #[case(-1.0f32 / 0.0f32, true)] + #[case(f32::INFINITY, false)] + #[case(f32::NEG_INFINITY, true)] + fn compare_to_non_finite_gt(#[case] value: f32, #[case] result: bool) { + let array = PrimitiveArray::from_iter([1.234f32; 10]); + let encoded = alp_encode(&array, None).unwrap(); + + let r = test_alp_compare(&encoded, value, Operator::Gt).unwrap(); + let expected = BoolArray::from_iter([result; 10]); + assert_arrays_eq!(r, expected); + } + + #[rstest] + #[case(f32::NAN, true)] + #[case(-1.0f32 / 0.0f32, false)] + #[case(f32::INFINITY, true)] + #[case(f32::NEG_INFINITY, false)] + fn compare_to_non_finite_lt(#[case] value: f32, #[case] result: bool) { + let array = PrimitiveArray::from_iter([1.234f32; 10]); + let encoded = alp_encode(&array, None).unwrap(); + + let r = test_alp_compare(&encoded, value, Operator::Lt).unwrap(); + let expected = BoolArray::from_iter([result; 10]); + assert_arrays_eq!(r, expected); + } +} diff --git a/encodings/alp/src/alp/compute/mod.rs b/encodings/alp/src/alp/compute/mod.rs index a3f1de38efc..af435684496 100644 --- a/encodings/alp/src/alp/compute/mod.rs +++ b/encodings/alp/src/alp/compute/mod.rs @@ -3,6 +3,7 @@ mod between; mod cast; +mod compare; mod filter; mod mask; mod nan_count; diff --git a/encodings/alp/src/alp/rules.rs b/encodings/alp/src/alp/rules.rs index f90739c1733..57d177f99f8 100644 --- 
a/encodings/alp/src/alp/rules.rs +++ b/encodings/alp/src/alp/rules.rs @@ -5,12 +5,14 @@ use vortex_array::arrays::FilterExecuteAdaptor; use vortex_array::arrays::SliceExecuteAdaptor; use vortex_array::arrays::TakeExecuteAdaptor; use vortex_array::compute::CastReduceAdaptor; +use vortex_array::expr::CompareExecuteAdaptor; use vortex_array::kernel::ParentKernelSet; use vortex_array::optimizer::rules::ParentRuleSet; use crate::ALPVTable; pub(super) const PARENT_KERNELS: ParentKernelSet = ParentKernelSet::new(&[ + ParentKernelSet::lift(&CompareExecuteAdaptor(ALPVTable)), ParentKernelSet::lift(&FilterExecuteAdaptor(ALPVTable)), ParentKernelSet::lift(&SliceExecuteAdaptor(ALPVTable)), ParentKernelSet::lift(&TakeExecuteAdaptor(ALPVTable)), diff --git a/encodings/datetime-parts/src/compute/compare.rs b/encodings/datetime-parts/src/compute/compare.rs new file mode 100644 index 00000000000..b1462fc897b --- /dev/null +++ b/encodings/datetime-parts/src/compute/compare.rs @@ -0,0 +1,322 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_array::Array; +use vortex_array::ArrayRef; +use vortex_array::ExecutionCtx; +use vortex_array::IntoArray; +use vortex_array::arrays::ConstantArray; +use vortex_array::builtins::ArrayBuiltins; +use vortex_array::compute::Operator; +use vortex_array::compute::and_kleene; +use vortex_array::compute::compare; +use vortex_array::compute::or_kleene; +use vortex_array::expr::CompareKernel; +use vortex_dtype::DType; +use vortex_dtype::Nullability; +use vortex_dtype::datetime::Timestamp; +use vortex_error::VortexResult; +use vortex_scalar::Scalar; + +use crate::array::DateTimePartsArray; +use crate::array::DateTimePartsVTable; +use crate::timestamp; + +impl CompareKernel for DateTimePartsVTable { + fn compare( + array: &DateTimePartsArray, + other: &dyn Array, + operator: Operator, + _ctx: &mut ExecutionCtx, + ) -> VortexResult> { + let Some(rhs_const) = other.as_constant() else { + 
return Ok(None); + }; + let Some(timestamp) = rhs_const + .as_extension() + .to_storage_scalar() + .as_primitive() + .as_::() + else { + return Ok(None); + }; + + let DType::Extension(ext_dtype) = rhs_const.dtype() else { + return Ok(None); + }; + + let nullability = array.dtype().nullability() | other.dtype().nullability(); + + let Some(options) = ext_dtype.metadata_opt::() else { + return Ok(None); + }; + let ts_parts = timestamp::split(timestamp, options.unit)?; + + match operator { + Operator::Eq => compare_eq(array, &ts_parts, nullability), + Operator::NotEq => compare_ne(array, &ts_parts, nullability), + // lt and lte have identical behavior, as we optimize + // for the case that all days on the lhs are smaller. + // If that special case is not hit, we return `Ok(None)` to + // signal that the comparison wasn't handled within dtp. + Operator::Lt => compare_lt(array, &ts_parts, nullability), + Operator::Lte => compare_lt(array, &ts_parts, nullability), + // (Like for lt, lte) + Operator::Gt => compare_gt(array, &ts_parts, nullability), + Operator::Gte => compare_gt(array, &ts_parts, nullability), + } + } +} + +fn compare_eq( + lhs: &DateTimePartsArray, + ts_parts: ×tamp::TimestampParts, + nullability: Nullability, +) -> VortexResult> { + let mut comparison = compare_dtp(lhs.days(), ts_parts.days, Operator::Eq, nullability)?; + if comparison.statistics().compute_max::() == Some(false) { + // All values are different. + return Ok(Some(comparison)); + } + + comparison = and_kleene( + &compare_dtp(lhs.seconds(), ts_parts.seconds, Operator::Eq, nullability)?, + &comparison, + )?; + + if comparison.statistics().compute_max::() == Some(false) { + // All values are different. 
+ return Ok(Some(comparison)); + } + + comparison = and_kleene( + &compare_dtp( + lhs.subseconds(), + ts_parts.subseconds, + Operator::Eq, + nullability, + )?, + &comparison, + )?; + + Ok(Some(comparison)) +} + +fn compare_ne( + lhs: &DateTimePartsArray, + ts_parts: ×tamp::TimestampParts, + nullability: Nullability, +) -> VortexResult> { + let mut comparison = compare_dtp(lhs.days(), ts_parts.days, Operator::NotEq, nullability)?; + if comparison.statistics().compute_min::() == Some(true) { + // All values are different. + return Ok(Some(comparison)); + } + + comparison = or_kleene( + &compare_dtp( + lhs.seconds(), + ts_parts.seconds, + Operator::NotEq, + nullability, + )?, + &comparison, + )?; + + if comparison.statistics().compute_min::() == Some(true) { + // All values are different. + return Ok(Some(comparison)); + } + + comparison = or_kleene( + &compare_dtp( + lhs.subseconds(), + ts_parts.subseconds, + Operator::NotEq, + nullability, + )?, + &comparison, + )?; + + Ok(Some(comparison)) +} + +fn compare_lt( + lhs: &DateTimePartsArray, + ts_parts: ×tamp::TimestampParts, + nullability: Nullability, +) -> VortexResult> { + let days_lt = compare_dtp(lhs.days(), ts_parts.days, Operator::Lt, nullability)?; + if days_lt.statistics().compute_min::() == Some(true) { + // All values on the lhs are smaller. + return Ok(Some(days_lt)); + } + + Ok(None) +} + +fn compare_gt( + lhs: &DateTimePartsArray, + ts_parts: ×tamp::TimestampParts, + nullability: Nullability, +) -> VortexResult> { + let days_gt = compare_dtp(lhs.days(), ts_parts.days, Operator::Gt, nullability)?; + if days_gt.statistics().compute_min::() == Some(true) { + // All values on the lhs are larger. 
+ return Ok(Some(days_gt)); + } + + Ok(None) +} + +fn compare_dtp( + lhs: &dyn Array, + rhs: i64, + operator: Operator, + nullability: Nullability, +) -> VortexResult { + // Since nullability is stripped from RHS and carried forward through nullability argument we want to incorporate it into lhs.dtype() that we cast rhs into + match ConstantArray::new(rhs, lhs.len()) + .into_array() + .cast(lhs.dtype().with_nullability(nullability)) + { + Ok(casted) => compare(lhs, &casted, operator), + // The narrowing cast failed. Therefore, we know lhs < rhs. + _ => { + let constant_value = match operator { + Operator::Eq | Operator::Gte | Operator::Gt => false, + Operator::NotEq | Operator::Lte | Operator::Lt => true, + }; + Ok( + ConstantArray::new(Scalar::bool(constant_value, nullability), lhs.len()) + .into_array(), + ) + } + } +} + +#[cfg(test)] +mod test { + use rstest::rstest; + use vortex_array::arrays::PrimitiveArray; + use vortex_array::arrays::TemporalArray; + use vortex_array::compute::Operator; + use vortex_array::validity::Validity; + use vortex_buffer::buffer; + use vortex_dtype::IntegerPType; + use vortex_dtype::datetime::TimeUnit; + + use super::*; + + fn dtp_array_from_timestamp( + value: T, + validity: Validity, + ) -> DateTimePartsArray { + DateTimePartsArray::try_from(TemporalArray::new_timestamp( + PrimitiveArray::new(buffer![value], validity).into_array(), + TimeUnit::Seconds, + Some("UTC".into()), + )) + .expect("Failed to construct DateTimePartsArray from TemporalArray") + } + + #[rstest] + #[case(Validity::NonNullable, Validity::NonNullable)] + #[case(Validity::NonNullable, Validity::AllValid)] + #[case(Validity::AllValid, Validity::NonNullable)] + #[case(Validity::AllValid, Validity::AllValid)] + fn compare_date_time_parts_eq(#[case] lhs_validity: Validity, #[case] rhs_validity: Validity) { + let lhs = dtp_array_from_timestamp(86400i64, lhs_validity); // January 2, 1970, 00:00:00 UTC + let rhs = dtp_array_from_timestamp(86400i64, rhs_validity.clone()); 
// January 2, 1970, 00:00:00 UTC + let comparison = compare(lhs.as_ref(), rhs.as_ref(), Operator::Eq).unwrap(); + assert_eq!(comparison.as_bool_typed().true_count().unwrap(), 1); + + let rhs = dtp_array_from_timestamp(0i64, rhs_validity); // January 1, 1970, 00:00:00 UTC + let comparison = compare(lhs.as_ref(), rhs.as_ref(), Operator::Eq).unwrap(); + assert_eq!(comparison.as_bool_typed().true_count().unwrap(), 0); + } + + #[rstest] + #[case(Validity::NonNullable, Validity::NonNullable)] + #[case(Validity::NonNullable, Validity::AllValid)] + #[case(Validity::AllValid, Validity::NonNullable)] + #[case(Validity::AllValid, Validity::AllValid)] + fn compare_date_time_parts_ne(#[case] lhs_validity: Validity, #[case] rhs_validity: Validity) { + let lhs = dtp_array_from_timestamp(86400i64, lhs_validity); // January 2, 1970, 00:00:00 UTC + let rhs = dtp_array_from_timestamp(86401i64, rhs_validity.clone()); // January 2, 1970, 00:00:01 UTC + let comparison = compare(lhs.as_ref(), rhs.as_ref(), Operator::NotEq).unwrap(); + assert_eq!(comparison.as_bool_typed().true_count().unwrap(), 1); + + let rhs = dtp_array_from_timestamp(86400i64, rhs_validity); // January 2, 1970, 00:00:00 UTC + let comparison = compare(lhs.as_ref(), rhs.as_ref(), Operator::NotEq).unwrap(); + assert_eq!(comparison.as_bool_typed().true_count().unwrap(), 0); + } + + #[rstest] + #[case(Validity::NonNullable, Validity::NonNullable)] + #[case(Validity::NonNullable, Validity::AllValid)] + #[case(Validity::AllValid, Validity::NonNullable)] + #[case(Validity::AllValid, Validity::AllValid)] + fn compare_date_time_parts_lt(#[case] lhs_validity: Validity, #[case] rhs_validity: Validity) { + let lhs = dtp_array_from_timestamp(0i64, lhs_validity); // January 1, 1970, 01:00:00 UTC + let rhs = dtp_array_from_timestamp(86400i64, rhs_validity); // January 2, 1970, 00:00:00 UTC + + let comparison = compare(lhs.as_ref(), rhs.as_ref(), Operator::Lt).unwrap(); + assert_eq!(comparison.as_bool_typed().true_count().unwrap(), 
1); + } + + #[rstest] + #[case(Validity::NonNullable, Validity::NonNullable)] + #[case(Validity::NonNullable, Validity::AllValid)] + #[case(Validity::AllValid, Validity::NonNullable)] + #[case(Validity::AllValid, Validity::AllValid)] + fn compare_date_time_parts_gt(#[case] lhs_validity: Validity, #[case] rhs_validity: Validity) { + let lhs = dtp_array_from_timestamp(86400i64, lhs_validity); // January 2, 1970, 02:00:00 UTC + let rhs = dtp_array_from_timestamp(0i64, rhs_validity); // January 1, 1970, 01:00:00 UTC + + let comparison = compare(lhs.as_ref(), rhs.as_ref(), Operator::Gt).unwrap(); + assert_eq!(comparison.as_bool_typed().true_count().unwrap(), 1); + } + + #[rstest] + #[case(Validity::NonNullable, Validity::NonNullable)] + #[case(Validity::NonNullable, Validity::AllValid)] + #[case(Validity::AllValid, Validity::NonNullable)] + #[case(Validity::AllValid, Validity::AllValid)] + fn compare_date_time_parts_narrowing( + #[case] lhs_validity: Validity, + #[case] rhs_validity: Validity, + ) { + let temporal_array = TemporalArray::new_timestamp( + PrimitiveArray::new(buffer![0i64], lhs_validity.clone()).into_array(), + TimeUnit::Seconds, + Some("UTC".into()), + ); + + let lhs = DateTimePartsArray::try_new( + DType::Extension(temporal_array.ext_dtype()), + PrimitiveArray::new(buffer![0i32], lhs_validity).into_array(), + PrimitiveArray::new(buffer![0u32], Validity::NonNullable).into_array(), + PrimitiveArray::new(buffer![0i64], Validity::NonNullable).into_array(), + ) + .unwrap(); + + // Timestamp with a value larger than i32::MAX. 
+ let rhs = dtp_array_from_timestamp(i64::MAX, rhs_validity); + + let comparison = compare(lhs.as_ref(), rhs.as_ref(), Operator::Eq).unwrap(); + assert_eq!(comparison.as_bool_typed().true_count().unwrap(), 0); + + let comparison = compare(lhs.as_ref(), rhs.as_ref(), Operator::NotEq).unwrap(); + assert_eq!(comparison.as_bool_typed().true_count().unwrap(), 1); + + let comparison = compare(lhs.as_ref(), rhs.as_ref(), Operator::Lt).unwrap(); + assert_eq!(comparison.as_bool_typed().true_count().unwrap(), 1); + + let comparison = compare(lhs.as_ref(), rhs.as_ref(), Operator::Lte).unwrap(); + assert_eq!(comparison.as_bool_typed().true_count().unwrap(), 1); + + // `Operator::Gt` and `Operator::Gte` only cover the case of all lhs values + // being larger. Therefore, these cases are not covered by unit tests. + } +} diff --git a/encodings/datetime-parts/src/compute/kernel.rs b/encodings/datetime-parts/src/compute/kernel.rs index 9c95c3439ca..301d8580340 100644 --- a/encodings/datetime-parts/src/compute/kernel.rs +++ b/encodings/datetime-parts/src/compute/kernel.rs @@ -2,11 +2,12 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors use vortex_array::arrays::TakeExecuteAdaptor; +use vortex_array::expr::CompareExecuteAdaptor; use vortex_array::kernel::ParentKernelSet; use crate::DateTimePartsVTable; -pub(crate) const PARENT_KERNELS: ParentKernelSet = - ParentKernelSet::new(&[ParentKernelSet::lift(&TakeExecuteAdaptor( - DateTimePartsVTable, - ))]); +pub(crate) const PARENT_KERNELS: ParentKernelSet = ParentKernelSet::new(&[ + ParentKernelSet::lift(&CompareExecuteAdaptor(DateTimePartsVTable)), + ParentKernelSet::lift(&TakeExecuteAdaptor(DateTimePartsVTable)), +]); diff --git a/encodings/datetime-parts/src/compute/mod.rs b/encodings/datetime-parts/src/compute/mod.rs index 4c9d87765ef..d606daccb59 100644 --- a/encodings/datetime-parts/src/compute/mod.rs +++ b/encodings/datetime-parts/src/compute/mod.rs @@ -2,6 +2,7 @@ // SPDX-FileCopyrightText: Copyright the Vortex 
contributors mod cast; +mod compare; mod filter; mod is_constant; pub(crate) mod kernel; diff --git a/encodings/decimal-byte-parts/src/decimal_byte_parts/compute/compare.rs b/encodings/decimal-byte-parts/src/decimal_byte_parts/compute/compare.rs new file mode 100644 index 00000000000..af2b6342fa3 --- /dev/null +++ b/encodings/decimal-byte-parts/src/decimal_byte_parts/compute/compare.rs @@ -0,0 +1,262 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use Sign::Negative; +use num_traits::NumCast; +use vortex_array::Array; +use vortex_array::ArrayRef; +use vortex_array::ExecutionCtx; +use vortex_array::arrays::ConstantArray; +use vortex_array::compute::Operator; +use vortex_array::compute::compare; +use vortex_array::expr::CompareKernel; +use vortex_dtype::IntegerPType; +use vortex_dtype::Nullability; +use vortex_dtype::PType; +use vortex_dtype::ToI256; +use vortex_dtype::match_each_decimal_value; +use vortex_dtype::match_each_integer_ptype; +use vortex_error::VortexExpect; +use vortex_error::VortexResult; +use vortex_scalar::DecimalValue; +use vortex_scalar::Scalar; +use vortex_scalar::ScalarValue; + +use crate::DecimalBytePartsVTable; +use crate::decimal_byte_parts::compute::compare::Sign::Positive; + +impl CompareKernel for DecimalBytePartsVTable { + fn compare( + array: &Self::Array, + other: &dyn Array, + operator: Operator, + _ctx: &mut ExecutionCtx, + ) -> VortexResult> { + let Some(rhs_const) = other.as_constant() else { + return Ok(None); + }; + + let nullability = array.dtype.nullability() | other.dtype().nullability(); + let scalar_type = array.msp.dtype().with_nullability(nullability); + + if rhs_const.is_null() { + return Ok(None); + } + + let rhs_decimal = rhs_const + .as_decimal() + .decimal_value() + .vortex_expect("RHS is not null"); + + match decimal_value_wrapper_to_primitive( + rhs_decimal, + array.msp.as_primitive_typed().ptype(), + ) { + Ok(value) => { + let encoded_scalar = 
Scalar::try_new(scalar_type, Some(value))?; + let encoded_const = ConstantArray::new(encoded_scalar, other.len()); + compare(&array.msp, &encoded_const.to_array(), operator).map(Some) + } + + Err(sign) => { + // If the MSP and the constant are non-null, we know that failing to coerce the + // constant into the MSP bit-width means that it is larger/smaller + // (depending on the `sign`) than all values in MSP. + // If the LHS or the RHS contain nulls, then we must fallback to the canonicalized + // implementation which does null-checking instead. + if array.all_valid()? && other.all_valid()? { + Ok(Some( + ConstantArray::new( + unconvertible_value(sign, operator, nullability), + array.len(), + ) + .to_array(), + )) + } else { + Ok(None) + } + } + } + } +} + +// Used to represent the overflow direction when trying to +// convert into the scalar type. +#[derive(Debug)] +enum Sign { + Positive, + Negative, +} + +fn unconvertible_value(sign: Sign, operator: Operator, nullability: Nullability) -> Scalar { + match operator { + Operator::Eq => Scalar::bool(false, nullability), + Operator::NotEq => Scalar::bool(true, nullability), + Operator::Gt | Operator::Gte => Scalar::bool(matches!(sign, Negative), nullability), + Operator::Lt | Operator::Lte => Scalar::bool(matches!(sign, Positive), nullability), + } +} + +// this value return None is the decimal scalar cannot be cast the ptype. +fn decimal_value_wrapper_to_primitive( + decimal_value: DecimalValue, + ptype: PType, +) -> Result { + match_each_integer_ptype!(ptype, |P| { + decimal_value_to_primitive::

(decimal_value) + }) +} + +fn decimal_value_to_primitive

(decimal_value: DecimalValue) -> Result +where + P: IntegerPType + ToI256, + ScalarValue: From

, +{ + match_each_decimal_value!(decimal_value, |decimal_v| { + let Some(encoded) =

::from(decimal_v) else { + let decimal_i256 = decimal_v + .to_i256() + .vortex_expect("i256 is big enough for any DecimalValue"); + return if decimal_i256 + > P::max_value() + .to_i256() + .vortex_expect("i256 is big enough for any PType") + { + Err(Positive) + } else { + assert!( + decimal_i256 + < P::min_value() + .to_i256() + .vortex_expect("i256 is big enough for any PType") + ); + Err(Negative) + }; + }; + Ok(ScalarValue::from(encoded)) + }) +} + +#[cfg(test)] +mod tests { + use vortex_array::Array; + use vortex_array::IntoArray; + use vortex_array::arrays::BoolArray; + use vortex_array::arrays::ConstantArray; + use vortex_array::arrays::PrimitiveArray; + use vortex_array::assert_arrays_eq; + use vortex_array::compute::Operator; + use vortex_array::compute::compare; + use vortex_array::validity::Validity; + use vortex_buffer::buffer; + use vortex_dtype::DType; + use vortex_dtype::DecimalDType; + use vortex_dtype::Nullability; + use vortex_error::VortexResult; + use vortex_scalar::DecimalValue; + use vortex_scalar::Scalar; + + use crate::DecimalBytePartsArray; + + #[test] + fn compare_decimal_const() { + let decimal_dtype = DecimalDType::new(8, 2); + let dtype = DType::Decimal(decimal_dtype, Nullability::Nullable); + let lhs = DecimalBytePartsArray::try_new( + PrimitiveArray::new(buffer![100i32, 200i32, 400i32], Validity::AllValid).to_array(), + decimal_dtype, + ) + .unwrap() + .to_array(); + let rhs = ConstantArray::new( + Scalar::try_new(dtype, Some(DecimalValue::I64(400).into())).unwrap(), + lhs.len(), + ); + + let res = compare(lhs.as_ref(), rhs.as_ref(), Operator::Eq).unwrap(); + + let expected = BoolArray::from_iter([Some(false), Some(false), Some(true)]).into_array(); + assert_arrays_eq!(res, expected); + } + + #[test] + fn test_byteparts_compare_nullable() -> VortexResult<()> { + let decimal_type = DecimalDType::new(19, -11); + let lhs = DecimalBytePartsArray::try_new( + PrimitiveArray::new( + buffer![1i64, 2i64, 3i64, 4i64], + 
Validity::Array(BoolArray::from_iter([false, true, true, true]).into_array()), + ) + .into_array(), + decimal_type, + )?; + + let rhs = ConstantArray::new( + Scalar::decimal( + DecimalValue::I128(289888198), + decimal_type, + Nullability::NonNullable, + ), + 4, + ) + .into_array(); + + let res = compare(lhs.as_ref(), rhs.as_ref(), Operator::Lte)?; + let expected = + BoolArray::from_iter([None, Some(true), Some(true), Some(true)]).into_array(); + assert_arrays_eq!(res, expected); + + Ok(()) + } + + #[test] + fn compare_decimal_const_unconvertible_comparison() { + let decimal_dtype = DecimalDType::new(40, 2); + let dtype = DType::Decimal(decimal_dtype, Nullability::Nullable); + let lhs = DecimalBytePartsArray::try_new( + PrimitiveArray::new(buffer![100i32, 200i32, 400i32], Validity::AllValid).to_array(), + decimal_dtype, + ) + .unwrap() + .to_array(); + // This cannot be converted to a i32. + let rhs = ConstantArray::new( + Scalar::try_new( + dtype.clone(), + Some(DecimalValue::I128(-9999999999999965304).into()), + ) + .unwrap(), + lhs.len(), + ); + + let res = compare(lhs.as_ref(), rhs.as_ref(), Operator::Eq).unwrap(); + let expected = BoolArray::from_iter([Some(false), Some(false), Some(false)]).into_array(); + assert_arrays_eq!(res, expected); + + let res = compare(lhs.as_ref(), rhs.as_ref(), Operator::Gt).unwrap(); + let expected = BoolArray::from_iter([Some(true), Some(true), Some(true)]).into_array(); + assert_arrays_eq!(res, expected); + + let res = compare(lhs.as_ref(), rhs.as_ref(), Operator::Lt).unwrap(); + let expected = BoolArray::from_iter([Some(false), Some(false), Some(false)]).into_array(); + assert_arrays_eq!(res, expected); + + // This cannot be converted to a i32. 
+ let rhs = ConstantArray::new( + Scalar::try_new(dtype, Some(DecimalValue::I128(9999999999999965304).into())).unwrap(), + lhs.len(), + ); + + let res = compare(lhs.as_ref(), rhs.as_ref(), Operator::Eq).unwrap(); + let expected = BoolArray::from_iter([Some(false), Some(false), Some(false)]).into_array(); + assert_arrays_eq!(res, expected); + + let res = compare(lhs.as_ref(), rhs.as_ref(), Operator::Gt).unwrap(); + let expected = BoolArray::from_iter([Some(false), Some(false), Some(false)]).into_array(); + assert_arrays_eq!(res, expected); + + let res = compare(lhs.as_ref(), rhs.as_ref(), Operator::Lt).unwrap(); + let expected = BoolArray::from_iter([Some(true), Some(true), Some(true)]).into_array(); + assert_arrays_eq!(res, expected); + } +} diff --git a/encodings/decimal-byte-parts/src/decimal_byte_parts/compute/kernel.rs b/encodings/decimal-byte-parts/src/decimal_byte_parts/compute/kernel.rs index a802fad5db1..b53d03359b5 100644 --- a/encodings/decimal-byte-parts/src/decimal_byte_parts/compute/kernel.rs +++ b/encodings/decimal-byte-parts/src/decimal_byte_parts/compute/kernel.rs @@ -2,11 +2,12 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors use vortex_array::arrays::TakeExecuteAdaptor; +use vortex_array::expr::CompareExecuteAdaptor; use vortex_array::kernel::ParentKernelSet; use crate::DecimalBytePartsVTable; -pub(crate) const PARENT_KERNELS: ParentKernelSet = - ParentKernelSet::new(&[ParentKernelSet::lift(&TakeExecuteAdaptor( - DecimalBytePartsVTable, - ))]); +pub(crate) const PARENT_KERNELS: ParentKernelSet = ParentKernelSet::new(&[ + ParentKernelSet::lift(&CompareExecuteAdaptor(DecimalBytePartsVTable)), + ParentKernelSet::lift(&TakeExecuteAdaptor(DecimalBytePartsVTable)), +]); diff --git a/encodings/decimal-byte-parts/src/decimal_byte_parts/compute/mod.rs b/encodings/decimal-byte-parts/src/decimal_byte_parts/compute/mod.rs index a12bebf3b1e..2e798106a7e 100644 --- a/encodings/decimal-byte-parts/src/decimal_byte_parts/compute/mod.rs +++ 
b/encodings/decimal-byte-parts/src/decimal_byte_parts/compute/mod.rs @@ -2,6 +2,7 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors mod cast; +mod compare; mod filter; mod is_constant; pub(crate) mod kernel; diff --git a/encodings/fastlanes/src/for/compute/compare.rs b/encodings/fastlanes/src/for/compute/compare.rs new file mode 100644 index 00000000000..cb13a89eeab --- /dev/null +++ b/encodings/fastlanes/src/for/compute/compare.rs @@ -0,0 +1,215 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use std::ops::Shr; + +use num_traits::WrappingSub; +use vortex_array::Array; +use vortex_array::ArrayRef; +use vortex_array::ExecutionCtx; +use vortex_array::arrays::ConstantArray; +use vortex_array::compute::Operator; +use vortex_array::compute::compare; +use vortex_array::expr::CompareKernel; +use vortex_dtype::NativePType; +use vortex_dtype::Nullability; +use vortex_dtype::match_each_integer_ptype; +use vortex_error::VortexError; +use vortex_error::VortexExpect as _; +use vortex_error::VortexResult; +use vortex_scalar::PValue; +use vortex_scalar::Scalar; + +use crate::FoRArray; +use crate::FoRVTable; + +impl CompareKernel for FoRVTable { + fn compare( + array: &FoRArray, + other: &dyn Array, + operator: Operator, + _ctx: &mut ExecutionCtx, + ) -> VortexResult> { + if let Some(constant) = other.as_constant() + && let Some(constant) = constant.as_primitive_opt() + { + if constant.pvalue().is_none() { + return Ok(None); + } + match_each_integer_ptype!(constant.ptype(), |T| { + return compare_constant( + array, + constant.typed_value::().vortex_expect("RHS is not null"), + other.dtype().nullability(), + operator, + ); + }) + } + + Ok(None) + } +} + +fn compare_constant( + lhs: &FoRArray, + mut rhs: T, + nullability: Nullability, + operator: Operator, +) -> VortexResult> +where + T: NativePType + WrappingSub + Shr, + T: TryFrom, + PValue: From, +{ + // For now, we only support equals and not equals. 
Comparisons are a little more fiddly to + // get right regarding how to handle overflow and the wrapping subtraction. + if !matches!(operator, Operator::Eq | Operator::NotEq) { + return Ok(None); + } + + let reference = lhs.reference_scalar(); + let reference = reference.as_primitive().typed_value::(); + + // We encode the RHS into the FoR domain. + if let Some(reference) = reference { + rhs = rhs.wrapping_sub(&reference); + } + + // Wrap up the RHS into a scalar and cast to the encoded DType (this will be the equivalent + // unsigned integer type). + let rhs = Scalar::primitive(rhs, nullability); + + compare( + lhs.encoded(), + ConstantArray::new(rhs, lhs.len()).as_ref(), + operator, + ) + .map(Some) +} + +#[cfg(test)] +mod tests { + use vortex_array::IntoArray; + use vortex_array::arrays::BoolArray; + use vortex_array::arrays::PrimitiveArray; + use vortex_array::assert_arrays_eq; + use vortex_array::validity::Validity; + use vortex_buffer::buffer; + use vortex_dtype::DType; + + use super::*; + + #[test] + fn test_compare_constant() { + let reference = Scalar::from(10); + // 10, 30, 12 + let lhs = FoRArray::try_new( + PrimitiveArray::new(buffer!(0i32, 20, 2), Validity::AllValid).into_array(), + reference, + ) + .unwrap(); + + let result = compare_constant(&lhs, 30i32, Nullability::NonNullable, Operator::Eq) + .unwrap() + .unwrap(); + assert_arrays_eq!(result, BoolArray::from_iter([false, true, false].map(Some))); + + let result = compare_constant(&lhs, 12i32, Nullability::NonNullable, Operator::NotEq) + .unwrap() + .unwrap(); + assert_arrays_eq!(result, BoolArray::from_iter([true, true, false].map(Some))); + + for op in [Operator::Lt, Operator::Lte, Operator::Gt, Operator::Gte] { + assert!( + compare_constant(&lhs, 30i32, Nullability::NonNullable, op) + .unwrap() + .is_none() + ); + } + } + + #[test] + fn test_compare_nullable_constant() { + let reference = Scalar::from(0); + // 10, 30, 12 + let lhs = FoRArray::try_new( + PrimitiveArray::new(buffer!(0i32, 20, 2), 
Validity::NonNullable).into_array(), + reference, + ) + .unwrap(); + + assert_eq!( + compare_constant(&lhs, 30i32, Nullability::Nullable, Operator::Eq) + .unwrap() + .unwrap() + .dtype(), + &DType::Bool(Nullability::Nullable) + ); + assert_eq!( + compare_constant(&lhs, 30i32, Nullability::NonNullable, Operator::Eq) + .unwrap() + .unwrap() + .dtype(), + &DType::Bool(Nullability::NonNullable) + ); + } + + #[test] + fn compare_non_encodable_constant() { + let reference = Scalar::from(10); + // 10, 30, 12 + let lhs = FoRArray::try_new( + PrimitiveArray::new(buffer!(0i32, 10, 1), Validity::AllValid).into_array(), + reference, + ) + .unwrap(); + + let result = compare_constant(&lhs, -1i32, Nullability::NonNullable, Operator::Eq) + .unwrap() + .unwrap(); + assert_arrays_eq!( + result, + BoolArray::from_iter([false, false, false].map(Some)) + ); + + let result = compare_constant(&lhs, -1i32, Nullability::NonNullable, Operator::NotEq) + .unwrap() + .unwrap(); + assert_arrays_eq!(result, BoolArray::from_iter([true, true, true].map(Some))); + } + + #[test] + fn compare_large_constant() { + let reference = Scalar::from(-9219218377546224477i64); + #[allow(clippy::cast_possible_truncation)] + let lhs = FoRArray::try_new( + PrimitiveArray::new( + buffer![0i64, 9654309310445864926u64 as i64], + Validity::AllValid, + ) + .into_array(), + reference, + ) + .unwrap(); + + let result = compare_constant( + &lhs, + 435090932899640449i64, + Nullability::Nullable, + Operator::Eq, + ) + .unwrap() + .unwrap(); + assert_arrays_eq!(result, BoolArray::from_iter([Some(false), Some(true)])); + + let result = compare_constant( + &lhs, + 435090932899640449i64, + Nullability::Nullable, + Operator::NotEq, + ) + .unwrap() + .unwrap(); + assert_arrays_eq!(result, BoolArray::from_iter([Some(true), Some(false)])); + } +} diff --git a/encodings/fastlanes/src/for/compute/mod.rs b/encodings/fastlanes/src/for/compute/mod.rs index 95103cbee4a..a8efc731793 100644 --- 
a/encodings/fastlanes/src/for/compute/mod.rs +++ b/encodings/fastlanes/src/for/compute/mod.rs @@ -2,6 +2,7 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors mod cast; +mod compare; mod is_constant; mod is_sorted; diff --git a/encodings/fastlanes/src/for/vtable/kernels.rs b/encodings/fastlanes/src/for/vtable/kernels.rs index 60a009afe15..96e1f010178 100644 --- a/encodings/fastlanes/src/for/vtable/kernels.rs +++ b/encodings/fastlanes/src/for/vtable/kernels.rs @@ -2,9 +2,12 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors use vortex_array::arrays::TakeExecuteAdaptor; +use vortex_array::expr::CompareExecuteAdaptor; use vortex_array::kernel::ParentKernelSet; use crate::FoRVTable; -pub(crate) const PARENT_KERNELS: ParentKernelSet = - ParentKernelSet::new(&[ParentKernelSet::lift(&TakeExecuteAdaptor(FoRVTable))]); +pub(crate) const PARENT_KERNELS: ParentKernelSet = ParentKernelSet::new(&[ + ParentKernelSet::lift(&CompareExecuteAdaptor(FoRVTable)), + ParentKernelSet::lift(&TakeExecuteAdaptor(FoRVTable)), +]); diff --git a/encodings/fsst/src/compute/compare.rs b/encodings/fsst/src/compute/compare.rs new file mode 100644 index 00000000000..e337f380e0e --- /dev/null +++ b/encodings/fsst/src/compute/compare.rs @@ -0,0 +1,197 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_array::Array; +use vortex_array::ArrayRef; +use vortex_array::ExecutionCtx; +use vortex_array::IntoArray; +use vortex_array::ToCanonical; +use vortex_array::arrays::BoolArray; +use vortex_array::arrays::ConstantArray; +use vortex_array::compute::Operator; +use vortex_array::compute::compare; +use vortex_array::compute::compare_lengths_to_empty; +use vortex_array::expr::CompareKernel; +use vortex_array::validity::Validity; +use vortex_buffer::BitBuffer; +use vortex_buffer::ByteBuffer; +use vortex_dtype::DType; +use vortex_dtype::match_each_integer_ptype; +use vortex_error::VortexExpect; +use 
vortex_error::VortexResult; +use vortex_error::vortex_bail; +use vortex_scalar::Scalar; + +use crate::FSSTArray; +use crate::FSSTVTable; + +impl CompareKernel for FSSTVTable { + fn compare( + array: &FSSTArray, + other: &dyn Array, + operator: Operator, + _ctx: &mut ExecutionCtx, + ) -> VortexResult> { + match other.as_constant() { + Some(constant) => compare_fsst_constant(array, &constant, operator), + // Otherwise, fall back to the default comparison behavior. + _ => Ok(None), + } + } +} + +/// Specialized compare function implementation used when performing against a constant +fn compare_fsst_constant( + left: &FSSTArray, + right: &Scalar, + operator: Operator, +) -> VortexResult> { + if right.is_null() { + return Ok(None); + } + + let is_rhs_empty = match right.dtype() { + DType::Binary(_) => right + .as_binary() + .is_empty() + .vortex_expect("RHS should not be null"), + DType::Utf8(_) => right + .as_utf8() + .is_empty() + .vortex_expect("RHS should not be null"), + _ => vortex_bail!("VarBinArray can only have type of Binary or Utf8"), + }; + if is_rhs_empty { + let buffer = match operator { + // Every possible value is gte "" + Operator::Gte => BitBuffer::new_set(left.len()), + // No value is lt "" + Operator::Lt => BitBuffer::new_unset(left.len()), + _ => { + let uncompressed_lengths = left.uncompressed_lengths().to_primitive(); + match_each_integer_ptype!(uncompressed_lengths.ptype(), |P| { + compare_lengths_to_empty( + uncompressed_lengths.as_slice::
<P>
().iter().copied(), + operator, + ) + }) + } + }; + + return Ok(Some( + BoolArray::new( + buffer, + Validity::copy_from_array(left.as_ref())? + .union_nullability(right.dtype().nullability()), + ) + .into_array(), + )); + } + + // The following section only supports Eq/NotEq + if !matches!(operator, Operator::Eq | Operator::NotEq) { + return Ok(None); + } + + let compressor = left.compressor(); + let encoded_buffer = match left.dtype() { + DType::Utf8(_) => { + let value = right + .as_utf8() + .value() + .vortex_expect("Expected non-null scalar"); + ByteBuffer::from(compressor.compress(value.as_bytes())) + } + DType::Binary(_) => { + let value = right + .as_binary() + .value() + .vortex_expect("Expected non-null scalar"); + ByteBuffer::from(compressor.compress(value.as_slice())) + } + _ => unreachable!("FSSTArray can only have string or binary data type"), + }; + + let encoded_scalar = Scalar::binary( + encoded_buffer, + left.dtype().nullability() | right.dtype().nullability(), + ); + + let rhs = ConstantArray::new(encoded_scalar, left.len()); + compare(left.codes().as_ref(), rhs.as_ref(), operator).map(Some) +} + +#[cfg(test)] +mod tests { + use vortex_array::Array; + use vortex_array::ToCanonical; + use vortex_array::arrays::BoolArray; + use vortex_array::arrays::ConstantArray; + use vortex_array::arrays::VarBinArray; + use vortex_array::assert_arrays_eq; + use vortex_array::compute::Operator; + use vortex_array::compute::compare; + use vortex_dtype::DType; + use vortex_dtype::Nullability; + use vortex_scalar::Scalar; + + use crate::fsst_compress; + use crate::fsst_train_compressor; + + #[test] + #[cfg_attr(miri, ignore)] + fn test_compare_fsst() { + let lhs = VarBinArray::from_iter( + [ + Some("hello"), + None, + Some("world"), + None, + Some("this is a very long string"), + ], + DType::Utf8(Nullability::Nullable), + ); + let compressor = fsst_train_compressor(&lhs); + let lhs = fsst_compress(lhs, &compressor); + + let rhs = ConstantArray::new("world", 
lhs.len()); + + // Ensure fastpath for Eq exists, and returns correct answer + let equals = compare(lhs.as_ref(), rhs.as_ref(), Operator::Eq) + .unwrap() + .to_bool(); + + assert_eq!(equals.dtype(), &DType::Bool(Nullability::Nullable)); + + assert_arrays_eq!( + &equals, + &BoolArray::from_iter([Some(false), None, Some(true), None, Some(false)]) + ); + + // Ensure fastpath for Eq exists, and returns correct answer + let not_equals = compare(lhs.as_ref(), rhs.as_ref(), Operator::NotEq) + .unwrap() + .to_bool(); + + assert_eq!(not_equals.dtype(), &DType::Bool(Nullability::Nullable)); + assert_arrays_eq!( + &not_equals, + &BoolArray::from_iter([Some(true), None, Some(false), None, Some(true)]) + ); + + // Ensure null constants are handled correctly. + let null_rhs = + ConstantArray::new(Scalar::null(DType::Utf8(Nullability::Nullable)), lhs.len()); + let equals_null = compare(lhs.as_ref(), null_rhs.as_ref(), Operator::Eq).unwrap(); + assert_arrays_eq!( + &equals_null, + &BoolArray::from_iter([None::<bool>, None, None, None, None]) + ); + + let noteq_null = compare(lhs.as_ref(), null_rhs.as_ref(), Operator::NotEq).unwrap(); + assert_arrays_eq!( + &noteq_null, + &BoolArray::from_iter([None::<bool>, None, None, None, None]) + ); + } +} diff --git a/encodings/fsst/src/compute/mod.rs b/encodings/fsst/src/compute/mod.rs index d657bffb807..02f6663722c 100644 --- a/encodings/fsst/src/compute/mod.rs +++ b/encodings/fsst/src/compute/mod.rs @@ -2,6 +2,7 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors mod cast; +mod compare; mod filter; use vortex_array::Array; diff --git a/encodings/fsst/src/kernel.rs b/encodings/fsst/src/kernel.rs index e3e11a1ed5e..d304e5dc653 100644 --- a/encodings/fsst/src/kernel.rs +++ b/encodings/fsst/src/kernel.rs @@ -3,11 +3,13 @@ use vortex_array::arrays::FilterExecuteAdaptor; use vortex_array::arrays::TakeExecuteAdaptor; +use vortex_array::expr::CompareExecuteAdaptor; use vortex_array::kernel::ParentKernelSet; use crate::FSSTVTable; pub(super) const
PARENT_KERNELS: ParentKernelSet = ParentKernelSet::new(&[ + ParentKernelSet::lift(&CompareExecuteAdaptor(FSSTVTable)), ParentKernelSet::lift(&FilterExecuteAdaptor(FSSTVTable)), ParentKernelSet::lift(&TakeExecuteAdaptor(FSSTVTable)), ]); diff --git a/encodings/runend/src/compute/compare.rs b/encodings/runend/src/compute/compare.rs new file mode 100644 index 00000000000..3023bd4c218 --- /dev/null +++ b/encodings/runend/src/compute/compare.rs @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_array::Array; +use vortex_array::ArrayRef; +use vortex_array::ExecutionCtx; +use vortex_array::IntoArray; +use vortex_array::ToCanonical; +use vortex_array::arrays::ConstantArray; +use vortex_array::compute::Operator; +use vortex_array::compute::compare; +use vortex_array::expr::CompareKernel; +use vortex_error::VortexResult; + +use crate::RunEndArray; +use crate::RunEndVTable; +use crate::compress::runend_decode_bools; + +impl CompareKernel for RunEndVTable { + fn compare( + array: &RunEndArray, + other: &dyn Array, + operator: Operator, + _ctx: &mut ExecutionCtx, + ) -> VortexResult> { + // If the RHS is constant, then we just need to compare against our encoded values. 
+ if let Some(const_scalar) = other.as_constant() { + let values = compare( + array.values(), + ConstantArray::new(const_scalar, array.values().len()).as_ref(), + operator, + )?; + let decoded = runend_decode_bools( + array.ends().to_primitive(), + values.to_bool(), + array.offset(), + array.len(), + )?; + return Ok(Some(decoded.into_array())); + } + + // Otherwise, fall back + Ok(None) + } +} + +#[cfg(test)] +mod test { + use vortex_array::IntoArray; + use vortex_array::arrays::BoolArray; + use vortex_array::arrays::ConstantArray; + use vortex_array::arrays::PrimitiveArray; + use vortex_array::assert_arrays_eq; + use vortex_array::compute::Operator; + use vortex_array::compute::compare; + + use crate::RunEndArray; + + fn ree_array() -> RunEndArray { + RunEndArray::encode( + PrimitiveArray::from_iter([1, 1, 1, 4, 4, 4, 2, 2, 5, 5, 5, 5]).into_array(), + ) + .unwrap() + } + + #[test] + fn compare_run_end() { + let arr = ree_array(); + let res = compare( + arr.as_ref(), + ConstantArray::new(5, 12).as_ref(), + Operator::Eq, + ) + .unwrap(); + let expected = BoolArray::from_iter([ + false, false, false, false, false, false, false, false, true, true, true, true, + ]); + assert_arrays_eq!(res, expected); + } +} diff --git a/encodings/runend/src/compute/mod.rs b/encodings/runend/src/compute/mod.rs index 9dfdfc82455..8b390bef750 100644 --- a/encodings/runend/src/compute/mod.rs +++ b/encodings/runend/src/compute/mod.rs @@ -2,6 +2,7 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors mod cast; +mod compare; mod fill_null; pub(crate) mod filter; mod is_constant; diff --git a/encodings/runend/src/kernel.rs b/encodings/runend/src/kernel.rs index 4873d9e15b1..e74d8338f1a 100644 --- a/encodings/runend/src/kernel.rs +++ b/encodings/runend/src/kernel.rs @@ -11,6 +11,7 @@ use vortex_array::arrays::FilterExecuteAdaptor; use vortex_array::arrays::SliceArray; use vortex_array::arrays::SliceVTable; use vortex_array::arrays::TakeExecuteAdaptor; +use 
vortex_array::expr::CompareExecuteAdaptor; use vortex_array::kernel::ExecuteParentKernel; use vortex_array::kernel::ParentKernelSet; use vortex_error::VortexResult; @@ -20,6 +21,7 @@ use crate::RunEndVTable; use crate::compute::take_from::RunEndVTableTakeFrom; pub(super) const PARENT_KERNELS: ParentKernelSet = ParentKernelSet::new(&[ + ParentKernelSet::lift(&CompareExecuteAdaptor(RunEndVTable)), ParentKernelSet::lift(&RunEndSliceKernel), ParentKernelSet::lift(&FilterExecuteAdaptor(RunEndVTable)), ParentKernelSet::lift(&TakeExecuteAdaptor(RunEndVTable)), diff --git a/vortex-array/src/arrays/dict/compute/compare.rs b/vortex-array/src/arrays/dict/compute/compare.rs new file mode 100644 index 00000000000..a2aad04a17c --- /dev/null +++ b/vortex-array/src/arrays/dict/compute/compare.rs @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_error::VortexResult; + +use super::DictArray; +use super::DictVTable; +use crate::Array; +use crate::ArrayRef; +use crate::ExecutionCtx; +use crate::IntoArray; +use crate::arrays::ConstantArray; +use crate::compute::Operator; +use crate::compute::compare; +use crate::expr::CompareKernel; + +impl CompareKernel for DictVTable { + fn compare( + array: &DictArray, + other: &dyn Array, + operator: Operator, + _ctx: &mut ExecutionCtx, + ) -> VortexResult> { + // if we have more values than codes, it is faster to canonicalise first. + if array.values().len() > array.codes().len() { + return Ok(None); + } + + // If the RHS is constant, then we just need to compare against our encoded values. 
+ if let Some(rhs) = other.as_constant() { + let compare_result = compare( + array.values(), + ConstantArray::new(rhs, array.values().len()).as_ref(), + operator, + )?; + + // SAFETY: values len preserved, codes all still point to valid values + let result = unsafe { + DictArray::new_unchecked(array.codes().clone(), compare_result) + .set_all_values_referenced(array.has_all_values_referenced()) + .into_array() + }; + + // We canonicalize the result because dictionary-encoded bools is dumb. + return Ok(Some(result.to_canonical()?.into_array())); + } + + // It's a little more complex, but we could perform a comparison against the dictionary + // values in the future. + Ok(None) + } +} diff --git a/vortex-array/src/arrays/dict/compute/mod.rs b/vortex-array/src/arrays/dict/compute/mod.rs index ac9742303cb..056b151ec06 100644 --- a/vortex-array/src/arrays/dict/compute/mod.rs +++ b/vortex-array/src/arrays/dict/compute/mod.rs @@ -2,6 +2,7 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors mod cast; +mod compare; mod fill_null; mod is_constant; mod is_sorted; diff --git a/vortex-array/src/arrays/dict/vtable/kernel.rs b/vortex-array/src/arrays/dict/vtable/kernel.rs index 32a9c328532..5ab633d0b21 100644 --- a/vortex-array/src/arrays/dict/vtable/kernel.rs +++ b/vortex-array/src/arrays/dict/vtable/kernel.rs @@ -3,10 +3,12 @@ use crate::arrays::DictVTable; use crate::arrays::TakeExecuteAdaptor; +use crate::expr::CompareExecuteAdaptor; use crate::expr::FillNullExecuteAdaptor; use crate::kernel::ParentKernelSet; pub(super) const PARENT_KERNELS: ParentKernelSet = ParentKernelSet::new(&[ + ParentKernelSet::lift(&CompareExecuteAdaptor(DictVTable)), ParentKernelSet::lift(&TakeExecuteAdaptor(DictVTable)), ParentKernelSet::lift(&FillNullExecuteAdaptor(DictVTable)), ]); diff --git a/vortex-array/src/arrays/extension/compute/compare.rs b/vortex-array/src/arrays/extension/compute/compare.rs new file mode 100644 index 00000000000..3178c63cff5 --- /dev/null +++ 
b/vortex-array/src/arrays/extension/compute/compare.rs @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_error::VortexResult; + +use crate::Array; +use crate::ArrayRef; +use crate::ExecutionCtx; +use crate::arrays::ConstantArray; +use crate::arrays::ExtensionArray; +use crate::arrays::ExtensionVTable; +use crate::compute; +use crate::compute::Operator; +use crate::expr::CompareKernel; + +impl CompareKernel for ExtensionVTable { + fn compare( + array: &ExtensionArray, + other: &dyn Array, + operator: Operator, + _ctx: &mut ExecutionCtx, + ) -> VortexResult> { + // If the RHS is a constant, we can extract the storage scalar. + if let Some(const_ext) = other.as_constant() { + let storage_scalar = const_ext.as_extension().to_storage_scalar(); + return compute::compare( + array.storage(), + ConstantArray::new(storage_scalar, array.len()).as_ref(), + operator, + ) + .map(Some); + } + + // If the RHS is an extension array matching ours, we can extract the storage. + if let Some(rhs_ext) = other.as_opt::() { + return compute::compare(array.storage(), rhs_ext.storage(), operator).map(Some); + } + + // Otherwise, we need the RHS to handle this comparison. 
+ Ok(None) + } +} diff --git a/vortex-array/src/arrays/extension/compute/mod.rs b/vortex-array/src/arrays/extension/compute/mod.rs index 90dba24e2fa..71770541b56 100644 --- a/vortex-array/src/arrays/extension/compute/mod.rs +++ b/vortex-array/src/arrays/extension/compute/mod.rs @@ -2,6 +2,7 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors mod cast; +mod compare; mod filter; mod is_constant; mod is_sorted; diff --git a/vortex-array/src/arrays/extension/vtable/kernel.rs b/vortex-array/src/arrays/extension/vtable/kernel.rs index 41f4f8a0f9a..a5c07cbf2f1 100644 --- a/vortex-array/src/arrays/extension/vtable/kernel.rs +++ b/vortex-array/src/arrays/extension/vtable/kernel.rs @@ -3,7 +3,10 @@ use crate::arrays::ExtensionVTable; use crate::arrays::TakeExecuteAdaptor; +use crate::expr::CompareExecuteAdaptor; use crate::kernel::ParentKernelSet; -pub(super) const PARENT_KERNELS: ParentKernelSet = - ParentKernelSet::new(&[ParentKernelSet::lift(&TakeExecuteAdaptor(ExtensionVTable))]); +pub(super) const PARENT_KERNELS: ParentKernelSet = ParentKernelSet::new(&[ + ParentKernelSet::lift(&CompareExecuteAdaptor(ExtensionVTable)), + ParentKernelSet::lift(&TakeExecuteAdaptor(ExtensionVTable)), +]); diff --git a/vortex-array/src/arrays/varbin/compute/compare.rs b/vortex-array/src/arrays/varbin/compute/compare.rs new file mode 100644 index 00000000000..3c46b80836c --- /dev/null +++ b/vortex-array/src/arrays/varbin/compute/compare.rs @@ -0,0 +1,238 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use arrow_array::BinaryArray; +use arrow_array::StringArray; +use arrow_ord::cmp; +use itertools::Itertools; +use vortex_buffer::BitBuffer; +use vortex_dtype::DType; +use vortex_dtype::IntegerPType; +use vortex_dtype::match_each_integer_ptype; +use vortex_error::VortexExpect as _; +use vortex_error::VortexResult; +use vortex_error::vortex_bail; +use vortex_error::vortex_err; + +use crate::Array; +use crate::ArrayRef; +use 
crate::ExecutionCtx; +use crate::IntoArray; +use crate::ToCanonical; +use crate::arrays::BoolArray; +use crate::arrays::PrimitiveArray; +use crate::arrays::VarBinArray; +use crate::arrays::VarBinVTable; +use crate::arrow::Datum; +use crate::arrow::from_arrow_array_with_len; +use crate::compute::Operator; +use crate::compute::compare; +use crate::compute::compare_lengths_to_empty; +use crate::expr::CompareKernel; +use crate::vtable::ValidityHelper; + +// This implementation exists so we can have custom translation of RHS to arrow that's not the same as IntoCanonical +impl CompareKernel for VarBinVTable { + fn compare( + array: &VarBinArray, + other: &dyn Array, + operator: Operator, + _ctx: &mut ExecutionCtx, + ) -> VortexResult> { + if let Some(rhs_const) = other.as_constant() { + if rhs_const.is_null() { + return Ok(None); + } + + let nullable = array.dtype().is_nullable() || rhs_const.dtype().is_nullable(); + let len = array.len(); + + let rhs_is_empty = match rhs_const.dtype() { + DType::Binary(_) => rhs_const + .as_binary() + .is_empty() + .vortex_expect("RHS should not be null"), + DType::Utf8(_) => rhs_const + .as_utf8() + .is_empty() + .vortex_expect("RHS should not be null"), + _ => vortex_bail!("VarBinArray can only have type of Binary or Utf8"), + }; + + if rhs_is_empty { + let buffer = match operator { + Operator::Gte => BitBuffer::new_set(len), // Every possible value is >= "" + Operator::Lt => BitBuffer::new_unset(len), // No value is < "" + Operator::Eq | Operator::NotEq | Operator::Gt | Operator::Lte => { + let lhs_offsets = array.offsets().to_primitive(); + match_each_integer_ptype!(lhs_offsets.ptype(), |P| { + compare_offsets_to_empty::
<P>
(lhs_offsets, operator) + }) + } + }; + + return Ok(Some( + BoolArray::new( + buffer, + array + .validity() + .clone() + .union_nullability(other.dtype().nullability()), + ) + .into_array(), + )); + } + + let lhs = Datum::try_new(array.as_ref())?; + + // Use StringViewArray/BinaryViewArray to match the Utf8View/BinaryView types + // produced by Datum::try_new (which uses into_arrow_preferred()) + let arrow_rhs: &dyn arrow_array::Datum = match rhs_const.dtype() { + DType::Utf8(_) => &rhs_const + .as_utf8() + .value() + .map(StringArray::new_scalar) + .unwrap_or_else(|| arrow_array::Scalar::new(StringArray::new_null(1))), + DType::Binary(_) => &rhs_const + .as_binary() + .value() + .map(BinaryArray::new_scalar) + .unwrap_or_else(|| arrow_array::Scalar::new(BinaryArray::new_null(1))), + _ => vortex_bail!( + "VarBin array RHS can only be Utf8 or Binary, given {}", + rhs_const.dtype() + ), + }; + + let array = match operator { + Operator::Eq => cmp::eq(&lhs, arrow_rhs), + Operator::NotEq => cmp::neq(&lhs, arrow_rhs), + Operator::Gt => cmp::gt(&lhs, arrow_rhs), + Operator::Gte => cmp::gt_eq(&lhs, arrow_rhs), + Operator::Lt => cmp::lt(&lhs, arrow_rhs), + Operator::Lte => cmp::lt_eq(&lhs, arrow_rhs), + } + .map_err(|err| vortex_err!("Failed to compare VarBin array: {}", err))?; + + Ok(Some(from_arrow_array_with_len(&array, len, nullable)?)) + } else if !other.is::() { + // NOTE: If the rhs is not a VarBin array it will be canonicalized to a VarBinView + // Arrow doesn't support comparing VarBin to VarBinView arrays, so we convert ourselves + // to VarBinView and re-invoke. + return Ok(Some(compare( + array.to_varbinview().as_ref(), + other, + operator, + )?)); + } else { + Ok(None) + } + } +} + +fn compare_offsets_to_empty( + offsets: PrimitiveArray, + operator: Operator, +) -> BitBuffer { + let lengths_iter = offsets + .as_slice::
<P>
() + .iter() + .tuple_windows() + .map(|(&s, &e)| e - s); + compare_lengths_to_empty(lengths_iter, operator) +} + +#[cfg(test)] +mod test { + use vortex_buffer::BitBuffer; + use vortex_buffer::ByteBuffer; + use vortex_dtype::DType; + use vortex_dtype::Nullability; + use vortex_scalar::Scalar; + + use crate::ToCanonical; + use crate::arrays::ConstantArray; + use crate::arrays::VarBinArray; + use crate::arrays::VarBinViewArray; + use crate::compute::Operator; + use crate::compute::compare; + + #[test] + fn test_binary_compare() { + let array = VarBinArray::from_iter( + [Some(b"abc".to_vec()), None, Some(b"def".to_vec())], + DType::Binary(Nullability::Nullable), + ); + let result = compare( + array.as_ref(), + ConstantArray::new( + Scalar::binary(ByteBuffer::copy_from(b"abc"), Nullability::Nullable), + 3, + ) + .as_ref(), + Operator::Eq, + ) + .unwrap() + .to_bool(); + + assert_eq!( + &result.validity_mask().unwrap().to_bit_buffer(), + &BitBuffer::from_iter([true, false, true]) + ); + assert_eq!( + result.to_bit_buffer(), + BitBuffer::from_iter([true, false, false]) + ); + } + + #[test] + fn varbinview_compare() { + let array = VarBinArray::from_iter( + [Some(b"abc".to_vec()), None, Some(b"def".to_vec())], + DType::Binary(Nullability::Nullable), + ); + let vbv = VarBinViewArray::from_iter( + [None, None, Some(b"def".to_vec())], + DType::Binary(Nullability::Nullable), + ); + let result = compare(array.as_ref(), vbv.as_ref(), Operator::Eq) + .unwrap() + .to_bool(); + + assert_eq!( + result.validity_mask().unwrap().to_bit_buffer(), + BitBuffer::from_iter([false, false, true]) + ); + assert_eq!( + result.to_bit_buffer(), + BitBuffer::from_iter([false, true, true]) + ); + } +} + +#[cfg(test)] +mod tests { + use vortex_dtype::DType; + use vortex_dtype::Nullability; + use vortex_scalar::Scalar; + + use crate::Array; + use crate::arrays::ConstantArray; + use crate::arrays::VarBinArray; + use crate::compute::Operator; + use crate::compute::compare; + + #[test] + fn 
test_null_compare() { + let arr = VarBinArray::from_iter([Some("h")], DType::Utf8(Nullability::NonNullable)); + + let const_ = ConstantArray::new(Scalar::utf8("", Nullability::Nullable), 1); + + assert_eq!( + compare(arr.as_ref(), const_.as_ref(), Operator::Eq) + .unwrap() + .dtype(), + &DType::Bool(Nullability::Nullable) + ); + } +} diff --git a/vortex-array/src/arrays/varbin/compute/mod.rs b/vortex-array/src/arrays/varbin/compute/mod.rs index 6d2eb68044c..4af8bd61d13 100644 --- a/vortex-array/src/arrays/varbin/compute/mod.rs +++ b/vortex-array/src/arrays/varbin/compute/mod.rs @@ -6,6 +6,7 @@ mod slice; pub(crate) use min_max::varbin_compute_min_max; mod cast; +mod compare; mod filter; mod is_constant; mod is_sorted; diff --git a/vortex-array/src/arrays/varbin/vtable/kernel.rs b/vortex-array/src/arrays/varbin/vtable/kernel.rs index 6a94dffb2f8..7c07e795740 100644 --- a/vortex-array/src/arrays/varbin/vtable/kernel.rs +++ b/vortex-array/src/arrays/varbin/vtable/kernel.rs @@ -4,9 +4,11 @@ use crate::arrays::TakeExecuteAdaptor; use crate::arrays::VarBinVTable; use crate::arrays::filter::FilterExecuteAdaptor; +use crate::expr::CompareExecuteAdaptor; use crate::kernel::ParentKernelSet; pub(super) const PARENT_KERNELS: ParentKernelSet = ParentKernelSet::new(&[ + ParentKernelSet::lift(&CompareExecuteAdaptor(VarBinVTable)), ParentKernelSet::lift(&FilterExecuteAdaptor(VarBinVTable)), ParentKernelSet::lift(&TakeExecuteAdaptor(VarBinVTable)), ]); diff --git a/vortex-array/src/expr/exprs/binary/kernel.rs b/vortex-array/src/expr/exprs/binary/kernel.rs new file mode 100644 index 00000000000..357ecdff1fb --- /dev/null +++ b/vortex-array/src/expr/exprs/binary/kernel.rs @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_error::VortexResult; + +use super::Binary; +use crate::ArrayRef; +use crate::ExecutionCtx; +use crate::arrays::ExactScalarFn; +use crate::arrays::ScalarFnArrayView; +use 
crate::arrays::ScalarFnVTable; +use crate::compute::Operator; +use crate::kernel::ExecuteParentKernel; +use crate::vtable::VTable; + +/// Trait for encoding-specific comparison kernels that operate in encoded space. +/// +/// Implementations can compare an encoded array against another array (typically a constant) +/// without first decompressing. The adaptor normalizes operand order so `array` is always +/// the left-hand side, swapping the operator when necessary. +pub trait CompareKernel: VTable { + fn compare( + array: &Self::Array, + other: &dyn crate::Array, + operator: Operator, + ctx: &mut ExecutionCtx, + ) -> VortexResult>; +} + +/// Adaptor that bridges [`CompareKernel`] implementations to [`ExecuteParentKernel`]. +/// +/// When a `ScalarFnArray(Binary, cmp_op)` wraps a child that implements `CompareKernel`, +/// this adaptor extracts the comparison operator and other operand, normalizes operand order +/// (swapping the operator if the encoded array is on the RHS), and delegates to the kernel. 
+#[derive(Default, Debug)] +pub struct CompareExecuteAdaptor(pub V); + +impl ExecuteParentKernel for CompareExecuteAdaptor +where + V: CompareKernel, +{ + type Parent = ExactScalarFn; + + fn execute_parent( + &self, + array: &V::Array, + parent: ScalarFnArrayView<'_, Binary>, + child_idx: usize, + ctx: &mut ExecutionCtx, + ) -> VortexResult> { + // Only handle comparison operators + let Some(cmp_op) = parent.options.maybe_cmp_operator() else { + return Ok(None); + }; + + // Get the ScalarFnArray to access children + let Some(scalar_fn_array) = parent.as_opt::() else { + return Ok(None); + }; + let children = scalar_fn_array.children(); + + // Normalize so `array` is always LHS, swapping the operator if needed + let (cmp_op, other) = match child_idx { + 0 => (cmp_op, &children[1]), + 1 => (cmp_op.swap(), &children[0]), + _ => return Ok(None), + }; + + V::compare(array, other.as_ref(), cmp_op, ctx) + } +} diff --git a/vortex-array/src/expr/exprs/binary/mod.rs b/vortex-array/src/expr/exprs/binary/mod.rs index e01231b9ce1..cc01b59991b 100644 --- a/vortex-array/src/expr/exprs/binary/mod.rs +++ b/vortex-array/src/expr/exprs/binary/mod.rs @@ -30,6 +30,9 @@ mod boolean; pub(crate) use boolean::*; mod compare; pub(crate) use compare::*; +mod kernel; +pub use kernel::CompareExecuteAdaptor; +pub use kernel::CompareKernel; mod numeric; pub(crate) use numeric::*; From c42da1c5dd0908f7f744a80b543c21611db85ac0 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Mon, 16 Feb 2026 10:48:23 +0000 Subject: [PATCH 03/20] u Signed-off-by: Joe Isaacs --- vortex-array/src/expr/exprs/binary/compare.rs | 64 +++++++++++++++++ vortex-array/src/expr/exprs/binary/kernel.rs | 71 ------------------- vortex-array/src/expr/exprs/binary/mod.rs | 3 - 3 files changed, 64 insertions(+), 74 deletions(-) delete mode 100644 vortex-array/src/expr/exprs/binary/kernel.rs diff --git a/vortex-array/src/expr/exprs/binary/compare.rs b/vortex-array/src/expr/exprs/binary/compare.rs index 1fcb7d9bb88..9e605c93562 
100644 --- a/vortex-array/src/expr/exprs/binary/compare.rs +++ b/vortex-array/src/expr/exprs/binary/compare.rs @@ -9,15 +9,79 @@ use vortex_scalar::Scalar; use crate::Array; use crate::ArrayRef; use crate::Canonical; +use crate::ExecutionCtx; use crate::IntoArray; use crate::arrays::ConstantArray; use crate::arrays::ConstantVTable; +use crate::arrays::ExactScalarFn; +use crate::arrays::ScalarFnArrayView; +use crate::arrays::ScalarFnVTable; use crate::arrow::Datum; use crate::arrow::IntoArrowArray; use crate::arrow::from_arrow_array_with_len; use crate::compute::Operator; use crate::compute::compare_nested_arrow_arrays; use crate::compute::scalar_cmp; +use crate::expr::Binary; +use crate::kernel::ExecuteParentKernel; +use crate::vtable::VTable; + +/// Trait for encoding-specific comparison kernels that operate in encoded space. +/// +/// Implementations can compare an encoded array against another array (typically a constant) +/// without first decompressing. The adaptor normalizes operand order so `array` is always +/// the left-hand side, swapping the operator when necessary. +pub trait CompareKernel: VTable { + fn compare( + array: &Self::Array, + other: &dyn Array, + operator: Operator, + ctx: &mut ExecutionCtx, + ) -> VortexResult>; +} + +/// Adaptor that bridges [`CompareKernel`] implementations to [`ExecuteParentKernel`]. +/// +/// When a `ScalarFnArray(Binary, cmp_op)` wraps a child that implements `CompareKernel`, +/// this adaptor extracts the comparison operator and other operand, normalizes operand order +/// (swapping the operator if the encoded array is on the RHS), and delegates to the kernel. 
+#[derive(Default, Debug)] +pub struct CompareExecuteAdaptor(pub V); + +impl ExecuteParentKernel for CompareExecuteAdaptor +where + V: CompareKernel, +{ + type Parent = ExactScalarFn; + + fn execute_parent( + &self, + array: &V::Array, + parent: ScalarFnArrayView<'_, Binary>, + child_idx: usize, + ctx: &mut ExecutionCtx, + ) -> VortexResult> { + // Only handle comparison operators + let Some(cmp_op) = parent.options.maybe_cmp_operator() else { + return Ok(None); + }; + + // Get the ScalarFnArray to access children + let Some(scalar_fn_array) = parent.as_opt::() else { + return Ok(None); + }; + let children = scalar_fn_array.children(); + + // Normalize so `array` is always LHS, swapping the operator if needed + let (cmp_op, other) = match child_idx { + 0 => (cmp_op, &children[1]), + 1 => (cmp_op.swap(), &children[0]), + _ => return Ok(None), + }; + + V::compare(array, other.as_ref(), cmp_op, ctx) + } +} /// Execute a compare operation between two arrays. /// diff --git a/vortex-array/src/expr/exprs/binary/kernel.rs b/vortex-array/src/expr/exprs/binary/kernel.rs deleted file mode 100644 index 357ecdff1fb..00000000000 --- a/vortex-array/src/expr/exprs/binary/kernel.rs +++ /dev/null @@ -1,71 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -use vortex_error::VortexResult; - -use super::Binary; -use crate::ArrayRef; -use crate::ExecutionCtx; -use crate::arrays::ExactScalarFn; -use crate::arrays::ScalarFnArrayView; -use crate::arrays::ScalarFnVTable; -use crate::compute::Operator; -use crate::kernel::ExecuteParentKernel; -use crate::vtable::VTable; - -/// Trait for encoding-specific comparison kernels that operate in encoded space. -/// -/// Implementations can compare an encoded array against another array (typically a constant) -/// without first decompressing. The adaptor normalizes operand order so `array` is always -/// the left-hand side, swapping the operator when necessary. 
-pub trait CompareKernel: VTable { - fn compare( - array: &Self::Array, - other: &dyn crate::Array, - operator: Operator, - ctx: &mut ExecutionCtx, - ) -> VortexResult>; -} - -/// Adaptor that bridges [`CompareKernel`] implementations to [`ExecuteParentKernel`]. -/// -/// When a `ScalarFnArray(Binary, cmp_op)` wraps a child that implements `CompareKernel`, -/// this adaptor extracts the comparison operator and other operand, normalizes operand order -/// (swapping the operator if the encoded array is on the RHS), and delegates to the kernel. -#[derive(Default, Debug)] -pub struct CompareExecuteAdaptor(pub V); - -impl ExecuteParentKernel for CompareExecuteAdaptor -where - V: CompareKernel, -{ - type Parent = ExactScalarFn; - - fn execute_parent( - &self, - array: &V::Array, - parent: ScalarFnArrayView<'_, Binary>, - child_idx: usize, - ctx: &mut ExecutionCtx, - ) -> VortexResult> { - // Only handle comparison operators - let Some(cmp_op) = parent.options.maybe_cmp_operator() else { - return Ok(None); - }; - - // Get the ScalarFnArray to access children - let Some(scalar_fn_array) = parent.as_opt::() else { - return Ok(None); - }; - let children = scalar_fn_array.children(); - - // Normalize so `array` is always LHS, swapping the operator if needed - let (cmp_op, other) = match child_idx { - 0 => (cmp_op, &children[1]), - 1 => (cmp_op.swap(), &children[0]), - _ => return Ok(None), - }; - - V::compare(array, other.as_ref(), cmp_op, ctx) - } -} diff --git a/vortex-array/src/expr/exprs/binary/mod.rs b/vortex-array/src/expr/exprs/binary/mod.rs index cc01b59991b..e01231b9ce1 100644 --- a/vortex-array/src/expr/exprs/binary/mod.rs +++ b/vortex-array/src/expr/exprs/binary/mod.rs @@ -30,9 +30,6 @@ mod boolean; pub(crate) use boolean::*; mod compare; pub(crate) use compare::*; -mod kernel; -pub use kernel::CompareExecuteAdaptor; -pub use kernel::CompareKernel; mod numeric; pub(crate) use numeric::*; From 8287929f375641a9234f35eab0435d5eef5aea63 Mon Sep 17 00:00:00 2001 From: 
Joe Isaacs Date: Mon, 16 Feb 2026 11:10:34 +0000 Subject: [PATCH 04/20] u Signed-off-by: Joe Isaacs --- vortex-array/src/expr/exprs/binary/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vortex-array/src/expr/exprs/binary/mod.rs b/vortex-array/src/expr/exprs/binary/mod.rs index e01231b9ce1..8bbeb5a4675 100644 --- a/vortex-array/src/expr/exprs/binary/mod.rs +++ b/vortex-array/src/expr/exprs/binary/mod.rs @@ -29,7 +29,7 @@ use crate::expr::stats::Stat; mod boolean; pub(crate) use boolean::*; mod compare; -pub(crate) use compare::*; +pub use compare::*; mod numeric; pub(crate) use numeric::*; From 97094576dea2a452a444703841f7c65c6b8ab062 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Mon, 16 Feb 2026 11:15:44 +0000 Subject: [PATCH 05/20] u Signed-off-by: Joe Isaacs --- vortex-array/benches/dict_compare.rs | 95 +++++++++++++++++----------- 1 file changed, 57 insertions(+), 38 deletions(-) diff --git a/vortex-array/benches/dict_compare.rs b/vortex-array/benches/dict_compare.rs index d4412967ea6..e5bdcede59c 100644 --- a/vortex-array/benches/dict_compare.rs +++ b/vortex-array/benches/dict_compare.rs @@ -6,6 +6,7 @@ use std::str::from_utf8; use vortex_array::Canonical; +use vortex_array::RecursiveCanonical; use vortex_array::VortexSessionExecute; use vortex_array::accessor::ArrayAccessor; use vortex_array::arrays::ConstantArray; @@ -50,15 +51,20 @@ fn bench_compare_primitive(bencher: divan::Bencher, (len, uniqueness): (usize, u let primitive_arr = gen_primitive_for_dict::(len, uniqueness); let dict = dict_encode(primitive_arr.as_ref()).unwrap(); let value = primitive_arr.as_slice::()[0]; + let session = VortexSession::empty(); - bencher.with_inputs(|| &dict).bench_refs(|dict| { - compare( - dict.as_ref(), - ConstantArray::new(value, len).as_ref(), - Operator::Eq, - ) - .unwrap() - }) + bencher + .with_inputs(|| (&dict, session.create_execution_ctx())) + .bench_refs(|(dict, ctx)| { + compare( + dict.as_ref(), + ConstantArray::new(value, 
len).as_ref(), + Operator::Eq, + ) + .unwrap() + .execute::(ctx) + .unwrap() + }) } #[divan::bench(args = LENGTH_AND_UNIQUE_VALUES)] @@ -67,15 +73,20 @@ fn bench_compare_varbin(bencher: divan::Bencher, (len, uniqueness): (usize, usiz let dict = dict_encode(varbin_arr.as_ref()).unwrap(); let bytes = varbin_arr.with_iterator(|i| i.next().unwrap().unwrap().to_vec()); let value = from_utf8(bytes.as_slice()).unwrap(); + let session = VortexSession::empty(); - bencher.with_inputs(|| &dict).bench_refs(|dict| { - compare( - dict.as_ref(), - ConstantArray::new(value, len).as_ref(), - Operator::Eq, - ) - .unwrap() - }) + bencher + .with_inputs(|| (&dict, session.create_execution_ctx())) + .bench_refs(|(dict, ctx)| { + compare( + dict.as_ref(), + ConstantArray::new(value, len).as_ref(), + Operator::Eq, + ) + .unwrap() + .execute::(ctx) + .unwrap() + }) } #[divan::bench(args = LENGTH_AND_UNIQUE_VALUES)] @@ -84,14 +95,20 @@ fn bench_compare_varbinview(bencher: divan::Bencher, (len, uniqueness): (usize, let dict = dict_encode(varbinview_arr.as_ref()).unwrap(); let bytes = varbinview_arr.with_iterator(|i| i.next().unwrap().unwrap().to_vec()); let value = from_utf8(bytes.as_slice()).unwrap(); - bencher.with_inputs(|| &dict).bench_refs(|dict| { - compare( - dict.as_ref(), - ConstantArray::new(value, len).as_ref(), - Operator::Eq, - ) - .unwrap() - }) + let session = VortexSession::empty(); + + bencher + .with_inputs(|| (&dict, session.create_execution_ctx())) + .bench_refs(|(dict, ctx)| { + compare( + dict.as_ref(), + ConstantArray::new(value, len).as_ref(), + Operator::Eq, + ) + .unwrap() + .execute::(ctx) + .unwrap() + }) } const CODES_AND_VALUES_LENGTHS: &[(usize, usize)] = &[ @@ -117,13 +134,14 @@ fn bench_compare_sliced_dict_primitive( let value = primitive_arr.as_slice::()[0]; let session = VortexSession::empty(); - bencher.with_inputs(|| &dict).bench_refs(|dict| { - let mut ctx = session.create_execution_ctx(); - dict.apply(&eq(root(), lit(value))) - .unwrap() - 
.execute::(&mut ctx) - .unwrap() - }) + bencher + .with_inputs(|| (&dict, session.create_execution_ctx())) + .bench_refs(|(dict, ctx)| { + dict.apply(&eq(root(), lit(value))) + .unwrap() + .execute::(ctx) + .unwrap() + }) } #[divan::bench(args = CODES_AND_VALUES_LENGTHS)] @@ -138,11 +156,12 @@ fn bench_compare_sliced_dict_varbinview( let value = from_utf8(bytes.as_slice()).unwrap(); let session = VortexSession::empty(); - bencher.with_inputs(|| &dict).bench_refs(|dict| { - let mut ctx = session.create_execution_ctx(); - dict.apply(&eq(root(), lit(value))) - .unwrap() - .execute::(&mut ctx) - .unwrap() - }) + bencher + .with_inputs(|| (&dict, session.create_execution_ctx())) + .bench_refs(|(dict, ctx)| { + dict.apply(&eq(root(), lit(value))) + .unwrap() + .execute::(ctx) + .unwrap() + }) } From ec2fbd19f35bf08607369312c472a861fd8639b7 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Mon, 16 Feb 2026 11:26:43 +0000 Subject: [PATCH 06/20] u Signed-off-by: Joe Isaacs --- encodings/alp/public-api.lock | 8 +- encodings/datetime-parts/public-api.lock | 8 +- encodings/decimal-byte-parts/public-api.lock | 8 +- encodings/fastlanes/public-api.lock | 8 +- encodings/fsst/public-api.lock | 4 +- encodings/runend/public-api.lock | 8 +- encodings/sequence/public-api.lock | 4 - vortex-array/public-api.lock | 142 ++++++++----------- 8 files changed, 78 insertions(+), 112 deletions(-) diff --git a/encodings/alp/public-api.lock b/encodings/alp/public-api.lock index 4105a593e53..fabb5ed64f5 100644 --- a/encodings/alp/public-api.lock +++ b/encodings/alp/public-api.lock @@ -256,10 +256,6 @@ impl vortex_array::compute::between::BetweenKernel for vortex_alp::ALPVTable pub fn vortex_alp::ALPVTable::between(&self, array: &vortex_alp::ALPArray, lower: &dyn vortex_array::array::Array, upper: &dyn vortex_array::array::Array, options: &vortex_array::compute::between::BetweenOptions) -> vortex_error::VortexResult> -impl vortex_array::compute::compare::CompareKernel for vortex_alp::ALPVTable - -pub 
fn vortex_alp::ALPVTable::compare(&self, lhs: &vortex_alp::ALPArray, rhs: &dyn vortex_array::array::Array, operator: vortex_array::compute::compare::Operator) -> vortex_error::VortexResult> - impl vortex_array::compute::mask::MaskKernel for vortex_alp::ALPVTable pub fn vortex_alp::ALPVTable::mask(&self, array: &vortex_alp::ALPArray, filter_mask: &vortex_mask::Mask) -> vortex_error::VortexResult @@ -268,6 +264,10 @@ impl vortex_array::compute::nan_count::NaNCountKernel for vortex_alp::ALPVTable pub fn vortex_alp::ALPVTable::nan_count(&self, array: &vortex_alp::ALPArray) -> vortex_error::VortexResult +impl vortex_array::expr::exprs::binary::compare::CompareKernel for vortex_alp::ALPVTable + +pub fn vortex_alp::ALPVTable::compare(array: &vortex_alp::ALPArray, other: &dyn vortex_array::array::Array, operator: vortex_array::compute::compare::Operator, _ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult> + impl vortex_array::expr::exprs::cast::kernel::CastReduce for vortex_alp::ALPVTable pub fn vortex_alp::ALPVTable::cast(array: &vortex_alp::ALPArray, dtype: &vortex_dtype::dtype::DType) -> vortex_error::VortexResult> diff --git a/encodings/datetime-parts/public-api.lock b/encodings/datetime-parts/public-api.lock index b7938753428..901bc633434 100644 --- a/encodings/datetime-parts/public-api.lock +++ b/encodings/datetime-parts/public-api.lock @@ -114,10 +114,6 @@ impl vortex_array::arrays::slice::SliceReduce for vortex_datetime_parts::DateTim pub fn vortex_datetime_parts::DateTimePartsVTable::slice(array: &Self::Array, range: core::ops::range::Range) -> vortex_error::VortexResult> -impl vortex_array::compute::compare::CompareKernel for vortex_datetime_parts::DateTimePartsVTable - -pub fn vortex_datetime_parts::DateTimePartsVTable::compare(&self, lhs: &vortex_datetime_parts::DateTimePartsArray, rhs: &dyn vortex_array::array::Array, operator: vortex_array::compute::compare::Operator) -> vortex_error::VortexResult> - impl 
vortex_array::compute::is_constant::IsConstantKernel for vortex_datetime_parts::DateTimePartsVTable pub fn vortex_datetime_parts::DateTimePartsVTable::is_constant(&self, array: &vortex_datetime_parts::DateTimePartsArray, opts: &vortex_array::compute::is_constant::IsConstantOpts) -> vortex_error::VortexResult> @@ -126,6 +122,10 @@ impl vortex_array::compute::mask::MaskKernel for vortex_datetime_parts::DateTime pub fn vortex_datetime_parts::DateTimePartsVTable::mask(&self, array: &vortex_datetime_parts::DateTimePartsArray, mask_array: &vortex_mask::Mask) -> vortex_error::VortexResult +impl vortex_array::expr::exprs::binary::compare::CompareKernel for vortex_datetime_parts::DateTimePartsVTable + +pub fn vortex_datetime_parts::DateTimePartsVTable::compare(array: &vortex_datetime_parts::DateTimePartsArray, other: &dyn vortex_array::array::Array, operator: vortex_array::compute::compare::Operator, _ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult> + impl vortex_array::expr::exprs::cast::kernel::CastReduce for vortex_datetime_parts::DateTimePartsVTable pub fn vortex_datetime_parts::DateTimePartsVTable::cast(array: &vortex_datetime_parts::DateTimePartsArray, dtype: &vortex_dtype::dtype::DType) -> vortex_error::VortexResult> diff --git a/encodings/decimal-byte-parts/public-api.lock b/encodings/decimal-byte-parts/public-api.lock index 42212cf004b..2b40655a403 100644 --- a/encodings/decimal-byte-parts/public-api.lock +++ b/encodings/decimal-byte-parts/public-api.lock @@ -64,10 +64,6 @@ impl vortex_array::arrays::slice::SliceReduce for vortex_decimal_byte_parts::Dec pub fn vortex_decimal_byte_parts::DecimalBytePartsVTable::slice(array: &vortex_decimal_byte_parts::DecimalBytePartsArray, range: core::ops::range::Range) -> vortex_error::VortexResult> -impl vortex_array::compute::compare::CompareKernel for vortex_decimal_byte_parts::DecimalBytePartsVTable - -pub fn vortex_decimal_byte_parts::DecimalBytePartsVTable::compare(&self, lhs: &Self::Array, 
rhs: &dyn vortex_array::array::Array, operator: vortex_array::compute::compare::Operator) -> vortex_error::VortexResult> - impl vortex_array::compute::is_constant::IsConstantKernel for vortex_decimal_byte_parts::DecimalBytePartsVTable pub fn vortex_decimal_byte_parts::DecimalBytePartsVTable::is_constant(&self, array: &vortex_decimal_byte_parts::DecimalBytePartsArray, opts: &vortex_array::compute::is_constant::IsConstantOpts) -> vortex_error::VortexResult> @@ -76,6 +72,10 @@ impl vortex_array::compute::mask::MaskKernel for vortex_decimal_byte_parts::Deci pub fn vortex_decimal_byte_parts::DecimalBytePartsVTable::mask(&self, array: &vortex_decimal_byte_parts::DecimalBytePartsArray, mask_array: &vortex_mask::Mask) -> vortex_error::VortexResult +impl vortex_array::expr::exprs::binary::compare::CompareKernel for vortex_decimal_byte_parts::DecimalBytePartsVTable + +pub fn vortex_decimal_byte_parts::DecimalBytePartsVTable::compare(array: &Self::Array, other: &dyn vortex_array::array::Array, operator: vortex_array::compute::compare::Operator, _ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult> + impl vortex_array::expr::exprs::cast::kernel::CastReduce for vortex_decimal_byte_parts::DecimalBytePartsVTable pub fn vortex_decimal_byte_parts::DecimalBytePartsVTable::cast(array: &vortex_decimal_byte_parts::DecimalBytePartsArray, dtype: &vortex_dtype::dtype::DType) -> vortex_error::VortexResult> diff --git a/encodings/fastlanes/public-api.lock b/encodings/fastlanes/public-api.lock index d1fcdeef9db..0cd232e0e47 100644 --- a/encodings/fastlanes/public-api.lock +++ b/encodings/fastlanes/public-api.lock @@ -464,10 +464,6 @@ impl vortex_array::arrays::slice::SliceReduce for vortex_fastlanes::FoRVTable pub fn vortex_fastlanes::FoRVTable::slice(array: &Self::Array, range: core::ops::range::Range) -> vortex_error::VortexResult> -impl vortex_array::compute::compare::CompareKernel for vortex_fastlanes::FoRVTable - -pub fn 
vortex_fastlanes::FoRVTable::compare(&self, lhs: &vortex_fastlanes::FoRArray, rhs: &dyn vortex_array::array::Array, operator: vortex_array::compute::compare::Operator) -> vortex_error::VortexResult> - impl vortex_array::compute::is_constant::IsConstantKernel for vortex_fastlanes::FoRVTable pub fn vortex_fastlanes::FoRVTable::is_constant(&self, array: &vortex_fastlanes::FoRArray, opts: &vortex_array::compute::is_constant::IsConstantOpts) -> vortex_error::VortexResult> @@ -478,6 +474,10 @@ pub fn vortex_fastlanes::FoRVTable::is_sorted(&self, array: &vortex_fastlanes::F pub fn vortex_fastlanes::FoRVTable::is_strict_sorted(&self, array: &vortex_fastlanes::FoRArray) -> vortex_error::VortexResult> +impl vortex_array::expr::exprs::binary::compare::CompareKernel for vortex_fastlanes::FoRVTable + +pub fn vortex_fastlanes::FoRVTable::compare(array: &vortex_fastlanes::FoRArray, other: &dyn vortex_array::array::Array, operator: vortex_array::compute::compare::Operator, _ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult> + impl vortex_array::expr::exprs::cast::kernel::CastReduce for vortex_fastlanes::FoRVTable pub fn vortex_fastlanes::FoRVTable::cast(array: &vortex_fastlanes::FoRArray, dtype: &vortex_dtype::dtype::DType) -> vortex_error::VortexResult> diff --git a/encodings/fsst/public-api.lock b/encodings/fsst/public-api.lock index dfefa5d92ec..1cbb204d1dd 100644 --- a/encodings/fsst/public-api.lock +++ b/encodings/fsst/public-api.lock @@ -104,9 +104,9 @@ impl vortex_array::arrays::slice::SliceReduce for vortex_fsst::FSSTVTable pub fn vortex_fsst::FSSTVTable::slice(array: &Self::Array, range: core::ops::range::Range) -> vortex_error::VortexResult> -impl vortex_array::compute::compare::CompareKernel for vortex_fsst::FSSTVTable +impl vortex_array::expr::exprs::binary::compare::CompareKernel for vortex_fsst::FSSTVTable -pub fn vortex_fsst::FSSTVTable::compare(&self, lhs: &vortex_fsst::FSSTArray, rhs: &dyn vortex_array::array::Array, operator: 
vortex_array::compute::compare::Operator) -> vortex_error::VortexResult> +pub fn vortex_fsst::FSSTVTable::compare(array: &vortex_fsst::FSSTArray, other: &dyn vortex_array::array::Array, operator: vortex_array::compute::compare::Operator, _ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult> impl vortex_array::expr::exprs::cast::kernel::CastReduce for vortex_fsst::FSSTVTable diff --git a/encodings/runend/public-api.lock b/encodings/runend/public-api.lock index ea9bd0e14f6..44c286b1ca3 100644 --- a/encodings/runend/public-api.lock +++ b/encodings/runend/public-api.lock @@ -122,10 +122,6 @@ impl vortex_array::arrays::filter::kernel::FilterKernel for vortex_runend::RunEn pub fn vortex_runend::RunEndVTable::filter(array: &vortex_runend::RunEndArray, mask: &vortex_mask::Mask, _ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult> -impl vortex_array::compute::compare::CompareKernel for vortex_runend::RunEndVTable - -pub fn vortex_runend::RunEndVTable::compare(&self, lhs: &vortex_runend::RunEndArray, rhs: &dyn vortex_array::array::Array, operator: vortex_array::compute::compare::Operator) -> vortex_error::VortexResult> - impl vortex_array::compute::is_constant::IsConstantKernel for vortex_runend::RunEndVTable pub fn vortex_runend::RunEndVTable::is_constant(&self, array: &Self::Array, opts: &vortex_array::compute::is_constant::IsConstantOpts) -> vortex_error::VortexResult> @@ -140,6 +136,10 @@ impl vortex_array::compute::min_max::MinMaxKernel for vortex_runend::RunEndVTabl pub fn vortex_runend::RunEndVTable::min_max(&self, array: &vortex_runend::RunEndArray) -> vortex_error::VortexResult> +impl vortex_array::expr::exprs::binary::compare::CompareKernel for vortex_runend::RunEndVTable + +pub fn vortex_runend::RunEndVTable::compare(array: &vortex_runend::RunEndArray, other: &dyn vortex_array::array::Array, operator: vortex_array::compute::compare::Operator, _ctx: &mut vortex_array::executor::ExecutionCtx) -> 
vortex_error::VortexResult> + impl vortex_array::expr::exprs::cast::kernel::CastReduce for vortex_runend::RunEndVTable pub fn vortex_runend::RunEndVTable::cast(array: &vortex_runend::RunEndArray, dtype: &vortex_dtype::dtype::DType) -> vortex_error::VortexResult> diff --git a/encodings/sequence/public-api.lock b/encodings/sequence/public-api.lock index 3a8034b3fad..a0ca040a86c 100644 --- a/encodings/sequence/public-api.lock +++ b/encodings/sequence/public-api.lock @@ -78,10 +78,6 @@ impl vortex_array::arrays::slice::SliceReduce for vortex_sequence::SequenceVTabl pub fn vortex_sequence::SequenceVTable::slice(array: &Self::Array, range: core::ops::range::Range) -> vortex_error::VortexResult> -impl vortex_array::compute::compare::CompareKernel for vortex_sequence::SequenceVTable - -pub fn vortex_sequence::SequenceVTable::compare(&self, lhs: &vortex_sequence::SequenceArray, rhs: &dyn vortex_array::array::Array, operator: vortex_array::compute::compare::Operator) -> vortex_error::VortexResult> - impl vortex_array::compute::is_sorted::IsSortedKernel for vortex_sequence::SequenceVTable pub fn vortex_sequence::SequenceVTable::is_sorted(&self, array: &vortex_sequence::SequenceArray) -> vortex_error::VortexResult> diff --git a/vortex-array/public-api.lock b/vortex-array/public-api.lock index c0d512c63ee..1edfa5ce965 100644 --- a/vortex-array/public-api.lock +++ b/vortex-array/public-api.lock @@ -98,10 +98,6 @@ impl vortex_array::arrays::TakeExecute for vortex_array::arrays::DictVTable pub fn vortex_array::arrays::DictVTable::take(array: &vortex_array::arrays::DictArray, indices: &dyn vortex_array::Array, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> -impl vortex_array::compute::CompareKernel for vortex_array::arrays::DictVTable - -pub fn vortex_array::arrays::DictVTable::compare(&self, lhs: &vortex_array::arrays::DictArray, rhs: &dyn vortex_array::Array, operator: vortex_array::compute::Operator) -> vortex_error::VortexResult> - impl 
vortex_array::compute::IsConstantKernel for vortex_array::arrays::DictVTable pub fn vortex_array::arrays::DictVTable::is_constant(&self, array: &vortex_array::arrays::DictArray, opts: &vortex_array::compute::IsConstantOpts) -> vortex_error::VortexResult> @@ -124,6 +120,10 @@ impl vortex_array::expr::CastReduce for vortex_array::arrays::DictVTable pub fn vortex_array::arrays::DictVTable::cast(array: &vortex_array::arrays::DictArray, dtype: &vortex_dtype::dtype::DType) -> vortex_error::VortexResult> +impl vortex_array::expr::CompareKernel for vortex_array::arrays::DictVTable + +pub fn vortex_array::arrays::DictVTable::compare(array: &vortex_array::arrays::DictArray, other: &dyn vortex_array::Array, operator: vortex_array::compute::Operator, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + impl vortex_array::expr::FillNullKernel for vortex_array::arrays::DictVTable pub fn vortex_array::arrays::DictVTable::fill_null(array: &vortex_array::arrays::DictArray, fill_value: &vortex_scalar::scalar::Scalar, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> @@ -520,10 +520,6 @@ impl vortex_array::arrays::TakeExecute for vortex_array::arrays::ChunkedVTable pub fn vortex_array::arrays::ChunkedVTable::take(array: &vortex_array::arrays::ChunkedArray, indices: &dyn vortex_array::Array, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> -impl vortex_array::compute::CompareKernel for vortex_array::arrays::ChunkedVTable - -pub fn vortex_array::arrays::ChunkedVTable::compare(&self, lhs: &vortex_array::arrays::ChunkedArray, rhs: &dyn vortex_array::Array, operator: vortex_array::compute::Operator) -> vortex_error::VortexResult> - impl vortex_array::compute::IsConstantKernel for vortex_array::arrays::ChunkedVTable pub fn vortex_array::arrays::ChunkedVTable::is_constant(&self, array: &vortex_array::arrays::ChunkedArray, opts: &vortex_array::compute::IsConstantOpts) -> vortex_error::VortexResult> @@ -690,10 +686,6 @@ impl 
vortex_array::arrays::TakeReduce for vortex_array::arrays::ConstantVTable pub fn vortex_array::arrays::ConstantVTable::take(array: &vortex_array::arrays::ConstantArray, indices: &dyn vortex_array::Array) -> vortex_error::VortexResult> -impl vortex_array::compute::CompareKernel for vortex_array::arrays::ConstantVTable - -pub fn vortex_array::arrays::ConstantVTable::compare(&self, lhs: &vortex_array::arrays::ConstantArray, rhs: &dyn vortex_array::Array, operator: vortex_array::compute::Operator) -> vortex_error::VortexResult> - impl vortex_array::compute::MaskKernel for vortex_array::arrays::ConstantVTable pub fn vortex_array::arrays::ConstantVTable::mask(&self, array: &vortex_array::arrays::ConstantArray, mask: &vortex_mask::Mask) -> vortex_error::VortexResult @@ -1114,10 +1106,6 @@ impl vortex_array::arrays::TakeExecute for vortex_array::arrays::DictVTable pub fn vortex_array::arrays::DictVTable::take(array: &vortex_array::arrays::DictArray, indices: &dyn vortex_array::Array, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> -impl vortex_array::compute::CompareKernel for vortex_array::arrays::DictVTable - -pub fn vortex_array::arrays::DictVTable::compare(&self, lhs: &vortex_array::arrays::DictArray, rhs: &dyn vortex_array::Array, operator: vortex_array::compute::Operator) -> vortex_error::VortexResult> - impl vortex_array::compute::IsConstantKernel for vortex_array::arrays::DictVTable pub fn vortex_array::arrays::DictVTable::is_constant(&self, array: &vortex_array::arrays::DictArray, opts: &vortex_array::compute::IsConstantOpts) -> vortex_error::VortexResult> @@ -1140,6 +1128,10 @@ impl vortex_array::expr::CastReduce for vortex_array::arrays::DictVTable pub fn vortex_array::arrays::DictVTable::cast(array: &vortex_array::arrays::DictArray, dtype: &vortex_dtype::dtype::DType) -> vortex_error::VortexResult> +impl vortex_array::expr::CompareKernel for vortex_array::arrays::DictVTable + +pub fn vortex_array::arrays::DictVTable::compare(array: 
&vortex_array::arrays::DictArray, other: &dyn vortex_array::Array, operator: vortex_array::compute::Operator, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + impl vortex_array::expr::FillNullKernel for vortex_array::arrays::DictVTable pub fn vortex_array::arrays::DictVTable::fill_null(array: &vortex_array::arrays::DictArray, fill_value: &vortex_scalar::scalar::Scalar, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> @@ -1304,10 +1296,6 @@ impl vortex_array::arrays::TakeExecute for vortex_array::arrays::ExtensionVTable pub fn vortex_array::arrays::ExtensionVTable::take(array: &vortex_array::arrays::ExtensionArray, indices: &dyn vortex_array::Array, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> -impl vortex_array::compute::CompareKernel for vortex_array::arrays::ExtensionVTable - -pub fn vortex_array::arrays::ExtensionVTable::compare(&self, lhs: &vortex_array::arrays::ExtensionArray, rhs: &dyn vortex_array::Array, operator: vortex_array::compute::Operator) -> vortex_error::VortexResult> - impl vortex_array::compute::IsConstantKernel for vortex_array::arrays::ExtensionVTable pub fn vortex_array::arrays::ExtensionVTable::is_constant(&self, array: &vortex_array::arrays::ExtensionArray, opts: &vortex_array::compute::IsConstantOpts) -> vortex_error::VortexResult> @@ -1334,6 +1322,10 @@ impl vortex_array::expr::CastReduce for vortex_array::arrays::ExtensionVTable pub fn vortex_array::arrays::ExtensionVTable::cast(array: &vortex_array::arrays::ExtensionArray, dtype: &vortex_dtype::dtype::DType) -> vortex_error::VortexResult> +impl vortex_array::expr::CompareKernel for vortex_array::arrays::ExtensionVTable + +pub fn vortex_array::arrays::ExtensionVTable::compare(array: &vortex_array::arrays::ExtensionArray, other: &dyn vortex_array::Array, operator: vortex_array::compute::Operator, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + impl vortex_array::vtable::BaseArrayVTable for 
vortex_array::arrays::ExtensionVTable pub fn vortex_array::arrays::ExtensionVTable::array_eq(array: &vortex_array::arrays::ExtensionArray, other: &vortex_array::arrays::ExtensionArray, precision: vortex_array::Precision) -> bool @@ -2124,10 +2116,6 @@ impl vortex_array::arrays::TakeExecute for vortex_array::arrays::MaskedVTable pub fn vortex_array::arrays::MaskedVTable::take(array: &vortex_array::arrays::MaskedArray, indices: &dyn vortex_array::Array, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> -impl vortex_array::compute::CompareKernel for vortex_array::arrays::MaskedVTable - -pub fn vortex_array::arrays::MaskedVTable::compare(&self, lhs: &vortex_array::arrays::MaskedArray, rhs: &dyn vortex_array::Array, operator: vortex_array::compute::Operator) -> vortex_error::VortexResult> - impl vortex_array::compute::MaskKernel for vortex_array::arrays::MaskedVTable pub fn vortex_array::arrays::MaskedVTable::mask(&self, array: &vortex_array::arrays::MaskedArray, mask_arg: &vortex_mask::Mask) -> vortex_error::VortexResult @@ -3492,10 +3480,6 @@ impl vortex_array::arrays::TakeExecute for vortex_array::arrays::VarBinVTable pub fn vortex_array::arrays::VarBinVTable::take(array: &vortex_array::arrays::VarBinArray, indices: &dyn vortex_array::Array, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> -impl vortex_array::compute::CompareKernel for vortex_array::arrays::VarBinVTable - -pub fn vortex_array::arrays::VarBinVTable::compare(&self, lhs: &vortex_array::arrays::VarBinArray, rhs: &dyn vortex_array::Array, operator: vortex_array::compute::Operator) -> vortex_error::VortexResult> - impl vortex_array::compute::IsConstantKernel for vortex_array::arrays::VarBinVTable pub fn vortex_array::arrays::VarBinVTable::is_constant(&self, array: &vortex_array::arrays::VarBinArray, opts: &vortex_array::compute::IsConstantOpts) -> vortex_error::VortexResult> @@ -3518,6 +3502,10 @@ impl vortex_array::expr::CastReduce for 
vortex_array::arrays::VarBinVTable pub fn vortex_array::arrays::VarBinVTable::cast(array: &vortex_array::arrays::VarBinArray, dtype: &vortex_dtype::dtype::DType) -> vortex_error::VortexResult> +impl vortex_array::expr::CompareKernel for vortex_array::arrays::VarBinVTable + +pub fn vortex_array::arrays::VarBinVTable::compare(array: &vortex_array::arrays::VarBinArray, other: &dyn vortex_array::Array, operator: vortex_array::compute::Operator, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + impl vortex_array::vtable::BaseArrayVTable for vortex_array::arrays::VarBinVTable pub fn vortex_array::arrays::VarBinVTable::array_eq(array: &vortex_array::arrays::VarBinArray, other: &vortex_array::arrays::VarBinArray, precision: vortex_array::Precision) -> bool @@ -5526,10 +5514,6 @@ impl core::marker::Copy for vortex_array::compute::Operator impl core::marker::StructuralPartialEq for vortex_array::compute::Operator -impl vortex_array::compute::Options for vortex_array::compute::Operator - -pub fn vortex_array::compute::Operator::as_any(&self) -> &dyn core::any::Any - pub enum vortex_array::compute::Output pub vortex_array::compute::Output::Array(vortex_array::ArrayRef) @@ -5690,24 +5674,6 @@ pub type vortex_array::expr::CastReduceAdaptor::Parent = vortex_array::arrays pub fn vortex_array::expr::CastReduceAdaptor::reduce_parent(&self, array: &::Array, parent: vortex_array::arrays::ScalarFnArrayView<'_, vortex_array::expr::Cast>, _child_idx: usize) -> vortex_error::VortexResult> -pub struct vortex_array::compute::CompareKernelAdapter(pub V) - -impl vortex_array::compute::CompareKernelAdapter - -pub const fn vortex_array::compute::CompareKernelAdapter::lift(&'static self) -> vortex_array::compute::CompareKernelRef - -impl core::fmt::Debug for vortex_array::compute::CompareKernelAdapter - -pub fn vortex_array::compute::CompareKernelAdapter::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result - -impl vortex_array::compute::Kernel for 
vortex_array::compute::CompareKernelAdapter - -pub fn vortex_array::compute::CompareKernelAdapter::invoke(&self, args: &vortex_array::compute::InvocationArgs<'_>) -> vortex_error::VortexResult> - -pub struct vortex_array::compute::CompareKernelRef(_) - -impl inventory::Collect for vortex_array::compute::CompareKernelRef - pub struct vortex_array::compute::ComputeFn impl vortex_array::compute::ComputeFn @@ -6178,34 +6144,6 @@ impl vortex_array::expr::CastReduce for vortex_array::arrays::VarBinViewVTable pub fn vortex_array::arrays::VarBinViewVTable::cast(array: &vortex_array::arrays::VarBinViewArray, dtype: &vortex_dtype::dtype::DType) -> vortex_error::VortexResult> -pub trait vortex_array::compute::CompareKernel: vortex_array::vtable::VTable - -pub fn vortex_array::compute::CompareKernel::compare(&self, lhs: &Self::Array, rhs: &dyn vortex_array::Array, operator: vortex_array::compute::Operator) -> vortex_error::VortexResult> - -impl vortex_array::compute::CompareKernel for vortex_array::arrays::ChunkedVTable - -pub fn vortex_array::arrays::ChunkedVTable::compare(&self, lhs: &vortex_array::arrays::ChunkedArray, rhs: &dyn vortex_array::Array, operator: vortex_array::compute::Operator) -> vortex_error::VortexResult> - -impl vortex_array::compute::CompareKernel for vortex_array::arrays::ConstantVTable - -pub fn vortex_array::arrays::ConstantVTable::compare(&self, lhs: &vortex_array::arrays::ConstantArray, rhs: &dyn vortex_array::Array, operator: vortex_array::compute::Operator) -> vortex_error::VortexResult> - -impl vortex_array::compute::CompareKernel for vortex_array::arrays::DictVTable - -pub fn vortex_array::arrays::DictVTable::compare(&self, lhs: &vortex_array::arrays::DictArray, rhs: &dyn vortex_array::Array, operator: vortex_array::compute::Operator) -> vortex_error::VortexResult> - -impl vortex_array::compute::CompareKernel for vortex_array::arrays::ExtensionVTable - -pub fn vortex_array::arrays::ExtensionVTable::compare(&self, lhs: 
&vortex_array::arrays::ExtensionArray, rhs: &dyn vortex_array::Array, operator: vortex_array::compute::Operator) -> vortex_error::VortexResult> - -impl vortex_array::compute::CompareKernel for vortex_array::arrays::MaskedVTable - -pub fn vortex_array::arrays::MaskedVTable::compare(&self, lhs: &vortex_array::arrays::MaskedArray, rhs: &dyn vortex_array::Array, operator: vortex_array::compute::Operator) -> vortex_error::VortexResult> - -impl vortex_array::compute::CompareKernel for vortex_array::arrays::VarBinVTable - -pub fn vortex_array::arrays::VarBinVTable::compare(&self, lhs: &vortex_array::arrays::VarBinArray, rhs: &dyn vortex_array::Array, operator: vortex_array::compute::Operator) -> vortex_error::VortexResult> - pub trait vortex_array::compute::ComputeFnVTable: 'static + core::marker::Send + core::marker::Sync pub fn vortex_array::compute::ComputeFnVTable::invoke(&self, args: &vortex_array::compute::InvocationArgs<'_>, kernels: &[arcref::ArcRef]) -> vortex_error::VortexResult @@ -6406,10 +6344,6 @@ impl vor pub fn vortex_array::compute::BetweenKernelAdapter::invoke(&self, args: &vortex_array::compute::InvocationArgs<'_>) -> vortex_error::VortexResult> -impl vortex_array::compute::Kernel for vortex_array::compute::CompareKernelAdapter - -pub fn vortex_array::compute::CompareKernelAdapter::invoke(&self, args: &vortex_array::compute::InvocationArgs<'_>) -> vortex_error::VortexResult> - impl vortex_array::compute::Kernel for vortex_array::compute::IsConstantKernelAdapter pub fn vortex_array::compute::IsConstantKernelAdapter::invoke(&self, args: &vortex_array::compute::InvocationArgs<'_>) -> vortex_error::VortexResult> @@ -6618,10 +6552,6 @@ impl vortex_array::compute::Options for vortex_array::compute::LikeOptions pub fn vortex_array::compute::LikeOptions::as_any(&self) -> &dyn core::any::Any -impl vortex_array::compute::Options for vortex_array::compute::Operator - -pub fn vortex_array::compute::Operator::as_any(&self) -> &dyn core::any::Any - impl 
vortex_array::compute::Options for vortex_scalar::typed_view::primitive::numeric_operator::NumericOperator pub fn vortex_scalar::typed_view::primitive::numeric_operator::NumericOperator::as_any(&self) -> &dyn core::any::Any @@ -7892,6 +7822,8 @@ pub fn vortex_array::expr::Operator::inverse(self) -> core::option::Option pub fn vortex_array::expr::Operator::is_arithmetic(&self) -> bool +pub fn vortex_array::expr::Operator::is_comparison(&self) -> bool + pub fn vortex_array::expr::Operator::logical_inverse(self) -> core::option::Option pub fn vortex_array::expr::Operator::maybe_cmp_operator(self) -> core::option::Option @@ -8082,6 +8014,22 @@ pub type vortex_array::expr::CastReduceAdaptor::Parent = vortex_array::arrays pub fn vortex_array::expr::CastReduceAdaptor::reduce_parent(&self, array: &::Array, parent: vortex_array::arrays::ScalarFnArrayView<'_, vortex_array::expr::Cast>, _child_idx: usize) -> vortex_error::VortexResult> +pub struct vortex_array::expr::CompareExecuteAdaptor(pub V) + +impl core::default::Default for vortex_array::expr::CompareExecuteAdaptor + +pub fn vortex_array::expr::CompareExecuteAdaptor::default() -> vortex_array::expr::CompareExecuteAdaptor + +impl core::fmt::Debug for vortex_array::expr::CompareExecuteAdaptor + +pub fn vortex_array::expr::CompareExecuteAdaptor::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl vortex_array::kernel::ExecuteParentKernel for vortex_array::expr::CompareExecuteAdaptor where V: vortex_array::expr::CompareKernel + +pub type vortex_array::expr::CompareExecuteAdaptor::Parent = vortex_array::arrays::ExactScalarFn + +pub fn vortex_array::expr::CompareExecuteAdaptor::execute_parent(&self, array: &::Array, parent: vortex_array::arrays::ScalarFnArrayView<'_, vortex_array::expr::Binary>, child_idx: usize, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + pub struct vortex_array::expr::DynamicComparison impl vortex_array::expr::VTable for vortex_array::expr::DynamicComparison 
@@ -9024,6 +8972,22 @@ impl vortex_array::expr::CastReduce for vortex_array::arrays::VarBinViewVTable pub fn vortex_array::arrays::VarBinViewVTable::cast(array: &vortex_array::arrays::VarBinViewArray, dtype: &vortex_dtype::dtype::DType) -> vortex_error::VortexResult> +pub trait vortex_array::expr::CompareKernel: vortex_array::vtable::VTable + +pub fn vortex_array::expr::CompareKernel::compare(array: &Self::Array, other: &dyn vortex_array::Array, operator: vortex_array::compute::Operator, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + +impl vortex_array::expr::CompareKernel for vortex_array::arrays::DictVTable + +pub fn vortex_array::arrays::DictVTable::compare(array: &vortex_array::arrays::DictArray, other: &dyn vortex_array::Array, operator: vortex_array::compute::Operator, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + +impl vortex_array::expr::CompareKernel for vortex_array::arrays::ExtensionVTable + +pub fn vortex_array::arrays::ExtensionVTable::compare(array: &vortex_array::arrays::ExtensionArray, other: &dyn vortex_array::Array, operator: vortex_array::compute::Operator, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + +impl vortex_array::expr::CompareKernel for vortex_array::arrays::VarBinVTable + +pub fn vortex_array::arrays::VarBinVTable::compare(array: &vortex_array::arrays::VarBinArray, other: &dyn vortex_array::Array, operator: vortex_array::compute::Operator, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + pub trait vortex_array::expr::DynExprVTable: 'static + core::marker::Send + core::marker::Sync + vortex_array::expr::vtable::private::Sealed pub fn vortex_array::expr::DynExprVTable::arity(&self, options: &dyn core::any::Any) -> vortex_array::expr::Arity @@ -9948,6 +9912,12 @@ pub type vortex_array::expr::CastExecuteAdaptor::Parent = vortex_array::array pub fn vortex_array::expr::CastExecuteAdaptor::execute_parent(&self, array: &::Array, parent: ::Match, 
_child_idx: usize, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> +impl vortex_array::kernel::ExecuteParentKernel for vortex_array::expr::CompareExecuteAdaptor where V: vortex_array::expr::CompareKernel + +pub type vortex_array::expr::CompareExecuteAdaptor::Parent = vortex_array::arrays::ExactScalarFn + +pub fn vortex_array::expr::CompareExecuteAdaptor::execute_parent(&self, array: &::Array, parent: vortex_array::arrays::ScalarFnArrayView<'_, vortex_array::expr::Binary>, child_idx: usize, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + impl vortex_array::kernel::ExecuteParentKernel for vortex_array::expr::FillNullExecuteAdaptor where V: vortex_array::expr::FillNullKernel pub type vortex_array::expr::FillNullExecuteAdaptor::Parent = vortex_array::arrays::ExactScalarFn From 1f5f8da6c62f23b0fe60e3464139ce49f79723a1 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Mon, 16 Feb 2026 11:38:16 +0000 Subject: [PATCH 07/20] u Signed-off-by: Joe Isaacs --- vortex-array/benches/compare.rs | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/vortex-array/benches/compare.rs b/vortex-array/benches/compare.rs index b6830ebf9e2..34b05edd34b 100644 --- a/vortex-array/benches/compare.rs +++ b/vortex-array/benches/compare.rs @@ -8,11 +8,14 @@ use rand::Rng; use rand::SeedableRng; use rand::distr::Uniform; use rand::prelude::StdRng; +use vortex_array::Canonical; use vortex_array::IntoArray; +use vortex_array::VortexSessionExecute; use vortex_array::arrays::BoolArray; use vortex_array::compute::Operator; use vortex_array::compute::compare; use vortex_buffer::Buffer; +use vortex_session::VortexSession; fn main() { divan::main(); @@ -27,10 +30,15 @@ fn compare_bool(bencher: Bencher) { let arr1 = BoolArray::from_iter((0..ARRAY_SIZE).map(|_| rng.sample(range) == 0)).into_array(); let arr2 = BoolArray::from_iter((0..ARRAY_SIZE).map(|_| rng.sample(range) == 0)).into_array(); + let session = VortexSession::empty(); 
bencher - .with_inputs(|| (&arr1, &arr2)) - .bench_refs(|(arr1, arr2)| compare(*arr1, *arr2, Operator::Gte).unwrap()); + .with_inputs(|| (&arr1, &arr2, &mut session.create_execution_ctx())) + .bench_refs(|(arr1, arr2, ctx)| { + compare(*arr1, *arr2, Operator::Gte) + .unwrap() + .execute::(ctx) + }); } #[divan::bench] @@ -47,8 +55,13 @@ fn compare_int(bencher: Bencher) { .map(|_| rng.sample(range)) .collect::>() .into_array(); + let session = VortexSession::empty(); bencher - .with_inputs(|| (&arr1, &arr2)) - .bench_refs(|(arr1, arr2)| compare(*arr1, *arr2, Operator::Gte).unwrap()); + .with_inputs(|| (&arr1, &arr2, &mut session.create_execution_ctx())) + .bench_refs(|(arr1, arr2, ctx)| { + compare(*arr1, *arr2, Operator::Gte) + .unwrap() + .execute::(ctx) + }); } From af70507d9bc946d83eb603bd95b76daf78e88792 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Mon, 16 Feb 2026 11:38:46 +0000 Subject: [PATCH 08/20] u Signed-off-by: Joe Isaacs --- vortex-array/benches/compare.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/vortex-array/benches/compare.rs b/vortex-array/benches/compare.rs index 34b05edd34b..64e2ca2e098 100644 --- a/vortex-array/benches/compare.rs +++ b/vortex-array/benches/compare.rs @@ -33,11 +33,11 @@ fn compare_bool(bencher: Bencher) { let session = VortexSession::empty(); bencher - .with_inputs(|| (&arr1, &arr2, &mut session.create_execution_ctx())) - .bench_refs(|(arr1, arr2, ctx)| { + .with_inputs(|| (&arr1, &arr2, session.create_execution_ctx())) + .bench_refs(|(arr1, arr2, mut ctx)| { compare(*arr1, *arr2, Operator::Gte) .unwrap() - .execute::(ctx) + .execute::(&mut ctx) }); } @@ -58,10 +58,10 @@ fn compare_int(bencher: Bencher) { let session = VortexSession::empty(); bencher - .with_inputs(|| (&arr1, &arr2, &mut session.create_execution_ctx())) - .bench_refs(|(arr1, arr2, ctx)| { + .with_inputs(|| (&arr1, &arr2, session.create_execution_ctx())) + .bench_refs(|(arr1, arr2, mut ctx)| { compare(*arr1, *arr2, 
Operator::Gte) .unwrap() - .execute::(ctx) + .execute::(&mut ctx) }); } From 61e90c5a092cf76001edf559e4ce09fa9f92e6c2 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Mon, 16 Feb 2026 11:43:43 +0000 Subject: [PATCH 09/20] u Signed-off-by: Joe Isaacs --- vortex-array/src/arrays/bool/compute/rules.rs | 2 +- .../src/arrays/decimal/compute/rules.rs | 2 +- .../src/arrays/primitive/compute/rules.rs | 2 +- vortex-array/src/validity.rs | 17 ++++++++--------- 4 files changed, 11 insertions(+), 12 deletions(-) diff --git a/vortex-array/src/arrays/bool/compute/rules.rs b/vortex-array/src/arrays/bool/compute/rules.rs index d885804f89c..ab3f5ef50b9 100644 --- a/vortex-array/src/arrays/bool/compute/rules.rs +++ b/vortex-array/src/arrays/bool/compute/rules.rs @@ -46,7 +46,7 @@ impl ArrayParentReduceRule for BoolMaskedValidityRule { Ok(Some( BoolArray::new( array.to_bit_buffer(), - array.validity().clone().and(parent.validity().clone()), + array.validity().clone().and(parent.validity().clone())?, ) .into_array(), )) diff --git a/vortex-array/src/arrays/decimal/compute/rules.rs b/vortex-array/src/arrays/decimal/compute/rules.rs index f60e2f7519c..70130bf3e16 100644 --- a/vortex-array/src/arrays/decimal/compute/rules.rs +++ b/vortex-array/src/arrays/decimal/compute/rules.rs @@ -48,7 +48,7 @@ impl ArrayParentReduceRule for DecimalMaskedValidityRule { DecimalArray::new_unchecked( array.buffer::(), array.decimal_dtype(), - array.validity().clone().and(parent.validity().clone()), + array.validity().clone().and(parent.validity().clone())?, ) } .into_array() diff --git a/vortex-array/src/arrays/primitive/compute/rules.rs b/vortex-array/src/arrays/primitive/compute/rules.rs index e2bcc53327f..9bf20fd5cf6 100644 --- a/vortex-array/src/arrays/primitive/compute/rules.rs +++ b/vortex-array/src/arrays/primitive/compute/rules.rs @@ -44,7 +44,7 @@ impl ArrayParentReduceRule for PrimitiveMaskedValidityRule { PrimitiveArray::new_unchecked_from_handle( array.buffer_handle().clone(), array.ptype(), - 
array.validity().clone().and(parent.validity().clone()), + array.validity().clone().and(parent.validity().clone())?, ) } .into_array() diff --git a/vortex-array/src/validity.rs b/vortex-array/src/validity.rs index 91060f3f8e8..5e0ac432c56 100644 --- a/vortex-array/src/validity.rs +++ b/vortex-array/src/validity.rs @@ -28,8 +28,11 @@ use crate::IntoArray; use crate::ToCanonical; use crate::arrays::BoolArray; use crate::arrays::ConstantArray; +use crate::arrays::ScalarFnArrayExt; use crate::builtins::ArrayBuiltins; use crate::compute::sum; +use crate::expr::Binary; +use crate::expr::Operator; use crate::patches::Patches; /// Validity information for an array @@ -263,8 +266,8 @@ impl Validity { /// Logically & two Validity values of the same length #[inline] - pub fn and(self, rhs: Validity) -> Validity { - match (self, rhs) { + pub fn and(self, rhs: Validity) -> VortexResult { + Ok(match (self, rhs) { // Should be pretty clear (Validity::NonNullable, Validity::NonNullable) => Validity::NonNullable, // Any `AllInvalid` makes the output all invalid values @@ -280,15 +283,11 @@ impl Validity { | (Validity::AllValid, Validity::AllValid) => Validity::AllValid, // Here we actually have to do some work (Validity::Array(lhs), Validity::Array(rhs)) => { - let lhs = lhs.to_bool(); - let rhs = rhs.to_bool(); + let conj = Binary.try_new_array(lhs.len(), Operator::And, [lhs, rhs])?; - let lhs = lhs.to_bit_buffer(); - let rhs = rhs.to_bit_buffer(); - - Validity::from(lhs.bitand(rhs)) + Validity::from(conj) } - } + }) } pub fn patch( From fc1973c5d6b29b08813eec08eb09502e30284d2f Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Mon, 16 Feb 2026 13:25:30 +0000 Subject: [PATCH 10/20] u Signed-off-by: Joe Isaacs --- encodings/alp/src/alp/compute/compare.rs | 12 ++++----- .../datetime-parts/src/compute/compare.rs | 20 +++++++------- .../src/decimal_byte_parts/compute/compare.rs | 24 ++++++++--------- .../fastlanes/src/for/compute/compare.rs | 10 +++---- 
encodings/fsst/src/compute/compare.rs | 12 +++------ encodings/runend/src/compute/compare.rs | 16 +++++------ vortex-array/benches/compare.rs | 12 ++++----- .../src/arrays/dict/compute/compare.rs | 16 +++++------ .../src/arrays/extension/compute/compare.rs | 14 +++++----- .../src/arrays/varbin/compute/compare.rs | 27 ++++++++----------- vortex-array/src/expr/exprs/binary/compare.rs | 4 +-- vortex-array/src/validity.rs | 5 +--- 12 files changed, 79 insertions(+), 93 deletions(-) diff --git a/encodings/alp/src/alp/compute/compare.rs b/encodings/alp/src/alp/compute/compare.rs index 13c0eba4d03..e90aa3cd81a 100644 --- a/encodings/alp/src/alp/compute/compare.rs +++ b/encodings/alp/src/alp/compute/compare.rs @@ -26,21 +26,21 @@ use crate::match_each_alp_float_ptype; impl CompareKernel for ALPVTable { fn compare( - array: &ALPArray, - other: &dyn Array, + lhs: &ALPArray, + rhs: &dyn Array, operator: Operator, _ctx: &mut ExecutionCtx, ) -> VortexResult> { - if array.patches().is_some() { + if lhs.patches().is_some() { // TODO(joe): support patches return Ok(None); } - if array.dtype().is_nullable() || other.dtype().is_nullable() { + if lhs.dtype().is_nullable() || rhs.dtype().is_nullable() { // TODO(joe): support nullability return Ok(None); } - if let Some(const_scalar) = other.as_constant() { + if let Some(const_scalar) = rhs.as_constant() { let pscalar = const_scalar.as_primitive_opt().ok_or_else(|| { vortex_err!( "ALP Compare RHS had the wrong type {}, expected {}", @@ -51,7 +51,7 @@ impl CompareKernel for ALPVTable { match_each_alp_float_ptype!(pscalar.ptype(), |T| { match pscalar.typed_value::() { - Some(value) => return alp_scalar_compare(array, value, operator), + Some(value) => return alp_scalar_compare(lhs, value, operator), None => vortex_bail!( "Failed to convert scalar {:?} to ALP type {:?}", pscalar, diff --git a/encodings/datetime-parts/src/compute/compare.rs b/encodings/datetime-parts/src/compute/compare.rs index b1462fc897b..77188e6515e 100644 --- 
a/encodings/datetime-parts/src/compute/compare.rs +++ b/encodings/datetime-parts/src/compute/compare.rs @@ -24,12 +24,12 @@ use crate::timestamp; impl CompareKernel for DateTimePartsVTable { fn compare( - array: &DateTimePartsArray, - other: &dyn Array, + lhs: &DateTimePartsArray, + rhs: &dyn Array, operator: Operator, _ctx: &mut ExecutionCtx, ) -> VortexResult> { - let Some(rhs_const) = other.as_constant() else { + let Some(rhs_const) = rhs.as_constant() else { return Ok(None); }; let Some(timestamp) = rhs_const @@ -45,7 +45,7 @@ impl CompareKernel for DateTimePartsVTable { return Ok(None); }; - let nullability = array.dtype().nullability() | other.dtype().nullability(); + let nullability = lhs.dtype().nullability() | rhs.dtype().nullability(); let Some(options) = ext_dtype.metadata_opt::() else { return Ok(None); @@ -53,17 +53,17 @@ impl CompareKernel for DateTimePartsVTable { let ts_parts = timestamp::split(timestamp, options.unit)?; match operator { - Operator::Eq => compare_eq(array, &ts_parts, nullability), - Operator::NotEq => compare_ne(array, &ts_parts, nullability), + Operator::Eq => compare_eq(lhs, &ts_parts, nullability), + Operator::NotEq => compare_ne(lhs, &ts_parts, nullability), // lt and lte have identical behavior, as we optimize // for the case that all days on the lhs are smaller. // If that special case is not hit, we return `Ok(None)` to // signal that the comparison wasn't handled within dtp. 
- Operator::Lt => compare_lt(array, &ts_parts, nullability), - Operator::Lte => compare_lt(array, &ts_parts, nullability), + Operator::Lt => compare_lt(lhs, &ts_parts, nullability), + Operator::Lte => compare_lt(lhs, &ts_parts, nullability), // (Like for lt, lte) - Operator::Gt => compare_gt(array, &ts_parts, nullability), - Operator::Gte => compare_gt(array, &ts_parts, nullability), + Operator::Gt => compare_gt(lhs, &ts_parts, nullability), + Operator::Gte => compare_gt(lhs, &ts_parts, nullability), } } } diff --git a/encodings/decimal-byte-parts/src/decimal_byte_parts/compute/compare.rs b/encodings/decimal-byte-parts/src/decimal_byte_parts/compute/compare.rs index af2b6342fa3..94d35a29506 100644 --- a/encodings/decimal-byte-parts/src/decimal_byte_parts/compute/compare.rs +++ b/encodings/decimal-byte-parts/src/decimal_byte_parts/compute/compare.rs @@ -27,17 +27,17 @@ use crate::decimal_byte_parts::compute::compare::Sign::Positive; impl CompareKernel for DecimalBytePartsVTable { fn compare( - array: &Self::Array, - other: &dyn Array, + lhs: &Self::Array, + rhs: &dyn Array, operator: Operator, _ctx: &mut ExecutionCtx, ) -> VortexResult> { - let Some(rhs_const) = other.as_constant() else { + let Some(rhs_const) = rhs.as_constant() else { return Ok(None); }; - let nullability = array.dtype.nullability() | other.dtype().nullability(); - let scalar_type = array.msp.dtype().with_nullability(nullability); + let nullability = lhs.dtype.nullability() | rhs.dtype().nullability(); + let scalar_type = lhs.msp.dtype().with_nullability(nullability); if rhs_const.is_null() { return Ok(None); @@ -48,14 +48,12 @@ impl CompareKernel for DecimalBytePartsVTable { .decimal_value() .vortex_expect("RHS is not null"); - match decimal_value_wrapper_to_primitive( - rhs_decimal, - array.msp.as_primitive_typed().ptype(), - ) { + match decimal_value_wrapper_to_primitive(rhs_decimal, lhs.msp.as_primitive_typed().ptype()) + { Ok(value) => { let encoded_scalar = Scalar::try_new(scalar_type, 
Some(value))?; - let encoded_const = ConstantArray::new(encoded_scalar, other.len()); - compare(&array.msp, &encoded_const.to_array(), operator).map(Some) + let encoded_const = ConstantArray::new(encoded_scalar, rhs.len()); + compare(&lhs.msp, &encoded_const.to_array(), operator).map(Some) } Err(sign) => { @@ -64,11 +62,11 @@ impl CompareKernel for DecimalBytePartsVTable { // (depending on the `sign`) than all values in MSP. // If the LHS or the RHS contain nulls, then we must fallback to the canonicalized // implementation which does null-checking instead. - if array.all_valid()? && other.all_valid()? { + if lhs.all_valid()? && rhs.all_valid()? { Ok(Some( ConstantArray::new( unconvertible_value(sign, operator, nullability), - array.len(), + lhs.len(), ) .to_array(), )) diff --git a/encodings/fastlanes/src/for/compute/compare.rs b/encodings/fastlanes/src/for/compute/compare.rs index cb13a89eeab..d1ff21ea218 100644 --- a/encodings/fastlanes/src/for/compute/compare.rs +++ b/encodings/fastlanes/src/for/compute/compare.rs @@ -25,12 +25,12 @@ use crate::FoRVTable; impl CompareKernel for FoRVTable { fn compare( - array: &FoRArray, - other: &dyn Array, + lhs: &FoRArray, + rhs: &dyn Array, operator: Operator, _ctx: &mut ExecutionCtx, ) -> VortexResult> { - if let Some(constant) = other.as_constant() + if let Some(constant) = rhs.as_constant() && let Some(constant) = constant.as_primitive_opt() { if constant.pvalue().is_none() { @@ -38,9 +38,9 @@ impl CompareKernel for FoRVTable { } match_each_integer_ptype!(constant.ptype(), |T| { return compare_constant( - array, + lhs, constant.typed_value::().vortex_expect("RHS is not null"), - other.dtype().nullability(), + rhs.dtype().nullability(), operator, ); }) diff --git a/encodings/fsst/src/compute/compare.rs b/encodings/fsst/src/compute/compare.rs index e337f380e0e..7dc0c79f1de 100644 --- a/encodings/fsst/src/compute/compare.rs +++ b/encodings/fsst/src/compute/compare.rs @@ -27,13 +27,13 @@ use crate::FSSTVTable; impl 
CompareKernel for FSSTVTable { fn compare( - array: &FSSTArray, - other: &dyn Array, + lhs: &FSSTArray, + rhs: &dyn Array, operator: Operator, _ctx: &mut ExecutionCtx, ) -> VortexResult> { - match other.as_constant() { - Some(constant) => compare_fsst_constant(array, &constant, operator), + match rhs.as_constant() { + Some(constant) => compare_fsst_constant(lhs, &constant, operator), // Otherwise, fall back to the default comparison behavior. _ => Ok(None), } @@ -46,10 +46,6 @@ fn compare_fsst_constant( right: &Scalar, operator: Operator, ) -> VortexResult> { - if right.is_null() { - return Ok(None); - } - let is_rhs_empty = match right.dtype() { DType::Binary(_) => right .as_binary() diff --git a/encodings/runend/src/compute/compare.rs b/encodings/runend/src/compute/compare.rs index 3023bd4c218..1d95bf11fa5 100644 --- a/encodings/runend/src/compute/compare.rs +++ b/encodings/runend/src/compute/compare.rs @@ -18,23 +18,23 @@ use crate::compress::runend_decode_bools; impl CompareKernel for RunEndVTable { fn compare( - array: &RunEndArray, - other: &dyn Array, + lhs: &RunEndArray, + rhs: &dyn Array, operator: Operator, _ctx: &mut ExecutionCtx, ) -> VortexResult> { // If the RHS is constant, then we just need to compare against our encoded values. 
- if let Some(const_scalar) = other.as_constant() { + if let Some(const_scalar) = rhs.as_constant() { let values = compare( - array.values(), - ConstantArray::new(const_scalar, array.values().len()).as_ref(), + lhs.values(), + ConstantArray::new(const_scalar, lhs.values().len()).as_ref(), operator, )?; let decoded = runend_decode_bools( - array.ends().to_primitive(), + lhs.ends().to_primitive(), values.to_bool(), - array.offset(), - array.len(), + lhs.offset(), + lhs.len(), )?; return Ok(Some(decoded.into_array())); } diff --git a/vortex-array/benches/compare.rs b/vortex-array/benches/compare.rs index 64e2ca2e098..7c20cff6f39 100644 --- a/vortex-array/benches/compare.rs +++ b/vortex-array/benches/compare.rs @@ -34,10 +34,10 @@ fn compare_bool(bencher: Bencher) { bencher .with_inputs(|| (&arr1, &arr2, session.create_execution_ctx())) - .bench_refs(|(arr1, arr2, mut ctx)| { - compare(*arr1, *arr2, Operator::Gte) + .bench_refs(|input| { + compare(input.0, input.1, Operator::Gte) .unwrap() - .execute::(&mut ctx) + .execute::(&mut input.2) }); } @@ -59,9 +59,9 @@ fn compare_int(bencher: Bencher) { bencher .with_inputs(|| (&arr1, &arr2, session.create_execution_ctx())) - .bench_refs(|(arr1, arr2, mut ctx)| { - compare(*arr1, *arr2, Operator::Gte) + .bench_refs(|input| { + compare(input.0, input.1, Operator::Gte) .unwrap() - .execute::(&mut ctx) + .execute::(&mut input.2) }); } diff --git a/vortex-array/src/arrays/dict/compute/compare.rs b/vortex-array/src/arrays/dict/compute/compare.rs index a2aad04a17c..2fbba2c29e1 100644 --- a/vortex-array/src/arrays/dict/compute/compare.rs +++ b/vortex-array/src/arrays/dict/compute/compare.rs @@ -16,28 +16,28 @@ use crate::expr::CompareKernel; impl CompareKernel for DictVTable { fn compare( - array: &DictArray, - other: &dyn Array, + lhs: &DictArray, + rhs: &dyn Array, operator: Operator, _ctx: &mut ExecutionCtx, ) -> VortexResult> { // if we have more values than codes, it is faster to canonicalise first. 
- if array.values().len() > array.codes().len() { + if lhs.values().len() > lhs.codes().len() { return Ok(None); } // If the RHS is constant, then we just need to compare against our encoded values. - if let Some(rhs) = other.as_constant() { + if let Some(rhs) = rhs.as_constant() { let compare_result = compare( - array.values(), - ConstantArray::new(rhs, array.values().len()).as_ref(), + lhs.values(), + ConstantArray::new(rhs, lhs.values().len()).as_ref(), operator, )?; // SAFETY: values len preserved, codes all still point to valid values let result = unsafe { - DictArray::new_unchecked(array.codes().clone(), compare_result) - .set_all_values_referenced(array.has_all_values_referenced()) + DictArray::new_unchecked(lhs.codes().clone(), compare_result) + .set_all_values_referenced(lhs.has_all_values_referenced()) .into_array() }; diff --git a/vortex-array/src/arrays/extension/compute/compare.rs b/vortex-array/src/arrays/extension/compute/compare.rs index 3178c63cff5..aac0226bae2 100644 --- a/vortex-array/src/arrays/extension/compute/compare.rs +++ b/vortex-array/src/arrays/extension/compute/compare.rs @@ -15,25 +15,25 @@ use crate::expr::CompareKernel; impl CompareKernel for ExtensionVTable { fn compare( - array: &ExtensionArray, - other: &dyn Array, + lhs: &ExtensionArray, + rhs: &dyn Array, operator: Operator, _ctx: &mut ExecutionCtx, ) -> VortexResult> { // If the RHS is a constant, we can extract the storage scalar. - if let Some(const_ext) = other.as_constant() { + if let Some(const_ext) = rhs.as_constant() { let storage_scalar = const_ext.as_extension().to_storage_scalar(); return compute::compare( - array.storage(), - ConstantArray::new(storage_scalar, array.len()).as_ref(), + lhs.storage(), + ConstantArray::new(storage_scalar, lhs.len()).as_ref(), operator, ) .map(Some); } // If the RHS is an extension array matching ours, we can extract the storage. 
- if let Some(rhs_ext) = other.as_opt::() { - return compute::compare(array.storage(), rhs_ext.storage(), operator).map(Some); + if let Some(rhs_ext) = rhs.as_opt::() { + return compute::compare(lhs.storage(), rhs_ext.storage(), operator).map(Some); } // Otherwise, we need the RHS to handle this comparison. diff --git a/vortex-array/src/arrays/varbin/compute/compare.rs b/vortex-array/src/arrays/varbin/compute/compare.rs index 3c46b80836c..adbdff15e99 100644 --- a/vortex-array/src/arrays/varbin/compute/compare.rs +++ b/vortex-array/src/arrays/varbin/compute/compare.rs @@ -34,18 +34,18 @@ use crate::vtable::ValidityHelper; // This implementation exists so we can have custom translation of RHS to arrow that's not the same as IntoCanonical impl CompareKernel for VarBinVTable { fn compare( - array: &VarBinArray, - other: &dyn Array, + lhs: &VarBinArray, + rhs: &dyn Array, operator: Operator, _ctx: &mut ExecutionCtx, ) -> VortexResult> { - if let Some(rhs_const) = other.as_constant() { + if let Some(rhs_const) = rhs.as_constant() { if rhs_const.is_null() { return Ok(None); } - let nullable = array.dtype().is_nullable() || rhs_const.dtype().is_nullable(); - let len = array.len(); + let nullable = lhs.dtype().is_nullable() || rhs_const.dtype().is_nullable(); + let len = lhs.len(); let rhs_is_empty = match rhs_const.dtype() { DType::Binary(_) => rhs_const @@ -64,7 +64,7 @@ impl CompareKernel for VarBinVTable { Operator::Gte => BitBuffer::new_set(len), // Every possible value is >= "" Operator::Lt => BitBuffer::new_unset(len), // No value is < "" Operator::Eq | Operator::NotEq | Operator::Gt | Operator::Lte => { - let lhs_offsets = array.offsets().to_primitive(); + let lhs_offsets = lhs.offsets().to_primitive(); match_each_integer_ptype!(lhs_offsets.ptype(), |P| { compare_offsets_to_empty::

(lhs_offsets, operator) }) @@ -74,16 +74,15 @@ impl CompareKernel for VarBinVTable { return Ok(Some( BoolArray::new( buffer, - array - .validity() + lhs.validity() .clone() - .union_nullability(other.dtype().nullability()), + .union_nullability(rhs.dtype().nullability()), ) .into_array(), )); } - let lhs = Datum::try_new(array.as_ref())?; + let lhs = Datum::try_new(lhs.as_ref())?; // Use StringViewArray/BinaryViewArray to match the Utf8View/BinaryView types // produced by Datum::try_new (which uses into_arrow_preferred()) @@ -115,15 +114,11 @@ impl CompareKernel for VarBinVTable { .map_err(|err| vortex_err!("Failed to compare VarBin array: {}", err))?; Ok(Some(from_arrow_array_with_len(&array, len, nullable)?)) - } else if !other.is::() { + } else if !rhs.is::() { // NOTE: If the rhs is not a VarBin array it will be canonicalized to a VarBinView // Arrow doesn't support comparing VarBin to VarBinView arrays, so we convert ourselves // to VarBinView and re-invoke. - return Ok(Some(compare( - array.to_varbinview().as_ref(), - other, - operator, - )?)); + return Ok(Some(compare(lhs.to_varbinview().as_ref(), rhs, operator)?)); } else { Ok(None) } diff --git a/vortex-array/src/expr/exprs/binary/compare.rs b/vortex-array/src/expr/exprs/binary/compare.rs index 9e605c93562..9dbc632c081 100644 --- a/vortex-array/src/expr/exprs/binary/compare.rs +++ b/vortex-array/src/expr/exprs/binary/compare.rs @@ -33,8 +33,8 @@ use crate::vtable::VTable; /// the left-hand side, swapping the operator when necessary. pub trait CompareKernel: VTable { fn compare( - array: &Self::Array, - other: &dyn Array, + lhs: &Self::Array, + rhs: &dyn Array, operator: Operator, ctx: &mut ExecutionCtx, ) -> VortexResult>; diff --git a/vortex-array/src/validity.rs b/vortex-array/src/validity.rs index 5e0ac432c56..09f148166c8 100644 --- a/vortex-array/src/validity.rs +++ b/vortex-array/src/validity.rs @@ -4,7 +4,6 @@ //! Array validity and nullability behavior, used by arrays and compute functions. 
use std::fmt::Debug; -use std::ops::BitAnd; use std::ops::Range; use vortex_buffer::BitBuffer; @@ -283,9 +282,7 @@ impl Validity { | (Validity::AllValid, Validity::AllValid) => Validity::AllValid, // Here we actually have to do some work (Validity::Array(lhs), Validity::Array(rhs)) => { - let conj = Binary.try_new_array(lhs.len(), Operator::And, [lhs, rhs])?; - - Validity::from(conj) + Validity::Array(Binary.try_new_array(lhs.len(), Operator::And, [lhs, rhs])?) } }) } From 0035dcac272cc333441e7d2ebe4a909b1319683f Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Mon, 16 Feb 2026 13:39:59 +0000 Subject: [PATCH 11/20] u Signed-off-by: Joe Isaacs --- encodings/alp/public-api.lock | 2 +- encodings/datetime-parts/public-api.lock | 2 +- encodings/decimal-byte-parts/public-api.lock | 2 +- encodings/fastlanes/public-api.lock | 2 +- encodings/fsst/public-api.lock | 2 +- encodings/runend/public-api.lock | 2 +- vortex-array/public-api.lock | 144 ++++++++----------- 7 files changed, 63 insertions(+), 93 deletions(-) diff --git a/encodings/alp/public-api.lock b/encodings/alp/public-api.lock index fabb5ed64f5..8416610a039 100644 --- a/encodings/alp/public-api.lock +++ b/encodings/alp/public-api.lock @@ -266,7 +266,7 @@ pub fn vortex_alp::ALPVTable::nan_count(&self, array: &vortex_alp::ALPArray) -> impl vortex_array::expr::exprs::binary::compare::CompareKernel for vortex_alp::ALPVTable -pub fn vortex_alp::ALPVTable::compare(array: &vortex_alp::ALPArray, other: &dyn vortex_array::array::Array, operator: vortex_array::compute::compare::Operator, _ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult> +pub fn vortex_alp::ALPVTable::compare(lhs: &vortex_alp::ALPArray, rhs: &dyn vortex_array::array::Array, operator: vortex_array::compute::compare::Operator, _ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult> impl vortex_array::expr::exprs::cast::kernel::CastReduce for vortex_alp::ALPVTable diff --git 
a/encodings/datetime-parts/public-api.lock b/encodings/datetime-parts/public-api.lock index 901bc633434..b43cbafad7a 100644 --- a/encodings/datetime-parts/public-api.lock +++ b/encodings/datetime-parts/public-api.lock @@ -124,7 +124,7 @@ pub fn vortex_datetime_parts::DateTimePartsVTable::mask(&self, array: &vortex_da impl vortex_array::expr::exprs::binary::compare::CompareKernel for vortex_datetime_parts::DateTimePartsVTable -pub fn vortex_datetime_parts::DateTimePartsVTable::compare(array: &vortex_datetime_parts::DateTimePartsArray, other: &dyn vortex_array::array::Array, operator: vortex_array::compute::compare::Operator, _ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult> +pub fn vortex_datetime_parts::DateTimePartsVTable::compare(lhs: &vortex_datetime_parts::DateTimePartsArray, rhs: &dyn vortex_array::array::Array, operator: vortex_array::compute::compare::Operator, _ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult> impl vortex_array::expr::exprs::cast::kernel::CastReduce for vortex_datetime_parts::DateTimePartsVTable diff --git a/encodings/decimal-byte-parts/public-api.lock b/encodings/decimal-byte-parts/public-api.lock index 2b40655a403..a2117734a75 100644 --- a/encodings/decimal-byte-parts/public-api.lock +++ b/encodings/decimal-byte-parts/public-api.lock @@ -74,7 +74,7 @@ pub fn vortex_decimal_byte_parts::DecimalBytePartsVTable::mask(&self, array: &vo impl vortex_array::expr::exprs::binary::compare::CompareKernel for vortex_decimal_byte_parts::DecimalBytePartsVTable -pub fn vortex_decimal_byte_parts::DecimalBytePartsVTable::compare(array: &Self::Array, other: &dyn vortex_array::array::Array, operator: vortex_array::compute::compare::Operator, _ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult> +pub fn vortex_decimal_byte_parts::DecimalBytePartsVTable::compare(lhs: &Self::Array, rhs: &dyn vortex_array::array::Array, operator: vortex_array::compute::compare::Operator, _ctx: 
&mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult> impl vortex_array::expr::exprs::cast::kernel::CastReduce for vortex_decimal_byte_parts::DecimalBytePartsVTable diff --git a/encodings/fastlanes/public-api.lock b/encodings/fastlanes/public-api.lock index 0cd232e0e47..7206974aa4b 100644 --- a/encodings/fastlanes/public-api.lock +++ b/encodings/fastlanes/public-api.lock @@ -476,7 +476,7 @@ pub fn vortex_fastlanes::FoRVTable::is_strict_sorted(&self, array: &vortex_fastl impl vortex_array::expr::exprs::binary::compare::CompareKernel for vortex_fastlanes::FoRVTable -pub fn vortex_fastlanes::FoRVTable::compare(array: &vortex_fastlanes::FoRArray, other: &dyn vortex_array::array::Array, operator: vortex_array::compute::compare::Operator, _ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult> +pub fn vortex_fastlanes::FoRVTable::compare(lhs: &vortex_fastlanes::FoRArray, rhs: &dyn vortex_array::array::Array, operator: vortex_array::compute::compare::Operator, _ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult> impl vortex_array::expr::exprs::cast::kernel::CastReduce for vortex_fastlanes::FoRVTable diff --git a/encodings/fsst/public-api.lock b/encodings/fsst/public-api.lock index 1cbb204d1dd..00d030a5a23 100644 --- a/encodings/fsst/public-api.lock +++ b/encodings/fsst/public-api.lock @@ -106,7 +106,7 @@ pub fn vortex_fsst::FSSTVTable::slice(array: &Self::Array, range: core::ops::ran impl vortex_array::expr::exprs::binary::compare::CompareKernel for vortex_fsst::FSSTVTable -pub fn vortex_fsst::FSSTVTable::compare(array: &vortex_fsst::FSSTArray, other: &dyn vortex_array::array::Array, operator: vortex_array::compute::compare::Operator, _ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult> +pub fn vortex_fsst::FSSTVTable::compare(lhs: &vortex_fsst::FSSTArray, rhs: &dyn vortex_array::array::Array, operator: vortex_array::compute::compare::Operator, _ctx: &mut 
vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult> impl vortex_array::expr::exprs::cast::kernel::CastReduce for vortex_fsst::FSSTVTable diff --git a/encodings/runend/public-api.lock b/encodings/runend/public-api.lock index 44c286b1ca3..8b146026548 100644 --- a/encodings/runend/public-api.lock +++ b/encodings/runend/public-api.lock @@ -138,7 +138,7 @@ pub fn vortex_runend::RunEndVTable::min_max(&self, array: &vortex_runend::RunEnd impl vortex_array::expr::exprs::binary::compare::CompareKernel for vortex_runend::RunEndVTable -pub fn vortex_runend::RunEndVTable::compare(array: &vortex_runend::RunEndArray, other: &dyn vortex_array::array::Array, operator: vortex_array::compute::compare::Operator, _ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult> +pub fn vortex_runend::RunEndVTable::compare(lhs: &vortex_runend::RunEndArray, rhs: &dyn vortex_array::array::Array, operator: vortex_array::compute::compare::Operator, _ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult> impl vortex_array::expr::exprs::cast::kernel::CastReduce for vortex_runend::RunEndVTable diff --git a/vortex-array/public-api.lock b/vortex-array/public-api.lock index b3178b83186..7ef0534a230 100644 --- a/vortex-array/public-api.lock +++ b/vortex-array/public-api.lock @@ -98,10 +98,6 @@ impl vortex_array::arrays::TakeExecute for vortex_array::arrays::DictVTable pub fn vortex_array::arrays::DictVTable::take(array: &vortex_array::arrays::DictArray, indices: &dyn vortex_array::Array, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> -impl vortex_array::compute::CompareKernel for vortex_array::arrays::DictVTable - -pub fn vortex_array::arrays::DictVTable::compare(&self, lhs: &vortex_array::arrays::DictArray, rhs: &dyn vortex_array::Array, operator: vortex_array::compute::Operator) -> vortex_error::VortexResult> - impl vortex_array::compute::IsConstantKernel for vortex_array::arrays::DictVTable pub fn 
vortex_array::arrays::DictVTable::is_constant(&self, array: &vortex_array::arrays::DictArray, opts: &vortex_array::compute::IsConstantOpts) -> vortex_error::VortexResult> @@ -120,6 +116,10 @@ impl vortex_array::expr::CastReduce for vortex_array::arrays::DictVTable pub fn vortex_array::arrays::DictVTable::cast(array: &vortex_array::arrays::DictArray, dtype: &vortex_dtype::dtype::DType) -> vortex_error::VortexResult> +impl vortex_array::expr::CompareKernel for vortex_array::arrays::DictVTable + +pub fn vortex_array::arrays::DictVTable::compare(lhs: &vortex_array::arrays::DictArray, rhs: &dyn vortex_array::Array, operator: vortex_array::compute::Operator, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + impl vortex_array::expr::FillNullKernel for vortex_array::arrays::DictVTable pub fn vortex_array::arrays::DictVTable::fill_null(array: &vortex_array::arrays::DictArray, fill_value: &vortex_scalar::scalar::Scalar, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> @@ -520,10 +520,6 @@ impl vortex_array::arrays::TakeExecute for vortex_array::arrays::ChunkedVTable pub fn vortex_array::arrays::ChunkedVTable::take(array: &vortex_array::arrays::ChunkedArray, indices: &dyn vortex_array::Array, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> -impl vortex_array::compute::CompareKernel for vortex_array::arrays::ChunkedVTable - -pub fn vortex_array::arrays::ChunkedVTable::compare(&self, lhs: &vortex_array::arrays::ChunkedArray, rhs: &dyn vortex_array::Array, operator: vortex_array::compute::Operator) -> vortex_error::VortexResult> - impl vortex_array::compute::IsConstantKernel for vortex_array::arrays::ChunkedVTable pub fn vortex_array::arrays::ChunkedVTable::is_constant(&self, array: &vortex_array::arrays::ChunkedArray, opts: &vortex_array::compute::IsConstantOpts) -> vortex_error::VortexResult> @@ -690,10 +686,6 @@ impl vortex_array::arrays::TakeReduce for vortex_array::arrays::ConstantVTable pub fn 
vortex_array::arrays::ConstantVTable::take(array: &vortex_array::arrays::ConstantArray, indices: &dyn vortex_array::Array) -> vortex_error::VortexResult> -impl vortex_array::compute::CompareKernel for vortex_array::arrays::ConstantVTable - -pub fn vortex_array::arrays::ConstantVTable::compare(&self, lhs: &vortex_array::arrays::ConstantArray, rhs: &dyn vortex_array::Array, operator: vortex_array::compute::Operator) -> vortex_error::VortexResult> - impl vortex_array::compute::MaskKernel for vortex_array::arrays::ConstantVTable pub fn vortex_array::arrays::ConstantVTable::mask(&self, array: &vortex_array::arrays::ConstantArray, mask: &vortex_mask::Mask) -> vortex_error::VortexResult @@ -1114,10 +1106,6 @@ impl vortex_array::arrays::TakeExecute for vortex_array::arrays::DictVTable pub fn vortex_array::arrays::DictVTable::take(array: &vortex_array::arrays::DictArray, indices: &dyn vortex_array::Array, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> -impl vortex_array::compute::CompareKernel for vortex_array::arrays::DictVTable - -pub fn vortex_array::arrays::DictVTable::compare(&self, lhs: &vortex_array::arrays::DictArray, rhs: &dyn vortex_array::Array, operator: vortex_array::compute::Operator) -> vortex_error::VortexResult> - impl vortex_array::compute::IsConstantKernel for vortex_array::arrays::DictVTable pub fn vortex_array::arrays::DictVTable::is_constant(&self, array: &vortex_array::arrays::DictArray, opts: &vortex_array::compute::IsConstantOpts) -> vortex_error::VortexResult> @@ -1136,6 +1124,10 @@ impl vortex_array::expr::CastReduce for vortex_array::arrays::DictVTable pub fn vortex_array::arrays::DictVTable::cast(array: &vortex_array::arrays::DictArray, dtype: &vortex_dtype::dtype::DType) -> vortex_error::VortexResult> +impl vortex_array::expr::CompareKernel for vortex_array::arrays::DictVTable + +pub fn vortex_array::arrays::DictVTable::compare(lhs: &vortex_array::arrays::DictArray, rhs: &dyn vortex_array::Array, operator: 
vortex_array::compute::Operator, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + impl vortex_array::expr::FillNullKernel for vortex_array::arrays::DictVTable pub fn vortex_array::arrays::DictVTable::fill_null(array: &vortex_array::arrays::DictArray, fill_value: &vortex_scalar::scalar::Scalar, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> @@ -1304,10 +1296,6 @@ impl vortex_array::arrays::TakeExecute for vortex_array::arrays::ExtensionVTable pub fn vortex_array::arrays::ExtensionVTable::take(array: &vortex_array::arrays::ExtensionArray, indices: &dyn vortex_array::Array, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> -impl vortex_array::compute::CompareKernel for vortex_array::arrays::ExtensionVTable - -pub fn vortex_array::arrays::ExtensionVTable::compare(&self, lhs: &vortex_array::arrays::ExtensionArray, rhs: &dyn vortex_array::Array, operator: vortex_array::compute::Operator) -> vortex_error::VortexResult> - impl vortex_array::compute::IsConstantKernel for vortex_array::arrays::ExtensionVTable pub fn vortex_array::arrays::ExtensionVTable::is_constant(&self, array: &vortex_array::arrays::ExtensionArray, opts: &vortex_array::compute::IsConstantOpts) -> vortex_error::VortexResult> @@ -1334,6 +1322,10 @@ impl vortex_array::expr::CastReduce for vortex_array::arrays::ExtensionVTable pub fn vortex_array::arrays::ExtensionVTable::cast(array: &vortex_array::arrays::ExtensionArray, dtype: &vortex_dtype::dtype::DType) -> vortex_error::VortexResult> +impl vortex_array::expr::CompareKernel for vortex_array::arrays::ExtensionVTable + +pub fn vortex_array::arrays::ExtensionVTable::compare(lhs: &vortex_array::arrays::ExtensionArray, rhs: &dyn vortex_array::Array, operator: vortex_array::compute::Operator, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + impl vortex_array::vtable::BaseArrayVTable for vortex_array::arrays::ExtensionVTable pub fn 
vortex_array::arrays::ExtensionVTable::array_eq(array: &vortex_array::arrays::ExtensionArray, other: &vortex_array::arrays::ExtensionArray, precision: vortex_array::Precision) -> bool @@ -2124,10 +2116,6 @@ impl vortex_array::arrays::TakeExecute for vortex_array::arrays::MaskedVTable pub fn vortex_array::arrays::MaskedVTable::take(array: &vortex_array::arrays::MaskedArray, indices: &dyn vortex_array::Array, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> -impl vortex_array::compute::CompareKernel for vortex_array::arrays::MaskedVTable - -pub fn vortex_array::arrays::MaskedVTable::compare(&self, lhs: &vortex_array::arrays::MaskedArray, rhs: &dyn vortex_array::Array, operator: vortex_array::compute::Operator) -> vortex_error::VortexResult> - impl vortex_array::compute::MaskKernel for vortex_array::arrays::MaskedVTable pub fn vortex_array::arrays::MaskedVTable::mask(&self, array: &vortex_array::arrays::MaskedArray, mask_arg: &vortex_mask::Mask) -> vortex_error::VortexResult @@ -3492,10 +3480,6 @@ impl vortex_array::arrays::TakeExecute for vortex_array::arrays::VarBinVTable pub fn vortex_array::arrays::VarBinVTable::take(array: &vortex_array::arrays::VarBinArray, indices: &dyn vortex_array::Array, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> -impl vortex_array::compute::CompareKernel for vortex_array::arrays::VarBinVTable - -pub fn vortex_array::arrays::VarBinVTable::compare(&self, lhs: &vortex_array::arrays::VarBinArray, rhs: &dyn vortex_array::Array, operator: vortex_array::compute::Operator) -> vortex_error::VortexResult> - impl vortex_array::compute::IsConstantKernel for vortex_array::arrays::VarBinVTable pub fn vortex_array::arrays::VarBinVTable::is_constant(&self, array: &vortex_array::arrays::VarBinArray, opts: &vortex_array::compute::IsConstantOpts) -> vortex_error::VortexResult> @@ -3518,6 +3502,10 @@ impl vortex_array::expr::CastReduce for vortex_array::arrays::VarBinVTable pub fn 
vortex_array::arrays::VarBinVTable::cast(array: &vortex_array::arrays::VarBinArray, dtype: &vortex_dtype::dtype::DType) -> vortex_error::VortexResult> +impl vortex_array::expr::CompareKernel for vortex_array::arrays::VarBinVTable + +pub fn vortex_array::arrays::VarBinVTable::compare(lhs: &vortex_array::arrays::VarBinArray, rhs: &dyn vortex_array::Array, operator: vortex_array::compute::Operator, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + impl vortex_array::vtable::BaseArrayVTable for vortex_array::arrays::VarBinVTable pub fn vortex_array::arrays::VarBinVTable::array_eq(array: &vortex_array::arrays::VarBinArray, other: &vortex_array::arrays::VarBinArray, precision: vortex_array::Precision) -> bool @@ -5526,10 +5514,6 @@ impl core::marker::Copy for vortex_array::compute::Operator impl core::marker::StructuralPartialEq for vortex_array::compute::Operator -impl vortex_array::compute::Options for vortex_array::compute::Operator - -pub fn vortex_array::compute::Operator::as_any(&self) -> &dyn core::any::Any - pub enum vortex_array::compute::Output pub vortex_array::compute::Output::Array(vortex_array::ArrayRef) @@ -5690,24 +5674,6 @@ pub type vortex_array::expr::CastReduceAdaptor::Parent = vortex_array::arrays pub fn vortex_array::expr::CastReduceAdaptor::reduce_parent(&self, array: &::Array, parent: vortex_array::arrays::ScalarFnArrayView<'_, vortex_array::expr::Cast>, _child_idx: usize) -> vortex_error::VortexResult> -pub struct vortex_array::compute::CompareKernelAdapter(pub V) - -impl vortex_array::compute::CompareKernelAdapter - -pub const fn vortex_array::compute::CompareKernelAdapter::lift(&'static self) -> vortex_array::compute::CompareKernelRef - -impl core::fmt::Debug for vortex_array::compute::CompareKernelAdapter - -pub fn vortex_array::compute::CompareKernelAdapter::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result - -impl vortex_array::compute::Kernel for vortex_array::compute::CompareKernelAdapter - -pub fn 
vortex_array::compute::CompareKernelAdapter::invoke(&self, args: &vortex_array::compute::InvocationArgs<'_>) -> vortex_error::VortexResult> - -pub struct vortex_array::compute::CompareKernelRef(_) - -impl inventory::Collect for vortex_array::compute::CompareKernelRef - pub struct vortex_array::compute::ComputeFn impl vortex_array::compute::ComputeFn @@ -6120,34 +6086,6 @@ impl vortex_array::expr::CastReduce for vortex_array::arrays::VarBinViewVTable pub fn vortex_array::arrays::VarBinViewVTable::cast(array: &vortex_array::arrays::VarBinViewArray, dtype: &vortex_dtype::dtype::DType) -> vortex_error::VortexResult> -pub trait vortex_array::compute::CompareKernel: vortex_array::vtable::VTable - -pub fn vortex_array::compute::CompareKernel::compare(&self, lhs: &Self::Array, rhs: &dyn vortex_array::Array, operator: vortex_array::compute::Operator) -> vortex_error::VortexResult> - -impl vortex_array::compute::CompareKernel for vortex_array::arrays::ChunkedVTable - -pub fn vortex_array::arrays::ChunkedVTable::compare(&self, lhs: &vortex_array::arrays::ChunkedArray, rhs: &dyn vortex_array::Array, operator: vortex_array::compute::Operator) -> vortex_error::VortexResult> - -impl vortex_array::compute::CompareKernel for vortex_array::arrays::ConstantVTable - -pub fn vortex_array::arrays::ConstantVTable::compare(&self, lhs: &vortex_array::arrays::ConstantArray, rhs: &dyn vortex_array::Array, operator: vortex_array::compute::Operator) -> vortex_error::VortexResult> - -impl vortex_array::compute::CompareKernel for vortex_array::arrays::DictVTable - -pub fn vortex_array::arrays::DictVTable::compare(&self, lhs: &vortex_array::arrays::DictArray, rhs: &dyn vortex_array::Array, operator: vortex_array::compute::Operator) -> vortex_error::VortexResult> - -impl vortex_array::compute::CompareKernel for vortex_array::arrays::ExtensionVTable - -pub fn vortex_array::arrays::ExtensionVTable::compare(&self, lhs: &vortex_array::arrays::ExtensionArray, rhs: &dyn vortex_array::Array, operator: 
vortex_array::compute::Operator) -> vortex_error::VortexResult> - -impl vortex_array::compute::CompareKernel for vortex_array::arrays::MaskedVTable - -pub fn vortex_array::arrays::MaskedVTable::compare(&self, lhs: &vortex_array::arrays::MaskedArray, rhs: &dyn vortex_array::Array, operator: vortex_array::compute::Operator) -> vortex_error::VortexResult> - -impl vortex_array::compute::CompareKernel for vortex_array::arrays::VarBinVTable - -pub fn vortex_array::arrays::VarBinVTable::compare(&self, lhs: &vortex_array::arrays::VarBinArray, rhs: &dyn vortex_array::Array, operator: vortex_array::compute::Operator) -> vortex_error::VortexResult> - pub trait vortex_array::compute::ComputeFnVTable: 'static + core::marker::Send + core::marker::Sync pub fn vortex_array::compute::ComputeFnVTable::invoke(&self, args: &vortex_array::compute::InvocationArgs<'_>, kernels: &[arcref::ArcRef]) -> vortex_error::VortexResult @@ -6348,10 +6286,6 @@ impl vor pub fn vortex_array::compute::BetweenKernelAdapter::invoke(&self, args: &vortex_array::compute::InvocationArgs<'_>) -> vortex_error::VortexResult> -impl vortex_array::compute::Kernel for vortex_array::compute::CompareKernelAdapter - -pub fn vortex_array::compute::CompareKernelAdapter::invoke(&self, args: &vortex_array::compute::InvocationArgs<'_>) -> vortex_error::VortexResult> - impl vortex_array::compute::Kernel for vortex_array::compute::IsConstantKernelAdapter pub fn vortex_array::compute::IsConstantKernelAdapter::invoke(&self, args: &vortex_array::compute::InvocationArgs<'_>) -> vortex_error::VortexResult> @@ -6544,10 +6478,6 @@ impl vortex_array::compute::Options for vortex_array::compute::IsConstantOpts pub fn vortex_array::compute::IsConstantOpts::as_any(&self) -> &dyn core::any::Any -impl vortex_array::compute::Options for vortex_array::compute::Operator - -pub fn vortex_array::compute::Operator::as_any(&self) -> &dyn core::any::Any - impl vortex_array::compute::Options for 
vortex_scalar::typed_view::primitive::numeric_operator::NumericOperator pub fn vortex_scalar::typed_view::primitive::numeric_operator::NumericOperator::as_any(&self) -> &dyn core::any::Any @@ -7816,6 +7746,8 @@ pub fn vortex_array::expr::Operator::inverse(self) -> core::option::Option pub fn vortex_array::expr::Operator::is_arithmetic(&self) -> bool +pub fn vortex_array::expr::Operator::is_comparison(&self) -> bool + pub fn vortex_array::expr::Operator::logical_inverse(self) -> core::option::Option pub fn vortex_array::expr::Operator::maybe_cmp_operator(self) -> core::option::Option @@ -8006,6 +7938,22 @@ pub type vortex_array::expr::CastReduceAdaptor::Parent = vortex_array::arrays pub fn vortex_array::expr::CastReduceAdaptor::reduce_parent(&self, array: &::Array, parent: vortex_array::arrays::ScalarFnArrayView<'_, vortex_array::expr::Cast>, _child_idx: usize) -> vortex_error::VortexResult> +pub struct vortex_array::expr::CompareExecuteAdaptor(pub V) + +impl core::default::Default for vortex_array::expr::CompareExecuteAdaptor + +pub fn vortex_array::expr::CompareExecuteAdaptor::default() -> vortex_array::expr::CompareExecuteAdaptor + +impl core::fmt::Debug for vortex_array::expr::CompareExecuteAdaptor + +pub fn vortex_array::expr::CompareExecuteAdaptor::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl vortex_array::kernel::ExecuteParentKernel for vortex_array::expr::CompareExecuteAdaptor where V: vortex_array::expr::CompareKernel + +pub type vortex_array::expr::CompareExecuteAdaptor::Parent = vortex_array::arrays::ExactScalarFn + +pub fn vortex_array::expr::CompareExecuteAdaptor::execute_parent(&self, array: &::Array, parent: vortex_array::arrays::ScalarFnArrayView<'_, vortex_array::expr::Binary>, child_idx: usize, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + pub struct vortex_array::expr::DynamicComparison impl vortex_array::expr::VTable for vortex_array::expr::DynamicComparison @@ -9016,6 +8964,22 @@ impl 
vortex_array::expr::CastReduce for vortex_array::arrays::VarBinViewVTable pub fn vortex_array::arrays::VarBinViewVTable::cast(array: &vortex_array::arrays::VarBinViewArray, dtype: &vortex_dtype::dtype::DType) -> vortex_error::VortexResult> +pub trait vortex_array::expr::CompareKernel: vortex_array::vtable::VTable + +pub fn vortex_array::expr::CompareKernel::compare(lhs: &Self::Array, rhs: &dyn vortex_array::Array, operator: vortex_array::compute::Operator, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + +impl vortex_array::expr::CompareKernel for vortex_array::arrays::DictVTable + +pub fn vortex_array::arrays::DictVTable::compare(lhs: &vortex_array::arrays::DictArray, rhs: &dyn vortex_array::Array, operator: vortex_array::compute::Operator, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + +impl vortex_array::expr::CompareKernel for vortex_array::arrays::ExtensionVTable + +pub fn vortex_array::arrays::ExtensionVTable::compare(lhs: &vortex_array::arrays::ExtensionArray, rhs: &dyn vortex_array::Array, operator: vortex_array::compute::Operator, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + +impl vortex_array::expr::CompareKernel for vortex_array::arrays::VarBinVTable + +pub fn vortex_array::arrays::VarBinVTable::compare(lhs: &vortex_array::arrays::VarBinArray, rhs: &dyn vortex_array::Array, operator: vortex_array::compute::Operator, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + pub trait vortex_array::expr::DynExprVTable: 'static + core::marker::Send + core::marker::Sync + vortex_array::expr::vtable::private::Sealed pub fn vortex_array::expr::DynExprVTable::arity(&self, options: &dyn core::any::Any) -> vortex_array::expr::Arity @@ -9952,6 +9916,12 @@ pub type vortex_array::expr::CastExecuteAdaptor::Parent = vortex_array::array pub fn vortex_array::expr::CastExecuteAdaptor::execute_parent(&self, array: &::Array, parent: ::Match, _child_idx: usize, ctx: &mut 
vortex_array::ExecutionCtx) -> vortex_error::VortexResult> +impl vortex_array::kernel::ExecuteParentKernel for vortex_array::expr::CompareExecuteAdaptor where V: vortex_array::expr::CompareKernel + +pub type vortex_array::expr::CompareExecuteAdaptor::Parent = vortex_array::arrays::ExactScalarFn + +pub fn vortex_array::expr::CompareExecuteAdaptor::execute_parent(&self, array: &::Array, parent: vortex_array::arrays::ScalarFnArrayView<'_, vortex_array::expr::Binary>, child_idx: usize, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + impl vortex_array::kernel::ExecuteParentKernel for vortex_array::expr::FillNullExecuteAdaptor where V: vortex_array::expr::FillNullKernel pub type vortex_array::expr::FillNullExecuteAdaptor::Parent = vortex_array::arrays::ExactScalarFn @@ -10850,7 +10820,7 @@ pub fn vortex_array::validity::Validity::all_invalid(&self, len: usize) -> vorte pub fn vortex_array::validity::Validity::all_valid(&self, len: usize) -> vortex_error::VortexResult -pub fn vortex_array::validity::Validity::and(self, rhs: vortex_array::validity::Validity) -> vortex_array::validity::Validity +pub fn vortex_array::validity::Validity::and(self, rhs: vortex_array::validity::Validity) -> vortex_error::VortexResult pub fn vortex_array::validity::Validity::as_array(&self) -> core::option::Option<&vortex_array::ArrayRef> From 075452fdf03212610999bcff0a3e1522a0ead40b Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Mon, 16 Feb 2026 15:45:47 +0000 Subject: [PATCH 12/20] u Signed-off-by: Joe Isaacs --- .../src/decimal_byte_parts/compute/compare.rs | 6 +- .../fastlanes/src/for/compute/compare.rs | 7 +- encodings/sequence/src/compute/compare.rs | 58 ++ encodings/sequence/src/kernel.rs | 557 +----------------- .../src/arrays/varbin/compute/compare.rs | 4 - vortex-array/src/expr/exprs/binary/compare.rs | 21 + 6 files changed, 85 insertions(+), 568 deletions(-) diff --git a/encodings/decimal-byte-parts/src/decimal_byte_parts/compute/compare.rs 
b/encodings/decimal-byte-parts/src/decimal_byte_parts/compute/compare.rs index 94d35a29506..0aa77420b5f 100644 --- a/encodings/decimal-byte-parts/src/decimal_byte_parts/compute/compare.rs +++ b/encodings/decimal-byte-parts/src/decimal_byte_parts/compute/compare.rs @@ -39,14 +39,10 @@ impl CompareKernel for DecimalBytePartsVTable { let nullability = lhs.dtype.nullability() | rhs.dtype().nullability(); let scalar_type = lhs.msp.dtype().with_nullability(nullability); - if rhs_const.is_null() { - return Ok(None); - } - let rhs_decimal = rhs_const .as_decimal() .decimal_value() - .vortex_expect("RHS is not null"); + .vortex_expect("checked for null in entry func"); match decimal_value_wrapper_to_primitive(rhs_decimal, lhs.msp.as_primitive_typed().ptype()) { diff --git a/encodings/fastlanes/src/for/compute/compare.rs b/encodings/fastlanes/src/for/compute/compare.rs index d1ff21ea218..cccdc6daceb 100644 --- a/encodings/fastlanes/src/for/compute/compare.rs +++ b/encodings/fastlanes/src/for/compute/compare.rs @@ -33,13 +33,12 @@ impl CompareKernel for FoRVTable { if let Some(constant) = rhs.as_constant() && let Some(constant) = constant.as_primitive_opt() { - if constant.pvalue().is_none() { - return Ok(None); - } match_each_integer_ptype!(constant.ptype(), |T| { return compare_constant( lhs, - constant.typed_value::().vortex_expect("RHS is not null"), + constant + .typed_value::() + .vortex_expect("null scalar handled in adaptor"), rhs.dtype().nullability(), operator, ); diff --git a/encodings/sequence/src/compute/compare.rs b/encodings/sequence/src/compute/compare.rs index 67e64fe8e93..a18b3fd6f7d 100644 --- a/encodings/sequence/src/compute/compare.rs +++ b/encodings/sequence/src/compute/compare.rs @@ -1,10 +1,68 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +use vortex_array::Array; +use vortex_array::ArrayRef; +use vortex_array::ExecutionCtx; +use vortex_array::arrays::BoolArray; +use 
vortex_array::arrays::ConstantArray; +use vortex_array::compute::Operator; +use vortex_array::expr::CompareKernel; +use vortex_buffer::BitBuffer; use vortex_dtype::NativePType; +use vortex_dtype::Nullability; use vortex_dtype::match_each_integer_ptype; use vortex_error::VortexExpect; +use vortex_error::VortexResult; use vortex_scalar::PValue; +use vortex_scalar::Scalar; + +use crate::SequenceArray; +use crate::array::SequenceVTable; + +impl CompareKernel for SequenceVTable { + fn compare( + lhs: &SequenceArray, + rhs: &dyn Array, + operator: Operator, + _ctx: &mut ExecutionCtx, + ) -> VortexResult> { + // TODO(joe): support other operators (NotEq, Lt, Lte, Gt, Gte) in encoded space. + if operator != Operator::Eq { + return Ok(None); + } + + let Some(constant) = rhs.as_constant() else { + return Ok(None); + }; + + // Check if there exists an integer solution to const = base + (0..len) * multiplier. + let set_idx = find_intersection_scalar( + lhs.base(), + lhs.multiplier(), + lhs.len(), + constant + .as_primitive() + .pvalue() + .vortex_expect("null constant handled in adaptor"), + ); + + let nullability = lhs.dtype().nullability() | rhs.dtype().nullability(); + let validity = match nullability { + Nullability::NonNullable => vortex_array::validity::Validity::NonNullable, + Nullability::Nullable => vortex_array::validity::Validity::AllValid, + }; + + if let Some(set_idx) = set_idx { + let buffer = BitBuffer::from_iter((0..lhs.len()).map(|idx| idx == set_idx)); + Ok(Some(BoolArray::new(buffer, validity).to_array())) + } else { + Ok(Some( + ConstantArray::new(Scalar::bool(false, nullability), lhs.len()).to_array(), + )) + } + } +} /// Find the index where `base + idx * multiplier == intercept`, if one exists. 
/// diff --git a/encodings/sequence/src/kernel.rs b/encodings/sequence/src/kernel.rs index 6dacc4d01ca..4217d3b1823 100644 --- a/encodings/sequence/src/kernel.rs +++ b/encodings/sequence/src/kernel.rs @@ -1,568 +1,15 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use vortex_array::ArrayRef; -use vortex_array::ExecutionCtx; -use vortex_array::IntoArray; -use vortex_array::arrays::BoolArray; -use vortex_array::arrays::ConstantArray; -use vortex_array::arrays::ConstantVTable; -use vortex_array::arrays::ExactScalarFn; use vortex_array::arrays::FilterExecuteAdaptor; -use vortex_array::arrays::ScalarFnArrayView; -use vortex_array::arrays::ScalarFnVTable; use vortex_array::arrays::TakeExecuteAdaptor; -use vortex_array::compute::Operator; -use vortex_array::expr::Binary; -use vortex_array::kernel::ExecuteParentKernel; +use vortex_array::expr::CompareExecuteAdaptor; use vortex_array::kernel::ParentKernelSet; -use vortex_buffer::bitbuffer; -use vortex_buffer::buffer; -use vortex_dtype::DType; -use vortex_dtype::NativePType; -use vortex_dtype::Nullability; -use vortex_dtype::match_each_integer_ptype; -use vortex_error::VortexExpect; -use vortex_error::VortexResult; -use vortex_runend::RunEndArray; -use vortex_scalar::PValue; -use vortex_scalar::Scalar; -use crate::SequenceArray; use crate::SequenceVTable; -use crate::compute::compare::find_intersection_scalar; pub(crate) const PARENT_KERNELS: ParentKernelSet = ParentKernelSet::new(&[ - ParentKernelSet::lift(&SequenceCompareKernel), + ParentKernelSet::lift(&CompareExecuteAdaptor(SequenceVTable)), ParentKernelSet::lift(&FilterExecuteAdaptor(SequenceVTable)), ParentKernelSet::lift(&TakeExecuteAdaptor(SequenceVTable)), ]); - -/// Kernel to execute comparison operations directly on a sequence array. 
-#[derive(Debug)] -struct SequenceCompareKernel; - -impl ExecuteParentKernel for SequenceCompareKernel { - type Parent = ExactScalarFn; - - fn execute_parent( - &self, - array: &SequenceArray, - parent: ScalarFnArrayView<'_, Binary>, - child_idx: usize, - ctx: &mut ExecutionCtx, - ) -> VortexResult> { - // Only handle comparison operators - let Some(cmp_op) = parent.options.maybe_cmp_operator() else { - return Ok(None); - }; - - // Get the ScalarFnArray to access children - let Some(scalar_fn_array) = parent.as_opt::() else { - return Ok(None); - }; - let children = scalar_fn_array.children(); - - // Determine which operand is the constant and which is the sequence - let (cmp_op, constant) = match child_idx { - 0 => { - // sequence is lhs, check if rhs is constant - let rhs = &children[1]; - let Some(constant) = rhs.as_opt::() else { - return Ok(None); - }; - (cmp_op, constant) - } - 1 => { - // sequence is rhs, swap the operator and check if lhs is constant - let lhs = &children[0]; - let Some(constant) = lhs.as_opt::() else { - return Ok(None); - }; - // Swap the operator since we're reversing operand order - (cmp_op.swap(), constant) - } - _ => return Ok(None), - }; - - let constant_pvalue = constant.scalar().as_primitive().pvalue(); - let Some(constant_pvalue) = constant_pvalue else { - // Constant is null - result is all null for comparisons - let nullability = array.dtype().nullability() | constant.dtype().nullability(); - let result_array = - ConstantArray::new(Scalar::null(DType::Bool(nullability)), array.len).to_array(); - return Ok(Some(result_array)); - }; - - let nullability = array.dtype().nullability() | constant.dtype().nullability(); - - // For Eq and NotEq, use specialized logic - if cmp_op == Operator::Eq { - return compare_eq_neq(array, constant_pvalue, nullability, false, ctx); - } - if cmp_op == Operator::NotEq { - return compare_eq_neq(array, constant_pvalue, nullability, true, ctx); - } - - // For ordering comparisons, find the transition 
point - compare_ordering(array, constant_pvalue, cmp_op, nullability, ctx) - } -} - -/// Compare sequence to constant for equality/inequality. -/// When `negate` is false, returns true where sequence == constant. -/// When `negate` is true, returns true where sequence != constant. -fn compare_eq_neq( - array: &SequenceArray, - constant: PValue, - nullability: Nullability, - negate: bool, - _ctx: &mut ExecutionCtx, -) -> VortexResult> { - // For Eq: match_val=true, default_val=false - // For NotEq: match_val=false, default_val=true - let match_val = !negate; - let not_match_val = negate; - - // Check if there exists an integer solution to const = base + idx * multiplier - let Some(set_idx) = - find_intersection_scalar(array.base(), array.multiplier(), array.len, constant) - else { - return Ok(Some( - ConstantArray::new(Scalar::bool(not_match_val, nullability), array.len).into_array(), - )); - }; - let idx = set_idx as u64; - let len = array.len as u64; - - if len == 1 && set_idx == 0 { - let result_array = - ConstantArray::new(Scalar::bool(match_val, nullability), array.len).to_array(); - return Ok(Some(result_array)); - } - - let (ends, values) = if idx == 0 { - let ends = buffer![1u64, len].into_array(); - let values = - BoolArray::new(bitbuffer![match_val, not_match_val], nullability.into()).into_array(); - (ends, values) - } else if idx == len - 1 { - let ends = buffer![idx, len].into_array(); - let values = - BoolArray::new(bitbuffer![not_match_val, match_val], nullability.into()).into_array(); - (ends, values) - } else { - let ends = buffer![idx, idx + 1, len].into_array(); - let values = BoolArray::new( - bitbuffer![not_match_val, match_val, not_match_val], - nullability.into(), - ) - .into_array(); - (ends, values) - }; - Ok(Some(RunEndArray::try_new(ends, values)?.into_array())) -} - -fn compare_ordering( - array: &SequenceArray, - constant: PValue, - operator: Operator, - nullability: Nullability, - _ctx: &mut ExecutionCtx, -) -> VortexResult> { - let 
transition = find_transition_point( - array.base(), - array.multiplier(), - array.len, - constant, - operator, - ); - - let result_array = match transition { - Transition::AllTrue => { - ConstantArray::new(Scalar::bool(true, nullability), array.len).to_array() - } - Transition::AllFalse => { - ConstantArray::new(Scalar::bool(false, nullability), array.len).to_array() - } - Transition::FalseToTrue(idx) => { - // [0..idx) is false, [idx..len) is true - let ends = buffer![idx as u64, array.len as u64].into_array(); - let values = BoolArray::new(bitbuffer![false, true], nullability.into()).into_array(); - RunEndArray::try_new(ends, values)?.into_array() - } - Transition::TrueToFalse(idx) => { - // [0..idx) is true, [idx..len) is false - let ends = buffer![idx as u64, array.len as u64].into_array(); - let values = BoolArray::new(bitbuffer![true, false], nullability.into()).into_array(); - RunEndArray::try_new(ends, values)?.into_array() - } - }; - - Ok(Some(result_array)) -} - -enum Transition { - AllTrue, - AllFalse, - FalseToTrue(usize), - TrueToFalse(usize), -} - -fn find_transition_point( - base: PValue, - multiplier: PValue, - len: usize, - constant: PValue, - operator: Operator, -) -> Transition { - match_each_integer_ptype!(base.ptype(), |P| { - find_transition_point_typed::

( - base.cast::

(), - multiplier.cast::

(), - len, - constant.cast::

(), - operator, - ) - }) -} - -fn find_transition_point_typed( - base: P, - multiplier: P, - len: usize, - constant: P, - operator: Operator, -) -> Transition { - if len == 0 { - return Transition::AllFalse; - } - - let last_idx = P::from_usize(len - 1).vortex_expect("len must fit into type"); - let first_value = base; - let last_value = base + multiplier * last_idx; - - let first_result = eval_comparison(first_value, constant, operator); - let last_result = eval_comparison(last_value, constant, operator); - - if first_result && last_result { - return Transition::AllTrue; - } - if !first_result && !last_result { - return Transition::AllFalse; - } - - // There's a transition point - find it using binary search - let transition_idx = binary_search_transition(base, multiplier, len, constant, operator); - - if first_result { - Transition::TrueToFalse(transition_idx) - } else { - Transition::FalseToTrue(transition_idx) - } -} - -fn eval_comparison(lhs: P, rhs: P, operator: Operator) -> bool { - match operator { - Operator::Lt => lhs.is_lt(rhs), - Operator::Lte => lhs.is_le(rhs), - Operator::Gt => lhs.is_gt(rhs), - Operator::Gte => lhs.is_ge(rhs), - Operator::Eq => lhs.is_eq(rhs), - Operator::NotEq => !lhs.is_eq(rhs), - } -} - -fn binary_search_transition( - base: P, - multiplier: P, - len: usize, - constant: P, - operator: Operator, -) -> usize { - let first_result = eval_comparison(base, constant, operator); - - let mut lo = 0usize; - let mut hi = len; - - while lo < hi { - let mid = lo + (hi - lo) / 2; - let mid_p = P::from_usize(mid).vortex_expect("idx must fit into type"); - let value = base + multiplier * mid_p; - let result = eval_comparison(value, constant, operator); - - if result == first_result { - lo = mid + 1; - } else { - hi = mid; - } - } - - lo -} - -#[cfg(test)] -mod tests { - use vortex_array::ToCanonical; - use vortex_array::arrays::BoolArray; - use vortex_array::arrays::ConstantArray; - use vortex_array::arrays::ScalarFnArrayExt; - use 
vortex_array::assert_arrays_eq; - use vortex_array::expr::Binary; - use vortex_array::expr::Operator as ExprOperator; - use vortex_array::validity::Validity; - use vortex_buffer::BitBuffer; - use vortex_buffer::bitbuffer; - use vortex_dtype::DType; - use vortex_dtype::Nullability; - use vortex_dtype::Nullability::NonNullable; - use vortex_dtype::PType; - use vortex_error::VortexResult; - use vortex_scalar::Scalar; - - use crate::SequenceArray; - - #[test] - fn test_sequence_eq_neq_constant() -> VortexResult<()> { - let len = 1; - let seq = SequenceArray::typed_new(5i64, 1, NonNullable, len)?.to_array(); - let constant = ConstantArray::new(5i64, len).to_array(); - - let compare_array = - Binary.try_new_array(len, ExprOperator::NotEq, [seq.clone(), constant.clone()])?; - - let result = compare_array; - let bool_result = result.to_bool(); - - let expected = BitBuffer::from(vec![false]); - assert_eq!(bool_result.to_bit_buffer(), expected); - - let compare_array = Binary.try_new_array(len, ExprOperator::Eq, [seq, constant])?; - - let result = compare_array; - let bool_result = result.to_bool(); - - let expected = BitBuffer::from(vec![true]); - assert_eq!(bool_result.to_bit_buffer(), expected); - Ok(()) - } - - #[test] - fn test_sequence_gte_constant() -> VortexResult<()> { - let seq = SequenceArray::typed_new(0i64, 1, NonNullable, 10)?.to_array(); - let constant = ConstantArray::new( - Scalar::try_new( - DType::Primitive(PType::I64, Nullability::Nullable), - Some(5i64.into()), - ) - .unwrap(), - 10, - ) - .to_array(); - - let compare_array = Binary.try_new_array(10, ExprOperator::Gte, [seq, constant])?; - - let result = compare_array; - let bool_result = result.to_bool(); - - let expected = BoolArray::new( - bitbuffer![ - false, false, false, false, false, true, true, true, true, true, - ], - Validity::AllValid, - ); - assert_arrays_eq!(bool_result, expected); - Ok(()) - } - - #[test] - fn test_sequence_lt_constant() -> VortexResult<()> { - let seq = 
SequenceArray::typed_new(0i64, 1, NonNullable, 10)?.to_array(); - let constant = ConstantArray::new(5i64, 10).to_array(); - - let compare_array = Binary.try_new_array(10, ExprOperator::Lt, [seq, constant])?; - - let result = compare_array; - let bool_result = result.to_bool(); - - let expected = BitBuffer::from(vec![ - true, true, true, true, true, false, false, false, false, false, - ]); - assert_eq!(bool_result.to_bit_buffer(), expected); - Ok(()) - } - - #[test] - fn test_sequence_lte_constant() -> VortexResult<()> { - let seq = SequenceArray::typed_new(0i64, 1, NonNullable, 10)?.to_array(); - let constant = ConstantArray::new(5i64, 10).to_array(); - - let compare_array = Binary.try_new_array(10, ExprOperator::Lte, [seq, constant])?; - - let result = compare_array; - let bool_result = result.to_bool(); - - // [0,1,2,3,4,5,6,7,8,9] <= 5 - let expected = BitBuffer::from(vec![ - true, true, true, true, true, true, false, false, false, false, - ]); - assert_eq!(bool_result.to_bit_buffer(), expected); - Ok(()) - } - - #[test] - fn test_sequence_gt_constant() -> VortexResult<()> { - let seq = SequenceArray::typed_new(0i64, 1, NonNullable, 10)?.to_array(); - let constant = ConstantArray::new(5i64, 10).to_array(); - - let compare_array = Binary.try_new_array(10, ExprOperator::Gt, [seq, constant])?; - - let result = compare_array; - let bool_result = result.to_bool(); - - // [0,1,2,3,4,5,6,7,8,9] > 5 - let expected = BitBuffer::from(vec![ - false, false, false, false, false, false, true, true, true, true, - ]); - assert_eq!(bool_result.to_bit_buffer(), expected); - Ok(()) - } - - #[test] - fn test_constant_gte_sequence() -> VortexResult<()> { - // Test when constant is on the left side - let constant = ConstantArray::new(5i64, 10).to_array(); - let seq = SequenceArray::typed_new(0i64, 1, NonNullable, 10)?.to_array(); - - let compare_array = Binary.try_new_array(10, ExprOperator::Gte, [constant, seq])?; - - let result = compare_array; - let bool_result = result.to_bool(); 
- - // 5 >= [0,1,2,3,4,5,6,7,8,9] - let expected = BitBuffer::from(vec![ - true, true, true, true, true, true, false, false, false, false, - ]); - assert_eq!(bool_result.to_bit_buffer(), expected); - Ok(()) - } - - #[test] - fn test_sequence_eq_constant() -> VortexResult<()> { - let seq = SequenceArray::typed_new(0i64, 1, NonNullable, 10)?.to_array(); - let constant = ConstantArray::new(5i64, 10).to_array(); - - let compare_array = Binary.try_new_array(10, ExprOperator::Eq, [seq, constant])?; - - let result = compare_array; - let bool_result = result.to_bool(); - - let expected = BitBuffer::from(vec![ - false, false, false, false, false, true, false, false, false, false, - ]); - assert_eq!(bool_result.to_bit_buffer(), expected); - Ok(()) - } - - #[test] - fn test_sequence_not_eq_constant() -> VortexResult<()> { - let seq = SequenceArray::typed_new(0i64, 1, NonNullable, 10)?.to_array(); - let constant = ConstantArray::new(5i64, 10).to_array(); - - let compare_array = Binary.try_new_array(10, ExprOperator::NotEq, [seq, constant])?; - - let result = compare_array; - let bool_result = result.to_bool(); - - let expected = BitBuffer::from(vec![ - true, true, true, true, true, false, true, true, true, true, - ]); - assert_eq!(bool_result.to_bit_buffer(), expected); - Ok(()) - } - - #[test] - fn test_sequence_all_true() -> VortexResult<()> { - let seq = SequenceArray::typed_new(10i64, 1, NonNullable, 5)?.to_array(); - let constant = ConstantArray::new(5i64, 5).to_array(); - - let compare_array = Binary.try_new_array(5, ExprOperator::Gt, [seq, constant])?; - - let result = compare_array; - let bool_result = result.to_bool(); - - let expected = BitBuffer::from(vec![true, true, true, true, true]); - assert_eq!(bool_result.to_bit_buffer(), expected); - Ok(()) - } - - #[test] - fn test_sequence_all_false() -> VortexResult<()> { - let seq = SequenceArray::typed_new(0i64, 1, NonNullable, 5)?.to_array(); - let constant = ConstantArray::new(100i64, 5).to_array(); - - let 
compare_array = Binary.try_new_array(5, ExprOperator::Gt, [seq, constant])?; - - let result = compare_array; - let bool_result = result.to_bool(); - - let expected = BitBuffer::from(vec![false, false, false, false, false]); - assert_eq!(bool_result.to_bit_buffer(), expected); - Ok(()) - } - - #[test] - fn test_sequence_multiplier_2_gte() -> VortexResult<()> { - // Sequence: [0, 2, 4, 6, 8, 10, 12, 14, 16, 18] - let seq = SequenceArray::typed_new(0i64, 2, NonNullable, 10)?.to_array(); - let constant = ConstantArray::new(10i64, 10).to_array(); - - let compare_array = Binary.try_new_array(10, ExprOperator::Gte, [seq, constant])?; - - let result = compare_array; - let bool_result = result.to_bool(); - - // [0, 2, 4, 6, 8, 10, 12, 14, 16, 18] >= 10 - let expected = BitBuffer::from(vec![ - false, false, false, false, false, true, true, true, true, true, - ]); - assert_eq!(bool_result.to_bit_buffer(), expected); - Ok(()) - } - - #[test] - fn test_sequence_multiplier_3_eq() -> VortexResult<()> { - // Sequence: [5, 8, 11, 14, 17, 20, 23, 26] - let seq = SequenceArray::typed_new(5i64, 3, NonNullable, 8)?.to_array(); - let constant = ConstantArray::new(14i64, 8).to_array(); - - let compare_array = Binary.try_new_array(8, ExprOperator::Eq, [seq, constant])?; - - let result = compare_array; - let bool_result = result.to_bool(); - - // 14 is at index 3: (14 - 5) / 3 = 3 - let expected = BitBuffer::from(vec![false, false, false, true, false, false, false, false]); - assert_eq!(bool_result.to_bit_buffer(), expected); - Ok(()) - } - - #[test] - fn test_sequence_negative_multiplier_lt() -> VortexResult<()> { - // Sequence: [100, 90, 80, 70, 60, 50, 40, 30, 20, 10] - let seq = SequenceArray::typed_new(100i64, -10, NonNullable, 10)?.to_array(); - let constant = ConstantArray::new(50i64, 10).to_array(); - - let compare_array = Binary.try_new_array(10, ExprOperator::Lt, [seq, constant])?; - - let result = compare_array; - let bool_result = result.to_bool(); - - // [100, 90, 80, 70, 60, 
50, 40, 30, 20, 10] < 50 - let expected = BitBuffer::from(vec![ - false, false, false, false, false, false, true, true, true, true, - ]); - assert_eq!(bool_result.to_bit_buffer(), expected); - Ok(()) - } -} diff --git a/vortex-array/src/arrays/varbin/compute/compare.rs b/vortex-array/src/arrays/varbin/compute/compare.rs index adbdff15e99..50491952580 100644 --- a/vortex-array/src/arrays/varbin/compute/compare.rs +++ b/vortex-array/src/arrays/varbin/compute/compare.rs @@ -40,10 +40,6 @@ impl CompareKernel for VarBinVTable { _ctx: &mut ExecutionCtx, ) -> VortexResult> { if let Some(rhs_const) = rhs.as_constant() { - if rhs_const.is_null() { - return Ok(None); - } - let nullable = lhs.dtype().is_nullable() || rhs_const.dtype().is_nullable(); let len = lhs.len(); diff --git a/vortex-array/src/expr/exprs/binary/compare.rs b/vortex-array/src/expr/exprs/binary/compare.rs index 9dbc632c081..8f60da83072 100644 --- a/vortex-array/src/expr/exprs/binary/compare.rs +++ b/vortex-array/src/expr/exprs/binary/compare.rs @@ -79,6 +79,27 @@ where _ => return Ok(None), }; + let len = array.len(); + let nullable = array.dtype().is_nullable() || other.dtype().is_nullable(); + + // Empty array → empty bool result + if len == 0 { + return Ok(Some( + Canonical::empty(&vortex_dtype::DType::Bool(nullable.into())).into_array(), + )); + } + + // Null constant on either side → all-null bool result + if other.as_constant().is_some_and(|s| s.is_null()) { + return Ok(Some( + ConstantArray::new( + Scalar::null(vortex_dtype::DType::Bool(nullable.into())), + len, + ) + .into_array(), + )); + } + V::compare(array, other.as_ref(), cmp_op, ctx) } } From d11784434ac248fc9d52ec13e641449ccbbff71f Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Mon, 16 Feb 2026 15:50:02 +0000 Subject: [PATCH 13/20] u Signed-off-by: Joe Isaacs --- vortex-array/src/expr/exprs/binary/compare.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/vortex-array/src/expr/exprs/binary/compare.rs 
b/vortex-array/src/expr/exprs/binary/compare.rs index 8f60da83072..cd44365b2cf 100644 --- a/vortex-array/src/expr/exprs/binary/compare.rs +++ b/vortex-array/src/expr/exprs/binary/compare.rs @@ -73,6 +73,7 @@ where let children = scalar_fn_array.children(); // Normalize so `array` is always LHS, swapping the operator if needed + // TODO(joe): should be go this here or in the Rule/Kernel let (cmp_op, other) = match child_idx { 0 => (cmp_op, &children[1]), 1 => (cmp_op.swap(), &children[0]), From eef33601e400171ab444fd879a544b32328818a5 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Mon, 16 Feb 2026 15:59:25 +0000 Subject: [PATCH 14/20] u Signed-off-by: Joe Isaacs --- vortex-array/src/arrays/masked/vtable/kernel.rs | 1 + vortex-array/src/expr/exprs/binary/mod.rs | 14 -------------- vortex-test/e2e/src/lib.rs | 2 +- 3 files changed, 2 insertions(+), 15 deletions(-) diff --git a/vortex-array/src/arrays/masked/vtable/kernel.rs b/vortex-array/src/arrays/masked/vtable/kernel.rs index 869e8601aba..cacfbd98cd4 100644 --- a/vortex-array/src/arrays/masked/vtable/kernel.rs +++ b/vortex-array/src/arrays/masked/vtable/kernel.rs @@ -5,5 +5,6 @@ use crate::arrays::MaskedVTable; use crate::arrays::TakeExecuteAdaptor; use crate::kernel::ParentKernelSet; +// TODO(joe): add CompareExecuteAdaptor to push comparisons through the mask without canonicalizing. pub(super) const PARENT_KERNELS: ParentKernelSet = ParentKernelSet::new(&[ParentKernelSet::lift(&TakeExecuteAdaptor(MaskedVTable))]); diff --git a/vortex-array/src/expr/exprs/binary/mod.rs b/vortex-array/src/expr/exprs/binary/mod.rs index 8bbeb5a4675..16166e088f9 100644 --- a/vortex-array/src/expr/exprs/binary/mod.rs +++ b/vortex-array/src/expr/exprs/binary/mod.rs @@ -105,20 +105,6 @@ impl VTable for Binary { && !lhs.is_extension() && !rhs.is_extension() { - if lhs.is_float() && rhs.is_float() { - vortex_bail!( - "Cannot compare different floating-point types ({}, {}). 
Consider using cast.", - lhs, - rhs, - ); - } - if lhs.is_int() && rhs.is_int() { - vortex_bail!( - "Cannot compare different fixed-width types ({}, {}). Consider using cast.", - lhs, - rhs, - ); - } vortex_bail!("Cannot compare different DTypes {} and {}", lhs, rhs); } diff --git a/vortex-test/e2e/src/lib.rs b/vortex-test/e2e/src/lib.rs index 765d4de32bd..2e8d3c77be0 100644 --- a/vortex-test/e2e/src/lib.rs +++ b/vortex-test/e2e/src/lib.rs @@ -28,7 +28,7 @@ mod tests { #[cfg(feature = "unstable_encodings")] const EXPECTED_SIZE: usize = 216188; #[cfg(not(feature = "unstable_encodings"))] - const EXPECTED_SIZE: usize = 216188; + const EXPECTED_SIZE: usize = 216156; let futures: Vec<_> = (0..5) .map(|_| { let array = array.clone(); From 1af5afd8164def238346f6eb7beebf3efb55cf21 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Mon, 16 Feb 2026 16:03:55 +0000 Subject: [PATCH 15/20] u Signed-off-by: Joe Isaacs --- encodings/sequence/public-api.lock | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/encodings/sequence/public-api.lock b/encodings/sequence/public-api.lock index a0ca040a86c..b939289103e 100644 --- a/encodings/sequence/public-api.lock +++ b/encodings/sequence/public-api.lock @@ -92,6 +92,10 @@ impl vortex_array::compute::min_max::MinMaxKernel for vortex_sequence::SequenceV pub fn vortex_sequence::SequenceVTable::min_max(&self, array: &vortex_sequence::SequenceArray) -> vortex_error::VortexResult> +impl vortex_array::expr::exprs::binary::compare::CompareKernel for vortex_sequence::SequenceVTable + +pub fn vortex_sequence::SequenceVTable::compare(lhs: &vortex_sequence::SequenceArray, rhs: &dyn vortex_array::array::Array, operator: vortex_array::compute::compare::Operator, _ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult> + impl vortex_array::expr::exprs::cast::kernel::CastReduce for vortex_sequence::SequenceVTable pub fn vortex_sequence::SequenceVTable::cast(array: &vortex_sequence::SequenceArray, dtype: 
&vortex_dtype::dtype::DType) -> vortex_error::VortexResult> From 0c1ecf38211730f0216d417668bb83ce72df6559 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Mon, 16 Feb 2026 16:17:00 +0000 Subject: [PATCH 16/20] u Signed-off-by: Joe Isaacs --- vortex-array/src/compute/compare.rs | 54 ----------------------------- 1 file changed, 54 deletions(-) diff --git a/vortex-array/src/compute/compare.rs b/vortex-array/src/compute/compare.rs index 66ba9bc1ed3..8e6719f2c31 100644 --- a/vortex-array/src/compute/compare.rs +++ b/vortex-array/src/compute/compare.rs @@ -403,58 +403,4 @@ mod tests { assert!(result.scalar_at(1).unwrap().is_valid()); assert!(result.scalar_at(2).unwrap().is_valid()); } - - #[test] - fn test_different_floats_error_messages() { - let result = compare( - &buffer![0.0f32].into_array(), - &buffer![0.0f64].into_array(), - Operator::Lt, - ); - assert!(result.as_ref().is_err_and(|err| { - err.to_string() - .contains("Cannot compare different floating-point types") - })); - - let expr = lt(get_item("l", root()), get_item("r", root())); - let array = StructArray::from_fields(&[ - ("l", buffer![0.0f32].into_array()), - ("r", buffer![0.0f64].into_array()), - ]) - .unwrap() - .into_array(); - // Force evaluation by calling scalar_at - let result = array.apply(&expr).and_then(|arr| arr.scalar_at(0)); - assert!(result.as_ref().is_err_and(|err| { - err.to_string() - .contains("Cannot compare different floating-point types") - })); - } - - #[test] - fn test_different_ints_error_messages() { - let result = compare( - &buffer![0u8].into_array(), - &buffer![0u16].into_array(), - Operator::Lt, - ); - assert!(result.as_ref().is_err_and(|err| { - err.to_string() - .contains("Cannot compare different fixed-width types") - })); - - let expr = lt(get_item("l", root()), get_item("r", root())); - let array = StructArray::from_fields(&[ - ("l", buffer![0u8].into_array()), - ("r", buffer![0u16].into_array()), - ]) - .unwrap() - .into_array(); - // Force evaluation by calling scalar_at 
- let result = array.apply(&expr).and_then(|arr| arr.scalar_at(0)); - assert!(result.as_ref().is_err_and(|err| { - err.to_string() - .contains("Cannot compare different fixed-width types") - })); - } } From d6f62b528f1abeea1f2f35a0a0d2129c9230ab84 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Mon, 16 Feb 2026 16:34:09 +0000 Subject: [PATCH 17/20] u Signed-off-by: Joe Isaacs --- vortex-array/src/compute/compare.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/vortex-array/src/compute/compare.rs b/vortex-array/src/compute/compare.rs index 8e6719f2c31..c773a1234d2 100644 --- a/vortex-array/src/compute/compare.rs +++ b/vortex-array/src/compute/compare.rs @@ -176,9 +176,6 @@ mod tests { use crate::arrays::VarBinArray; use crate::arrays::VarBinViewArray; use crate::assert_arrays_eq; - use crate::expr::get_item; - use crate::expr::lt; - use crate::expr::root; use crate::test_harness::to_int_indices; use crate::validity::Validity; From 301682b5aa6453bc560a15a603b5e9b493e52e18 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Mon, 16 Feb 2026 17:39:09 +0000 Subject: [PATCH 18/20] u Signed-off-by: Joe Isaacs --- encodings/fsst/benches/fsst_compress.rs | 29 ++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/encodings/fsst/benches/fsst_compress.rs b/encodings/fsst/benches/fsst_compress.rs index c0dd99db5f0..0ce978e9243 100644 --- a/encodings/fsst/benches/fsst_compress.rs +++ b/encodings/fsst/benches/fsst_compress.rs @@ -10,6 +10,8 @@ use rand::Rng; use rand::SeedableRng; use rand::rngs::StdRng; use vortex_array::IntoArray; +use vortex_array::LEGACY_SESSION; +use vortex_array::RecursiveCanonical; use vortex_array::VortexSessionExecute; use vortex_array::arrays::ChunkedArray; use vortex_array::arrays::ConstantArray; @@ -87,9 +89,18 @@ fn pushdown_compare(bencher: Bencher, (string_count, avg_len, unique_chars): (us let constant = ConstantArray::new(Scalar::from(&b"const"[..]), array.len()); bencher - .with_inputs(|| (&fsst_array, 
&constant)) - .bench_refs(|(fsst_array, constant)| { - compare(fsst_array.as_ref(), constant.as_ref(), Operator::Eq).unwrap(); + .with_inputs(|| { + ( + &fsst_array, + &constant, + LEGACY_SESSION.create_execution_ctx(), + ) + }) + .bench_refs(|(fsst_array, constant, mut ctx)| { + compare(fsst_array.as_ref(), constant.as_ref(), Operator::Eq) + .unwrap() + .execute::(&mut ctx) + .unwrap(); }) } @@ -104,14 +115,22 @@ fn canonicalize_compare( let constant = ConstantArray::new(Scalar::from(&b"const"[..]), array.len()); bencher - .with_inputs(|| (&fsst_array, &constant)) - .bench_refs(|(fsst_array, constant)| { + .with_inputs(|| { + ( + &fsst_array, + &constant, + LEGACY_SESSION.create_execution_ctx(), + ) + }) + .bench_refs(|(fsst_array, constant, mut ctx)| { compare( fsst_array.to_canonical().unwrap().as_ref(), constant.as_ref(), Operator::Eq, ) .unwrap() + .execute::(&mut ctx) + .unwrap(); }); } From 29c211acc44f3cdb9d31301bbf0c06fc6b0f2b69 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Mon, 16 Feb 2026 17:45:55 +0000 Subject: [PATCH 19/20] u Signed-off-by: Joe Isaacs --- encodings/fsst/benches/fsst_compress.rs | 26 +++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/encodings/fsst/benches/fsst_compress.rs b/encodings/fsst/benches/fsst_compress.rs index 0ce978e9243..0b43ad5353d 100644 --- a/encodings/fsst/benches/fsst_compress.rs +++ b/encodings/fsst/benches/fsst_compress.rs @@ -96,10 +96,10 @@ fn pushdown_compare(bencher: Bencher, (string_count, avg_len, unique_chars): (us LEGACY_SESSION.create_execution_ctx(), ) }) - .bench_refs(|(fsst_array, constant, mut ctx)| { + .bench_refs(|(fsst_array, constant, ctx)| { compare(fsst_array.as_ref(), constant.as_ref(), Operator::Eq) .unwrap() - .execute::(&mut ctx) + .execute::(ctx) .unwrap(); }) } @@ -122,14 +122,14 @@ fn canonicalize_compare( LEGACY_SESSION.create_execution_ctx(), ) }) - .bench_refs(|(fsst_array, constant, mut ctx)| { + .bench_refs(|(fsst_array, constant, ctx)| { compare( 
fsst_array.to_canonical().unwrap().as_ref(), constant.as_ref(), Operator::Eq, ) .unwrap() - .execute::(&mut ctx) + .execute::(ctx) .unwrap(); }); } @@ -154,14 +154,16 @@ fn chunked_canonicalize_into( ) { let array = generate_chunked_test_data(chunk_size, string_count, avg_len, unique_chars); - bencher.with_inputs(|| &array).bench_refs(|array| { - let mut builder = - VarBinViewBuilder::with_capacity(DType::Binary(Nullability::NonNullable), array.len()); - array - .append_to_builder(&mut builder, &mut SESSION.create_execution_ctx()) - .unwrap(); - builder.finish() - }); + bencher + .with_inputs(|| (&array, &mut SESSION.create_execution_ctx())) + .bench_refs(|(array, ctx)| { + let mut builder = VarBinViewBuilder::with_capacity( + DType::Binary(Nullability::NonNullable), + array.len(), + ); + array.append_to_builder(&mut builder, ctx).unwrap(); + builder.finish() + }); } #[divan::bench(args = CHUNKED_BENCH_ARGS)] From ac4743dfe5f07a44daa0dc502cb40df1a2c05e6e Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Mon, 16 Feb 2026 17:51:31 +0000 Subject: [PATCH 20/20] u Signed-off-by: Joe Isaacs --- encodings/fsst/benches/fsst_compress.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/encodings/fsst/benches/fsst_compress.rs b/encodings/fsst/benches/fsst_compress.rs index 0b43ad5353d..d5f8660be8c 100644 --- a/encodings/fsst/benches/fsst_compress.rs +++ b/encodings/fsst/benches/fsst_compress.rs @@ -155,7 +155,7 @@ fn chunked_canonicalize_into( let array = generate_chunked_test_data(chunk_size, string_count, avg_len, unique_chars); bencher - .with_inputs(|| (&array, &mut SESSION.create_execution_ctx())) + .with_inputs(|| (&array, SESSION.create_execution_ctx())) .bench_refs(|(array, ctx)| { let mut builder = VarBinViewBuilder::with_capacity( DType::Binary(Nullability::NonNullable),