From 3537e1b2778249de26390a3fbe01ff1b5d52e598 Mon Sep 17 00:00:00 2001 From: Robert Kruszewski Date: Fri, 1 May 2026 09:31:49 -0400 Subject: [PATCH 01/17] Introduce ParentRef and ArrayParts::optimize chain `ArrayRef::slice/filter/take` previously allocated a wrapper array (`SliceArray`, `FilterArray`, `DictArray`) and called `.optimize()` on the resulting `ArrayRef`, relying on a `reduce_parent` rule to throw the wrapper away. The wrapper allocation was always paid even when reduction ran in one shot. This change establishes the API surface for moving the wrapper onto the stack: - `ParentRef<'a>`: a parent representation that optionally borrows an `&ArrayRef` and otherwise carries the encoding-specific data, dtype, length, slots, and encoding id directly. `into_array_ref(self)` clones the underlying `Arc` when the parent is heap-backed. - `ParentView<'a, V>`: a typed view of a parent that derefs to `V::ArrayData` without holding an `&ArrayRef`. Used by the upcoming matcher path that accepts stack-allocated parents. - `DynArray::data_any` exposes the encoding-specific data so a matcher can downcast to `V::ArrayData` from a `ParentRef` regardless of whether the parent is heap- or stack-backed. - `ArrayParts::optimize`, `optimize_ctx(session)`, and `into_array`, plus `Optimized` with its own `into_array`. Callers chain `parts.optimize()?.into_array()` so reduction is an explicit, orthogonal step from materialization. - `ArrayRef::slice / filter / take` now build an `ArrayParts` and drive it through the chain. The internals of `ArrayParts::optimize` still materialize before running the existing `reduce_parent` chain, so this PR does not yet remove the wrapper's `Arc` allocation. Wiring `ParentRef` through `DynArray::reduce_parent`, `VTable::reduce_parent`, `ParentRuleSet`, `DynArrayParentReduceRule`, `ReduceParentFn`, the `Matcher` API, and the per-encoding rule bodies is the follow-up that delivers the allocation savings. 
- `cargo build --workspace` - `cargo nextest run -p vortex-array -p vortex-fastlanes -p vortex-fsst -p vortex-alp -p vortex-runend -p vortex-zigzag` (all 3078 pass; the 21 skipped are timezone-dependent and unrelated to this change) - `cargo clippy -p vortex-array --all-targets --all-features` - `cargo +nightly fmt --all -- --check` - `./scripts/public-api.sh` Signed-off-by: Robert Kruszewski --- encodings/alp/src/alp/array.rs | 3 +- encodings/alp/src/alp_rd/array.rs | 3 +- encodings/bytebool/src/array.rs | 3 +- encodings/datetime-parts/src/array.rs | 3 +- .../src/decimal_byte_parts/mod.rs | 3 +- .../fastlanes/src/bitpacking/compute/slice.rs | 5 +- .../fastlanes/src/bitpacking/vtable/mod.rs | 3 +- .../src/bitpacking/vtable/operations.rs | 5 +- encodings/fastlanes/src/delta/vtable/mod.rs | 3 +- encodings/fastlanes/src/for/vtable/mod.rs | 3 +- encodings/fastlanes/src/rle/vtable/mod.rs | 3 +- encodings/fsst/src/array.rs | 3 +- encodings/pco/src/array.rs | 3 +- encodings/runend/src/array.rs | 3 +- encodings/sequence/src/array.rs | 3 +- encodings/sparse/src/lib.rs | 3 +- encodings/zigzag/src/array.rs | 3 +- encodings/zstd/src/array.rs | 3 +- vortex-array/src/array/erased.rs | 72 ++-- vortex-array/src/array/mod.rs | 15 +- vortex-array/src/array/parent.rs | 399 ++++++++++++++++++ vortex-array/src/array/typed.rs | 30 ++ vortex-array/src/array/view.rs | 98 ++++- vortex-array/src/array/vtable/mod.rs | 3 +- vortex-array/src/arrays/bool/vtable/mod.rs | 3 +- vortex-array/src/arrays/chunked/vtable/mod.rs | 4 +- .../src/arrays/constant/vtable/mod.rs | 3 +- vortex-array/src/arrays/decimal/vtable/mod.rs | 3 +- vortex-array/src/arrays/dict/array.rs | 44 +- vortex-array/src/arrays/dict/take.rs | 51 +++ vortex-array/src/arrays/dict/vtable/mod.rs | 3 +- .../src/arrays/extension/vtable/mod.rs | 3 +- vortex-array/src/arrays/filter/array.rs | 33 +- vortex-array/src/arrays/filter/kernel.rs | 41 ++ vortex-array/src/arrays/filter/vtable.rs | 4 +- .../src/arrays/fixed_size_list/vtable/mod.rs 
| 4 +- vortex-array/src/arrays/list/vtable/mod.rs | 4 +- .../src/arrays/listview/vtable/mod.rs | 4 +- vortex-array/src/arrays/masked/vtable/mod.rs | 3 +- vortex-array/src/arrays/null/mod.rs | 3 +- vortex-array/src/arrays/patched/vtable/mod.rs | 3 +- .../src/arrays/primitive/vtable/mod.rs | 3 +- .../src/arrays/scalar_fn/vtable/mod.rs | 44 +- vortex-array/src/arrays/shared/vtable.rs | 1 + vortex-array/src/arrays/slice/array.rs | 29 +- vortex-array/src/arrays/slice/mod.rs | 20 + vortex-array/src/arrays/slice/vtable.rs | 4 +- .../src/arrays/struct_/compute/cast.rs | 8 +- .../src/arrays/struct_/compute/rules.rs | 9 +- vortex-array/src/arrays/struct_/vtable/mod.rs | 4 +- vortex-array/src/arrays/varbin/vtable/mod.rs | 4 +- .../src/arrays/varbinview/vtable/mod.rs | 3 +- vortex-array/src/arrays/variant/vtable/mod.rs | 3 +- vortex-array/src/canonical.rs | 45 ++ vortex-array/src/columnar.rs | 24 +- vortex-array/src/executor.rs | 7 +- vortex-array/src/matcher.rs | 43 +- vortex-array/src/optimizer/kernels.rs | 16 +- vortex-array/src/optimizer/mod.rs | 6 +- vortex-array/src/optimizer/rules.rs | 64 ++- .../src/dynamic_dispatch/plan_builder.rs | 4 +- 61 files changed, 1034 insertions(+), 195 deletions(-) create mode 100644 vortex-array/src/array/parent.rs diff --git a/encodings/alp/src/alp/array.rs b/encodings/alp/src/alp/array.rs index cac93d1d27d..8dd43935644 100644 --- a/encodings/alp/src/alp/array.rs +++ b/encodings/alp/src/alp/array.rs @@ -19,6 +19,7 @@ use vortex_array::ArrayView; use vortex_array::ExecutionCtx; use vortex_array::ExecutionResult; use vortex_array::IntoArray; +use vortex_array::ParentRef; use vortex_array::Precision; use vortex_array::TypedArrayRef; use vortex_array::array_slots; @@ -183,7 +184,7 @@ impl VTable for ALP { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { RULES.evaluate(array, parent, child_idx) diff --git a/encodings/alp/src/alp_rd/array.rs 
b/encodings/alp/src/alp_rd/array.rs index 099b9e1a3ea..5539d285367 100644 --- a/encodings/alp/src/alp_rd/array.rs +++ b/encodings/alp/src/alp_rd/array.rs @@ -22,6 +22,7 @@ use vortex_array::ExecutionCtx; use vortex_array::ExecutionResult; use vortex_array::IntoArray; use vortex_array::LEGACY_SESSION; +use vortex_array::ParentRef; use vortex_array::Precision; use vortex_array::TypedArrayRef; use vortex_array::VortexSessionExecute; @@ -302,7 +303,7 @@ impl VTable for ALPRD { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { RULES.evaluate(array, parent, child_idx) diff --git a/encodings/bytebool/src/array.rs b/encodings/bytebool/src/array.rs index 40da3809d01..28656a6f46d 100644 --- a/encodings/bytebool/src/array.rs +++ b/encodings/bytebool/src/array.rs @@ -17,6 +17,7 @@ use vortex_array::ArrayView; use vortex_array::ExecutionCtx; use vortex_array::ExecutionResult; use vortex_array::IntoArray; +use vortex_array::ParentRef; use vortex_array::Precision; use vortex_array::TypedArrayRef; use vortex_array::arrays::BoolArray; @@ -143,7 +144,7 @@ impl VTable for ByteBool { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { crate::rules::RULES.evaluate(array, parent, child_idx) diff --git a/encodings/datetime-parts/src/array.rs b/encodings/datetime-parts/src/array.rs index e603b1ddfe5..db773010bbf 100644 --- a/encodings/datetime-parts/src/array.rs +++ b/encodings/datetime-parts/src/array.rs @@ -18,6 +18,7 @@ use vortex_array::ArrayView; use vortex_array::ExecutionCtx; use vortex_array::ExecutionResult; use vortex_array::IntoArray; +use vortex_array::ParentRef; use vortex_array::Precision; use vortex_array::array_slots; use vortex_array::arrays::Primitive; @@ -195,7 +196,7 @@ impl VTable for DateTimeParts { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, 
) -> VortexResult> { PARENT_RULES.evaluate(array, parent, child_idx) diff --git a/encodings/decimal-byte-parts/src/decimal_byte_parts/mod.rs b/encodings/decimal-byte-parts/src/decimal_byte_parts/mod.rs index 76349f40910..c6f7f5ded94 100644 --- a/encodings/decimal-byte-parts/src/decimal_byte_parts/mod.rs +++ b/encodings/decimal-byte-parts/src/decimal_byte_parts/mod.rs @@ -20,6 +20,7 @@ use vortex_array::ArrayRef; use vortex_array::ExecutionCtx; use vortex_array::ExecutionResult; use vortex_array::IntoArray; +use vortex_array::ParentRef; use vortex_array::Precision; use vortex_array::TypedArrayRef; use vortex_array::arrays::DecimalArray; @@ -156,7 +157,7 @@ impl VTable for DecimalByteParts { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { PARENT_RULES.evaluate(array, parent, child_idx) diff --git a/encodings/fastlanes/src/bitpacking/compute/slice.rs b/encodings/fastlanes/src/bitpacking/compute/slice.rs index c019cccd003..1426795f27f 100644 --- a/encodings/fastlanes/src/bitpacking/compute/slice.rs +++ b/encodings/fastlanes/src/bitpacking/compute/slice.rs @@ -73,6 +73,7 @@ fn slice_bitpacked( mod tests { use vortex_array::IntoArray; use vortex_array::LEGACY_SESSION; + use vortex_array::ParentRef; use vortex_array::VortexSessionExecute; use vortex_array::arrays::PrimitiveArray; use vortex_array::arrays::SliceArray; @@ -87,11 +88,11 @@ mod tests { let values = PrimitiveArray::from_iter(0u32..2048); let bitpacked = bitpack_encode(&values, 11, None, &mut ctx)?; - let slice_array = SliceArray::new(bitpacked.clone().into_array(), 500..1500); + let slice_array = SliceArray::new(bitpacked.clone().into_array(), 500..1500).into_array(); let bitpacked_ref = bitpacked.into_array(); let reduced = bitpacked_ref - .reduce_parent(&slice_array.into_array(), 0)? + .reduce_parent(&ParentRef::from_array_ref(&slice_array), 0)? 
.expect("expected slice kernel to execute"); assert!(reduced.is::()); diff --git a/encodings/fastlanes/src/bitpacking/vtable/mod.rs b/encodings/fastlanes/src/bitpacking/vtable/mod.rs index 912dd4ff44b..4a850bdc343 100644 --- a/encodings/fastlanes/src/bitpacking/vtable/mod.rs +++ b/encodings/fastlanes/src/bitpacking/vtable/mod.rs @@ -16,6 +16,7 @@ use vortex_array::ArrayView; use vortex_array::ExecutionCtx; use vortex_array::ExecutionResult; use vortex_array::IntoArray; +use vortex_array::ParentRef; use vortex_array::Precision; use vortex_array::buffer::BufferHandle; use vortex_array::builders::ArrayBuilder; @@ -279,7 +280,7 @@ impl VTable for BitPacked { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { RULES.evaluate(array, parent, child_idx) diff --git a/encodings/fastlanes/src/bitpacking/vtable/operations.rs b/encodings/fastlanes/src/bitpacking/vtable/operations.rs index 4c277163719..01540cf210a 100644 --- a/encodings/fastlanes/src/bitpacking/vtable/operations.rs +++ b/encodings/fastlanes/src/bitpacking/vtable/operations.rs @@ -35,6 +35,7 @@ mod test { use vortex_array::ArrayRef; use vortex_array::IntoArray; use vortex_array::LEGACY_SESSION; + use vortex_array::ParentRef; use vortex_array::VortexSessionExecute; use vortex_array::arrays::PrimitiveArray; use vortex_array::arrays::SliceArray; @@ -63,9 +64,9 @@ mod test { fn slice_via_reduce(array: &BitPackedArray, range: Range) -> BitPackedArray { let array_ref = array.clone().into_array(); - let slice_array = SliceArray::new(array_ref.clone(), range); + let slice_array = SliceArray::new(array_ref.clone(), range).into_array(); let sliced = array_ref - .reduce_parent(&slice_array.into_array(), 0) + .reduce_parent(&ParentRef::from_array_ref(&slice_array), 0) .expect("execute_parent failed") .expect("expected slice kernel to execute"); sliced.as_::().into_owned() diff --git a/encodings/fastlanes/src/delta/vtable/mod.rs 
b/encodings/fastlanes/src/delta/vtable/mod.rs index b5e68791ceb..30b51dfcf58 100644 --- a/encodings/fastlanes/src/delta/vtable/mod.rs +++ b/encodings/fastlanes/src/delta/vtable/mod.rs @@ -15,6 +15,7 @@ use vortex_array::ArrayView; use vortex_array::ExecutionCtx; use vortex_array::ExecutionResult; use vortex_array::IntoArray; +use vortex_array::ParentRef; use vortex_array::Precision; use vortex_array::arrays::PrimitiveArray; use vortex_array::buffer::BufferHandle; @@ -110,7 +111,7 @@ impl VTable for Delta { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { rules::RULES.evaluate(array, parent, child_idx) diff --git a/encodings/fastlanes/src/for/vtable/mod.rs b/encodings/fastlanes/src/for/vtable/mod.rs index 899276341de..0a86b30c166 100644 --- a/encodings/fastlanes/src/for/vtable/mod.rs +++ b/encodings/fastlanes/src/for/vtable/mod.rs @@ -15,6 +15,7 @@ use vortex_array::ArrayView; use vortex_array::ExecutionCtx; use vortex_array::ExecutionResult; use vortex_array::IntoArray; +use vortex_array::ParentRef; use vortex_array::Precision; use vortex_array::arrays::PrimitiveArray; use vortex_array::buffer::BufferHandle; @@ -141,7 +142,7 @@ impl VTable for FoR { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { PARENT_RULES.evaluate(array, parent, child_idx) diff --git a/encodings/fastlanes/src/rle/vtable/mod.rs b/encodings/fastlanes/src/rle/vtable/mod.rs index 989b462ab99..995c2ca5646 100644 --- a/encodings/fastlanes/src/rle/vtable/mod.rs +++ b/encodings/fastlanes/src/rle/vtable/mod.rs @@ -15,6 +15,7 @@ use vortex_array::ArrayView; use vortex_array::ExecutionCtx; use vortex_array::ExecutionResult; use vortex_array::IntoArray; +use vortex_array::ParentRef; use vortex_array::Precision; use vortex_array::arrays::Primitive; use vortex_array::buffer::BufferHandle; @@ -124,7 +125,7 @@ impl VTable for RLE { fn 
reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { RULES.evaluate(array, parent, child_idx) diff --git a/encodings/fsst/src/array.rs b/encodings/fsst/src/array.rs index 617908e94dd..4a6b19f9acf 100644 --- a/encodings/fsst/src/array.rs +++ b/encodings/fsst/src/array.rs @@ -25,6 +25,7 @@ use vortex_array::ExecutionCtx; use vortex_array::ExecutionResult; use vortex_array::IntoArray; use vortex_array::LEGACY_SESSION; +use vortex_array::ParentRef; use vortex_array::Precision; use vortex_array::TypedArrayRef; use vortex_array::VortexSessionExecute; @@ -319,7 +320,7 @@ impl VTable for FSST { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { RULES.evaluate(array, parent, child_idx) diff --git a/encodings/pco/src/array.rs b/encodings/pco/src/array.rs index 94db1014c79..47780f7fe43 100644 --- a/encodings/pco/src/array.rs +++ b/encodings/pco/src/array.rs @@ -28,6 +28,7 @@ use vortex_array::ArrayView; use vortex_array::ExecutionCtx; use vortex_array::ExecutionResult; use vortex_array::IntoArray; +use vortex_array::ParentRef; use vortex_array::Precision; use vortex_array::arrays::Primitive; use vortex_array::arrays::PrimitiveArray; @@ -231,7 +232,7 @@ impl VTable for Pco { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { crate::rules::RULES.evaluate(array, parent, child_idx) diff --git a/encodings/runend/src/array.rs b/encodings/runend/src/array.rs index 943f4881806..e91c290cfa0 100644 --- a/encodings/runend/src/array.rs +++ b/encodings/runend/src/array.rs @@ -19,6 +19,7 @@ use vortex_array::ExecutionCtx; use vortex_array::ExecutionResult; use vortex_array::IntoArray; use vortex_array::LEGACY_SESSION; +use vortex_array::ParentRef; use vortex_array::Precision; use vortex_array::TypedArrayRef; use vortex_array::VortexSessionExecute; @@ 
-166,7 +167,7 @@ impl VTable for RunEnd { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { RULES.evaluate(array, parent, child_idx) diff --git a/encodings/sequence/src/array.rs b/encodings/sequence/src/array.rs index a572a5fc165..ddf1f8eb960 100644 --- a/encodings/sequence/src/array.rs +++ b/encodings/sequence/src/array.rs @@ -18,6 +18,7 @@ use vortex_array::ArrayRef; use vortex_array::ArrayView; use vortex_array::ExecutionCtx; use vortex_array::ExecutionResult; +use vortex_array::ParentRef; use vortex_array::Precision; use vortex_array::buffer::BufferHandle; use vortex_array::dtype::DType; @@ -342,7 +343,7 @@ impl VTable for Sequence { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { RULES.evaluate(array, parent, child_idx) diff --git a/encodings/sparse/src/lib.rs b/encodings/sparse/src/lib.rs index 42b5cd46724..5e63e600e92 100644 --- a/encodings/sparse/src/lib.rs +++ b/encodings/sparse/src/lib.rs @@ -22,6 +22,7 @@ use vortex_array::Canonical; use vortex_array::ExecutionCtx; use vortex_array::ExecutionResult; use vortex_array::IntoArray; +use vortex_array::ParentRef; use vortex_array::Precision; use vortex_array::arrays::BoolArray; use vortex_array::arrays::ConstantArray; @@ -288,7 +289,7 @@ impl VTable for Sparse { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { RULES.evaluate(array, parent, child_idx) diff --git a/encodings/zigzag/src/array.rs b/encodings/zigzag/src/array.rs index 26d3bf984e2..d1dad0960d1 100644 --- a/encodings/zigzag/src/array.rs +++ b/encodings/zigzag/src/array.rs @@ -15,6 +15,7 @@ use vortex_array::ArrayView; use vortex_array::ExecutionCtx; use vortex_array::ExecutionResult; use vortex_array::IntoArray; +use vortex_array::ParentRef; use vortex_array::Precision; use vortex_array::TypedArrayRef; use 
vortex_array::buffer::BufferHandle; @@ -138,7 +139,7 @@ impl VTable for ZigZag { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { RULES.evaluate(array, parent, child_idx) diff --git a/encodings/zstd/src/array.rs b/encodings/zstd/src/array.rs index b327a6a2a95..6a7823ccef0 100644 --- a/encodings/zstd/src/array.rs +++ b/encodings/zstd/src/array.rs @@ -21,6 +21,7 @@ use vortex_array::Canonical; use vortex_array::ExecutionCtx; use vortex_array::ExecutionResult; use vortex_array::IntoArray; +use vortex_array::ParentRef; use vortex_array::Precision; use vortex_array::accessor::ArrayAccessor; use vortex_array::arrays::ConstantArray; @@ -244,7 +245,7 @@ impl VTable for Zstd { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { crate::rules::RULES.evaluate(array, parent, child_idx) diff --git a/vortex-array/src/array/erased.rs b/vortex-array/src/array/erased.rs index 77800377f1c..1ce88a23eb2 100644 --- a/vortex-array/src/array/erased.rs +++ b/vortex-array/src/array/erased.rs @@ -21,6 +21,7 @@ use crate::AnyCanonical; use crate::Array; use crate::ArrayEq; use crate::ArrayHash; +use crate::ArraySlots; use crate::ArrayView; use crate::Canonical; use crate::ExecutionCtx; @@ -33,17 +34,12 @@ use crate::aggregate_fn::fns::sum::sum; use crate::array::ArrayData; use crate::array::ArrayId; use crate::array::ArrayInner; -use crate::array::ArraySlots; use crate::array::DynArrayData; -use crate::arrays::Bool; +use crate::array::ParentRef; use crate::arrays::Constant; use crate::arrays::DictArray; use crate::arrays::FilterArray; -use crate::arrays::Null; -use crate::arrays::Primitive; use crate::arrays::SliceArray; -use crate::arrays::VarBin; -use crate::arrays::VarBinView; use crate::buffer::BufferHandle; use crate::builders::ArrayBuilder; use crate::dtype::DType; @@ -52,7 +48,6 @@ use crate::expr::stats::Precision; use 
crate::expr::stats::Stat; use crate::expr::stats::StatsProviderExt; use crate::matcher::Matcher; -use crate::optimizer::ArrayOptimizer; use crate::scalar::Scalar; use crate::stats::StatsSetRef; use crate::validity::Validity; @@ -94,6 +89,11 @@ impl ArrayRef { &self.0.data } + #[inline(always)] + pub(crate) fn inner(&self) -> &ArrayInner { + &self.0 + } + /// Returns a mutable reference to the inner if this is the sole owner. #[inline(always)] pub(crate) fn inner_mut(&mut self) -> Option<&mut ArrayInner> { @@ -228,9 +228,7 @@ impl ArrayRef { return Ok(Canonical::empty(self.dtype()).into_array()); } - let sliced = SliceArray::try_new(self.clone(), range)? - .into_array() - .optimize()?; + let sliced = SliceArray::try_new_parts(self.clone(), range)?.optimize()?; // Propagate some stats from the original array to the sliced array. if !sliced.is::() { @@ -255,16 +253,12 @@ impl ArrayRef { /// Wraps the array in a [`FilterArray`] such that it is logically filtered by the given mask. pub fn filter(&self, mask: Mask) -> VortexResult { - FilterArray::try_new(self.clone(), mask)? - .into_array() - .optimize() + FilterArray::try_new_parts(self.clone(), mask)?.optimize() } /// Wraps the array in a [`DictArray`] such that it is logically taken by the given indices. pub fn take(&self, indices: ArrayRef) -> VortexResult { - DictArray::try_new(indices, self.clone())? - .into_array() - .optimize() + DictArray::try_new_parts(indices, self.clone())?.optimize() } /// Fetch the scalar at the given index. @@ -400,6 +394,10 @@ impl ArrayRef { } /// Returns the array downcast by the given matcher. + /// + /// Routes through the heap-array entry points (`Matcher::matches` / + /// `Matcher::try_match`) so matchers with a cheap, direct downcast — like the + /// blanket `VTable` matcher — don't pay for a [`ParentRef`] construction here. pub fn as_opt(&self) -> Option> { M::try_match(self) } @@ -441,15 +439,6 @@ impl ArrayRef { nbytes } - /// Returns whether this array is an arrow encoding. 
- pub fn is_arrow(&self) -> bool { - self.is::() - || self.is::() - || self.is::() - || self.is::() - || self.is::() - } - /// Whether the array is of a canonical encoding. pub fn is_canonical(&self) -> bool { self.is::() @@ -595,7 +584,7 @@ impl ArrayRef { pub fn reduce_parent( &self, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { self.0.data.reduce_parent(self, parent, child_idx) @@ -738,13 +727,34 @@ impl IntoArray for ArrayRef { impl Matcher for V { type Match<'a> = ArrayView<'a, V>; + /// Fast encoding-id check that skips [`ParentRef`] construction. The hot + /// `ArrayRef::is::()` path goes through here, so any extra work shows up in + /// downstream micro-benchmarks (`patches_lookup`, `chunk_array_builder`, ...). + #[inline] fn matches(array: &ArrayRef) -> bool { - array.0.data.as_any().is::>() + array.dyn_array().as_any().is::>() + } + + /// Direct downcast — same fast path as [`Matcher::matches`] but also produces + /// the [`ArrayView`] when it matches. + #[inline] + fn try_match(array: &ArrayRef) -> Option> { + array.as_typed::() } - fn try_match(array: &'_ ArrayRef) -> Option> { - let inner = array.0.data.as_any().downcast_ref::>()?; - // # Safety checked by `downcast_ref`. - Some(unsafe { ArrayView::new_unchecked(array, &inner.data) }) + /// Match by encoding id (no materialization). Equivalent to + /// [`Matcher::try_match_parent`].is_some() but avoids constructing an + /// [`ArrayView`] for parents that do not need one. + fn matches_parent(parent: &ParentRef<'_>) -> bool { + parent.is_encoding::() + } + + /// Returns an [`ArrayView`] for the parent if its encoding is `V`. + /// + /// The returned [`ArrayView`] is stack-backed when the parent is stack-backed, + /// so no `Arc>` is allocated until a downstream consumer reaches + /// for [`ArrayView::array`]. 
+ fn try_match_parent<'a>(parent: &'a ParentRef<'_>) -> Option> { + parent.as_opt::() } } diff --git a/vortex-array/src/array/mod.rs b/vortex-array/src/array/mod.rs index 23ad9499766..6b6746ee326 100644 --- a/vortex-array/src/array/mod.rs +++ b/vortex-array/src/array/mod.rs @@ -34,6 +34,9 @@ pub use plugin::*; mod foreign; pub(crate) use foreign::*; +mod parent; +pub use parent::*; + mod typed; pub use typed::*; @@ -60,7 +63,11 @@ pub type ArraySlots = SmallVec<[Option; 4]>; #[doc(hidden)] pub(crate) trait DynArrayData: 'static + private::Sealed + Send + Sync + Debug { /// Returns the array as a reference to a generic [`Any`] trait object. - fn as_any(&self) -> &dyn Any; + /// + /// The `+ Send + Sync` bound is preserved so [`ParentRef`] — which carries + /// this reference as `&dyn Any` to stay type-erased over `V` — stays + /// `Send + Sync` for use across `.await` boundaries. + fn as_any(&self) -> &(dyn Any + Send + Sync); /// Returns the array as a mutable reference to a generic [`Any`] trait object. fn as_any_mut(&mut self) -> &mut dyn Any; @@ -148,7 +155,7 @@ pub(crate) trait DynArrayData: 'static + private::Sealed + Send + Sync + Debug { fn reduce_parent( &self, this: &ArrayRef, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult>; @@ -219,7 +226,7 @@ mod private { /// This is self-contained: identity methods use `ArrayData`'s own fields (dtype, len, stats), /// while data-access methods delegate to VTable methods on the inner `V::TypedArrayData`. 
impl DynArrayData for ArrayData { - fn as_any(&self) -> &dyn Any { + fn as_any(&self) -> &(dyn Any + Send + Sync) { self } @@ -410,7 +417,7 @@ impl DynArrayData for ArrayData { fn reduce_parent( &self, this: &ArrayRef, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { let view = unsafe { ArrayView::new_unchecked(this, &self.data) }; diff --git a/vortex-array/src/array/parent.rs b/vortex-array/src/array/parent.rs new file mode 100644 index 00000000000..06de10b48b3 --- /dev/null +++ b/vortex-array/src/array/parent.rs @@ -0,0 +1,399 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Stack-allocatable parent representation used by the `reduce_parent` dispatch chain. +//! +//! [`ParentRef`] either borrows an existing heap-allocated [`ArrayRef`], or borrows +//! stack-allocated construction state. The construction-side optimizer can borrow +//! `ArrayParts` before materializing an `ArrayInner`, so matchers and parent-reduce +//! rules can attempt reduction without first allocating an `Arc>`. +//! +//! Stack-backed parents lazily materialize an `ArrayRef` into an internal [`OnceLock`] +//! when a downstream consumer asks for one through [`ArrayBacking::array_ref`], so +//! [`ParentRef`] can stand in anywhere an [`ArrayView`] is needed. + +use std::any::Any; +use std::fmt::Debug; +use std::fmt::Formatter; +use std::sync::OnceLock; + +use vortex_error::VortexExpect; +use vortex_error::VortexResult; +use vortex_session::SessionExt; +use vortex_session::VortexSession; + +use crate::ArrayRef; +use crate::array::ArrayData; +use crate::array::ArrayId; +use crate::array::ArrayParts; +use crate::array::ArraySlots; +use crate::array::ArrayView; +use crate::array::VTable; +use crate::dtype::DType; +use crate::optimizer::ArrayOptimizer; +use crate::optimizer::kernels::ArrayKernels; + +/// A parent array, possibly stack-allocated, used by the `reduce_parent` dispatch chain. 
+/// +/// Carries the metadata needed to dispatch parent-reduce rules (encoding id, dtype, +/// length, encoding-specific data, slots) regardless of whether the parent is backed +/// by an existing [`ArrayRef`] or by borrowed [`ArrayParts`]. Stack-backed parents +/// materialize an [`ArrayRef`] into an internal cache on first call to +/// [`ArrayBacking::array_ref`]. +pub struct ParentRef<'a> { + encoding_id: ArrayId, + dtype: &'a DType, + len: usize, + slots: &'a [Option], + data: ParentData<'a>, + /// Lazily-populated materialization slot used by stack-backed parents. + /// Heap-backed parents return their borrowed [`ArrayRef`] directly and never + /// touch this cache. + cache: OnceLock, +} + +/// Type-erased payload for [`ParentRef`]. +/// +/// Carries `&dyn Any` rather than `&V`/`&V::TypedArrayData` so [`ParentRef`] is not +/// itself generic over `V`. The `+ Send + Sync` bound mirrors the bounds on +/// [`VTable`](crate::array::VTable) and `V::TypedArrayData`, keeping [`ParentRef`] +/// and the [`ArrayView`] built on top of it `Send + Sync`. +type AnyRef<'a> = &'a (dyn Any + Send + Sync); + +enum ParentData<'a> { + Heap { + array: &'a ArrayRef, + data: AnyRef<'a>, + }, + Parts { + vtable: AnyRef<'a>, + data: AnyRef<'a>, + materialize: MaterializeFn, + }, +} + +/// Function pointer that materializes stack-borrowed parts into an owned [`ArrayRef`]. +/// +/// The `vtable` and `data` arguments are the borrowed `&V` and `&V::TypedArrayData` +/// previously stashed as `&dyn Any` in [`ParentData::Parts`]. The implementation +/// downcasts them, clones into owned values, and produces an `ArrayRef`. +type MaterializeFn = fn( + vtable: &(dyn Any + Send + Sync), + data: &(dyn Any + Send + Sync), + dtype: &DType, + len: usize, + slots: &[Option], +) -> ArrayRef; + +impl<'a> ParentRef<'a> { + /// Build a [`ParentRef`] borrowing a heap-allocated [`ArrayRef`]. 
+ #[inline] + pub fn from_array_ref(array: &'a ArrayRef) -> Self { + let inner = array.inner(); + Self { + encoding_id: inner.encoding_id, + dtype: &inner.dtype, + len: inner.len, + slots: &inner.slots, + data: ParentData::Heap { + array, + data: inner.data.as_any(), + }, + cache: OnceLock::new(), + } + } + + /// Build a [`ParentRef`] borrowing construction parts before materialization. + /// + /// The returned [`ParentRef`] owns the cache slot for the lazily materialized + /// [`ArrayRef`], so callers don't need to thread an external scratch through. + #[inline] + pub(crate) fn from_parts(parts: &'a ArrayParts) -> Self { + Self { + encoding_id: parts.vtable.id(), + dtype: &parts.dtype, + len: parts.len, + slots: &parts.slots, + data: ParentData::Parts { + vtable: &parts.vtable, + data: &parts.data, + materialize: materialize_parts::, + }, + cache: OnceLock::new(), + } + } + + /// Optimize this parent, materializing the parts if no stack reduction fires. + /// + /// Tries `reduce_parent` on each child slot first, which can match against the + /// stack-borrowed parent without ever allocating an `Arc>`. If + /// nothing matches, the parent is materialized into a real [`ArrayRef`] and the + /// full optimizer is run on it so legacy rules whose matchers still require an + /// [`ArrayView`] also get a chance to fire. + pub fn optimize(self) -> VortexResult { + match self.try_reduce_parent(None)? { + Some(reduced) => Ok(reduced), + None => self.into_array_ref().optimize(), + } + } + + /// Same as [`Self::optimize`] but also consults [`ArrayKernels`] from `session`. + pub fn optimize_ctx(self, session: &VortexSession) -> VortexResult { + match self.try_reduce_parent(Some(session))? 
{ + Some(reduced) => Ok(reduced), + None => self.into_array_ref().optimize_ctx(session), + } + } + + fn try_reduce_parent(&self, session: Option<&VortexSession>) -> VortexResult> { + let kernels = session.and_then(|s| s.get_opt::()); + + for (slot_idx, slot) in self.slots.iter().enumerate() { + let Some(child) = slot else { continue }; + + // Session kernels take precedence over static `PARENT_RULES`, matching + // the existing optimizer's ordering. + if let Some(kernels) = &kernels + && let Some(plugins) = + kernels.find_reduce_parent(self.encoding_id, child.encoding_id()) + { + for plugin in plugins.as_ref() { + if let Some(reduced) = plugin(child, self, slot_idx)? { + return cascade(reduced, session).map(Some); + } + } + } + + if let Some(reduced) = child.reduce_parent(self, slot_idx)? { + return cascade(reduced, session).map(Some); + } + } + + Ok(None) + } + + /// Returns the encoding id of the parent. + #[inline] + pub fn encoding_id(&self) -> ArrayId { + self.encoding_id + } + + /// Returns the dtype of the parent. + #[inline] + pub fn dtype(&self) -> &'a DType { + self.dtype + } + + /// Returns the length of the parent. + #[inline] + pub fn len(&self) -> usize { + self.len + } + + /// Returns whether the parent is empty. + #[inline] + pub fn is_empty(&self) -> bool { + self.len == 0 + } + + /// Returns the slots of the parent. + #[inline] + pub fn slots(&self) -> &'a [Option] { + self.slots + } + + /// Consume this `ParentRef` and return an owned [`ArrayRef`]. + /// + /// Cheap for heap-backed parents (clones the existing `Arc`); for stack-backed + /// parents this materializes the borrowed parts into a fresh + /// `Arc>`, reusing the cached materialization if one was already + /// produced by [`ArrayBacking::array_ref`]. + pub fn into_array_ref(self) -> ArrayRef { + if let Some(cached) = self.cache.into_inner() { + return cached; + } + match self.data { + ParentData::Heap { array, .. 
} => array.clone(), + ParentData::Parts { + vtable, + data, + materialize, + } => materialize(vtable, data, self.dtype, self.len, self.slots), + } + } + + /// Returns `true` if this parent's encoding matches `V`. + /// + /// Cheap encoding-id check that works for both heap- and stack-backed parents + /// without forcing materialization. + #[inline] + pub(crate) fn is_encoding(&self) -> bool { + match self.data { + ParentData::Heap { data, .. } => data.is::>(), + ParentData::Parts { vtable, .. } => return vtable.is::(), + } + } + + #[inline] + pub(crate) fn typed_data(&self) -> Option<&'a V::TypedArrayData> { + match self.data { + ParentData::Heap { data, .. } => data + .downcast_ref::>() + .map(|array_data| &array_data.data), + ParentData::Parts { data, .. } => data.downcast_ref::(), + } + } + + /// Try to extract an [`ArrayView`] for the parent's encoding `V`. + /// + /// Returns `None` if the parent's encoding is not `V`. The returned view is + /// stack-backed when the parent is stack-backed — no materialization happens + /// up front. Materialization is deferred to [`ArrayView::array`], which goes + /// through [`ArrayBacking::array_ref`] on the parent's internal cache. + pub fn as_opt(&self) -> Option> { + let data = self.typed_data::()?; + // SAFETY: `typed_data::()` returned Some, so the parent's encoding is + // `V` and `data` is the `V::TypedArrayData` reachable through `self`. + Some(unsafe { ArrayView::new_from_parent(self, data) }) + } +} + +impl Debug for ParentRef<'_> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + let heap_backed = matches!(self.data, ParentData::Heap { .. 
}); + f.debug_struct("ParentRef") + .field("encoding", &self.encoding_id()) + .field("dtype", self.dtype()) + .field("len", &self.len()) + .field("heap_backed", &heap_backed) + .finish() + } +} + +impl<'a> From<&'a ArrayRef> for ParentRef<'a> { + fn from(array: &'a ArrayRef) -> Self { + Self::from_array_ref(array) + } +} + +/// Trait providing an [`ArrayRef`] view of an array-like backing, materializing on +/// demand for stack-allocated parents. +/// +/// Implemented for [`ArrayRef`] (returns `self`) and [`ParentRef`] (returns the +/// borrowed `ArrayRef` for heap parents or the lazily-materialized cache for stack +/// parents). [`ArrayView`] stores `&dyn ArrayBacking` so its hot accessors stay +/// branch-free while the cold [`ArrayView::array`] path can still produce an +/// `ArrayRef` whichever way the view was constructed. +pub trait ArrayBacking: Send + Sync { + /// Returns an [`ArrayRef`] borrowing the array-like data. + /// + /// For heap-backed views this is a cheap reference return. For stack-backed + /// views this triggers materialization on first call, caching the result in + /// the parent so subsequent calls reuse it. + fn array_ref(&self) -> &ArrayRef; +} + +impl ArrayBacking for ArrayRef { + #[inline] + fn array_ref(&self) -> &ArrayRef { + self + } +} + +impl ArrayBacking for ParentRef<'_> { + #[inline] + fn array_ref(&self) -> &ArrayRef { + match self.data { + ParentData::Heap { array, .. } => array, + ParentData::Parts { + vtable, + data, + materialize, + } => self + .cache + .get_or_init(|| materialize(vtable, data, self.dtype, self.len, self.slots)), + } + } +} + +#[inline] +fn cascade(reduced: ArrayRef, session: Option<&VortexSession>) -> VortexResult { + match session { + Some(s) => reduced.optimize_ctx(s), + None => reduced.optimize(), + } +} + +/// Materializes stack-borrowed parts of encoding `V` into an owned [`ArrayRef`]. +/// +/// Used as the function pointer stored inside [`ParentData::Parts`]. 
The +/// `vtable`/`data` arguments are `&V` and `&V::TypedArrayData` erased to `&dyn Any`; +/// they are downcast and cloned into a fresh `ArrayParts` which is then turned +/// into an `ArrayRef`. Validation is skipped: stack-borrowed parts were validated +/// when the originating `ArrayParts` was constructed. +fn materialize_parts( + vtable: &(dyn Any + Send + Sync), + data: &(dyn Any + Send + Sync), + dtype: &DType, + len: usize, + slots: &[Option], +) -> ArrayRef { + let vtable = vtable + .downcast_ref::() + .vortex_expect("ParentRef materialize: vtable type mismatch"); + let data = data + .downcast_ref::() + .vortex_expect("ParentRef materialize: data type mismatch"); + let slots: ArraySlots = slots.iter().cloned().collect(); + ArrayParts::new(vtable.clone(), dtype.clone(), len, data.clone()) + .with_slots(slots) + .into_array() +} + +#[cfg(test)] +mod tests { + use vortex_error::VortexResult; + + use super::ParentRef; + use crate::IntoArray; + use crate::arrays::BoolArray; + use crate::arrays::Slice; + use crate::arrays::SliceArray; + + #[test] + fn parts_parent_ref_exposes_array_view() -> VortexResult<()> { + let child = BoolArray::from_iter([true, false, true]).into_array(); + let parts = SliceArray::try_new_parts(child, 1..3)?; + let parent = ParentRef::from_parts(&parts); + + let view = parent + .as_opt::() + .expect("Slice parts should match a Slice array view"); + + assert_eq!(view.slice_range(), &(1..3)); + assert_eq!(view.len(), 2); + + Ok(()) + } + + #[test] + fn parts_parent_ref_array_method_materializes() -> VortexResult<()> { + let child = BoolArray::from_iter([true, false, true]).into_array(); + let parts = SliceArray::try_new_parts(child, 1..3)?; + let parent = ParentRef::from_parts(&parts); + + let view = parent + .as_opt::() + .expect("Slice parts should match a Slice array view"); + + // Reading metadata through the view does NOT force materialization. 
+ assert_eq!(view.slice_range(), &(1..3)); + assert_eq!(view.len(), 2); + + // But calling array() DOES materialize. + let array_ref = view.array(); + assert_eq!(array_ref.len(), 2); + + Ok(()) + } +} diff --git a/vortex-array/src/array/typed.rs b/vortex-array/src/array/typed.rs index 180f771bc9c..98605542779 100644 --- a/vortex-array/src/array/typed.rs +++ b/vortex-array/src/array/typed.rs @@ -13,6 +13,7 @@ use std::ops::DerefMut; use std::sync::Arc; use vortex_error::VortexResult; +use vortex_session::VortexSession; use crate::ArrayRef; use crate::ArraySlots; @@ -22,6 +23,7 @@ use crate::LEGACY_SESSION; use crate::VortexSessionExecute; use crate::array::ArrayId; use crate::array::ArrayView; +use crate::array::ParentRef; use crate::array::VTable; use crate::dtype::DType; use crate::stats::ArrayStats; @@ -70,6 +72,34 @@ impl ArrayParts { self.slots = slots; self } + + /// Materialize already-valid parts into an [`ArrayRef`] without attempting reduction. + /// + /// This intentionally skips vtable validation. Use + /// `Array::::try_from_parts(parts)?.into_array()` when constructing parts from unchecked + /// inputs. + pub fn into_array(self) -> ArrayRef { + unsafe { Array::::from_parts_unchecked(self).into_array() } + } + + /// Optimize the parts directly, without ever requiring an [`ArrayRef`]. + /// + /// Builds a [`ParentRef`] borrowing `self` and runs the parent-reduce dispatch + /// chain on it. If a child rule fires before materialization the wrapper + /// allocation is skipped entirely; otherwise the parts are materialized and run + /// through the full optimizer. + /// + /// This is the entry point that lets construction-side callers hand a + /// stack-allocated array over to the optimizer without first paying for + /// `Arc>`. + pub fn optimize(self) -> VortexResult { + ParentRef::from_parts(&self).optimize() + } + + /// Same as [`Self::optimize`] but also consults session-registered kernels. 
+ pub fn optimize_ctx(self, session: &VortexSession) -> VortexResult { + ParentRef::from_parts(&self).optimize_ctx(session) + } } /// Shared bound for helpers that should work over both owned [`Array`] and borrowed diff --git a/vortex-array/src/array/view.rs b/vortex-array/src/array/view.rs index 969fd0484a3..ff869180d35 100644 --- a/vortex-array/src/array/view.rs +++ b/vortex-array/src/array/view.rs @@ -9,16 +9,33 @@ use vortex_error::VortexResult; use crate::ArrayRef; use crate::array::Array; +use crate::array::ArrayBacking; use crate::array::ArrayId; +use crate::array::ParentRef; use crate::array::VTable; use crate::dtype::DType; use crate::stats::StatsSetRef; use crate::validity::Validity; -/// A lightweight, `Copy`-able typed view into an [`ArrayRef`]. +/// A lightweight, `Copy`-able typed view of an array. +/// +/// The view can be either *heap-backed* (sourced from an existing [`ArrayRef`]) or +/// *stack-backed* (sourced from borrowed [`ArrayParts`](crate::array::ArrayParts) +/// reachable through a [`ParentRef`]). Either way the hot accessors (`dtype`, `len`, +/// `slots`, `encoding_id`, `data`) are direct field reads — the cached metadata is +/// flattened into the view at construction. +/// +/// The cold [`Self::array`] path is the only place the heap/stack split surfaces; +/// it goes through a [`ArrayBacking`] trait object so stack-backed views can defer +/// materializing an `Arc>` until a downstream consumer actually needs +/// an [`ArrayRef`]. pub struct ArrayView<'a, V: VTable> { - array: &'a ArrayRef, data: &'a V::TypedArrayData, + dtype: &'a DType, + len: usize, + slots: &'a [Option], + encoding_id: ArrayId, + backing: &'a dyn ArrayBacking, } impl Copy for ArrayView<'_, V> {} @@ -30,58 +47,105 @@ impl Clone for ArrayView<'_, V> { } impl<'a, V: VTable> ArrayView<'a, V> { + /// Construct a heap-backed view. + /// /// # Safety /// Caller must ensure `data` is the `V::TypedArrayData` stored inside `array`. 
pub(crate) unsafe fn new_unchecked(array: &'a ArrayRef, data: &'a V::TypedArrayData) -> Self { debug_assert!(array.is::()); - Self { array, data } + let inner = array.inner(); + Self { + data, + dtype: &inner.dtype, + len: inner.len, + slots: &inner.slots, + encoding_id: inner.encoding_id, + backing: array, + } } + /// Construct a stack-backed view borrowing parts through `parent`. + /// + /// # Safety + /// Caller must ensure `parent.is_encoding::()` and that `data` is the + /// `V::TypedArrayData` borrowed inside `parent`. + pub(crate) unsafe fn new_from_parent( + parent: &'a ParentRef<'a>, + data: &'a V::TypedArrayData, + ) -> Self { + debug_assert!(parent.is_encoding::()); + Self { + data, + dtype: parent.dtype(), + len: parent.len(), + slots: parent.slots(), + encoding_id: parent.encoding_id(), + backing: parent, + } + } + + /// Returns the underlying [`ArrayRef`], materializing stack parts on first call. + /// + /// For heap-backed views this is a cheap reference return. Stack-backed views + /// build an `Arc>` and cache it on the parent. + #[inline] pub fn array(&self) -> &'a ArrayRef { - self.array + self.backing.array_ref() } + #[inline] pub fn data(&self) -> &'a V::TypedArrayData { self.data } + #[inline] pub fn slots(&self) -> &'a [Option] { - self.array.slots() + self.slots } - pub fn dtype(&self) -> &DType { - self.array.dtype() + #[inline] + pub fn dtype(&self) -> &'a DType { + self.dtype } + #[inline] pub fn len(&self) -> usize { - self.array.len() + self.len } + #[inline] pub fn is_empty(&self) -> bool { - self.array.len() == 0 + self.len == 0 } + #[inline] pub fn encoding_id(&self) -> ArrayId { - self.array.encoding_id() + self.encoding_id } + /// Returns the array's statistics. Forces stack-backed views to materialize. pub fn statistics(&self) -> StatsSetRef<'_> { - self.array.statistics() + self.array().statistics() } + /// Returns the array's validity. Forces stack-backed views to materialize. 
pub fn validity(&self) -> VortexResult { - self.array.validity() + self.array().validity() } + /// Returns an owned typed handle. Forces stack-backed views to materialize. pub fn into_owned(self) -> Array { // SAFETY: we are ourselves type checked as 'V' - unsafe { Array::::from_array_ref_unchecked(self.array.clone()) } + unsafe { Array::::from_array_ref_unchecked(self.array().clone()) } } } impl AsRef for ArrayView<'_, V> { fn as_ref(&self) -> &ArrayRef { - self.array + // For heap-backed views this returns the borrowed `ArrayRef` directly. For + // stack-backed views, materialization runs once and the cached `ArrayRef` + // lives as long as the parent. + self.array() } } @@ -96,9 +160,9 @@ impl Deref for ArrayView<'_, V> { impl Debug for ArrayView<'_, V> { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { f.debug_struct("ArrayView") - .field("encoding", &self.array.encoding_id()) - .field("dtype", self.array.dtype()) - .field("len", &self.array.len()) + .field("encoding", &self.encoding_id()) + .field("dtype", self.dtype()) + .field("len", &self.len()) .finish() } } diff --git a/vortex-array/src/array/vtable/mod.rs b/vortex-array/src/array/vtable/mod.rs index 6c85bb79d05..a62fd2bfe62 100644 --- a/vortex-array/src/array/vtable/mod.rs +++ b/vortex-array/src/array/vtable/mod.rs @@ -25,6 +25,7 @@ use crate::Canonical; use crate::ExecutionResult; use crate::IntoArray; use crate::Precision; +use crate::array::ParentRef; pub use crate::array::plugin::*; use crate::arrays::ConstantArray; use crate::arrays::constant::Constant; @@ -205,7 +206,7 @@ pub trait VTable: 'static + Clone + Sized + Send + Sync + Debug { /// Attempt to perform a reduction of the parent of this array. 
fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { _ = (array, parent, child_idx); diff --git a/vortex-array/src/arrays/bool/vtable/mod.rs b/vortex-array/src/arrays/bool/vtable/mod.rs index 35261fa8d89..9cfa80ad4ca 100644 --- a/vortex-array/src/arrays/bool/vtable/mod.rs +++ b/vortex-array/src/arrays/bool/vtable/mod.rs @@ -19,6 +19,7 @@ use crate::ExecutionResult; use crate::array::Array; use crate::array::ArrayParts; use crate::array::ArrayView; +use crate::array::ParentRef; use crate::array::VTable; use crate::array::child_to_validity; use crate::arrays::bool::BoolData; @@ -184,7 +185,7 @@ impl VTable for Bool { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { RULES.evaluate(array, parent, child_idx) diff --git a/vortex-array/src/arrays/chunked/vtable/mod.rs b/vortex-array/src/arrays/chunked/vtable/mod.rs index 68679a4c3b5..e8068385ffa 100644 --- a/vortex-array/src/arrays/chunked/vtable/mod.rs +++ b/vortex-array/src/arrays/chunked/vtable/mod.rs @@ -28,6 +28,7 @@ use crate::array::Array; use crate::array::ArrayId; use crate::array::ArrayParts; use crate::array::ArrayView; +use crate::array::ParentRef; use crate::array::VTable; use crate::arrays::chunked::ChunkedArrayExt; use crate::arrays::chunked::ChunkedData; @@ -72,6 +73,7 @@ impl VTable for Chunked { type OperationsVTable = Self; type ValidityVTable = Self; + fn id(&self) -> ArrayId { static ID: CachedId = CachedId::new("vortex.chunked"); *ID @@ -282,7 +284,7 @@ impl VTable for Chunked { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { PARENT_RULES.evaluate(array, parent, child_idx) diff --git a/vortex-array/src/arrays/constant/vtable/mod.rs b/vortex-array/src/arrays/constant/vtable/mod.rs index c130c28a95a..5ec1e3d9ef4 100644 --- 
a/vortex-array/src/arrays/constant/vtable/mod.rs +++ b/vortex-array/src/arrays/constant/vtable/mod.rs @@ -23,6 +23,7 @@ use crate::Precision; use crate::array::Array; use crate::array::ArrayId; use crate::array::ArrayView; +use crate::array::ParentRef; use crate::array::VTable; use crate::arrays::constant::ConstantData; use crate::arrays::constant::compute::rules::PARENT_RULES; @@ -154,7 +155,7 @@ impl VTable for Constant { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { PARENT_RULES.evaluate(array, parent, child_idx) diff --git a/vortex-array/src/arrays/decimal/vtable/mod.rs b/vortex-array/src/arrays/decimal/vtable/mod.rs index dac24ecd95f..2351137b63a 100644 --- a/vortex-array/src/arrays/decimal/vtable/mod.rs +++ b/vortex-array/src/arrays/decimal/vtable/mod.rs @@ -17,6 +17,7 @@ use crate::ExecutionCtx; use crate::ExecutionResult; use crate::array::Array; use crate::array::ArrayView; +use crate::array::ParentRef; use crate::array::VTable; use crate::arrays::decimal::DecimalData; use crate::buffer::BufferHandle; @@ -187,7 +188,7 @@ impl VTable for Decimal { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { RULES.evaluate(array, parent, child_idx) diff --git a/vortex-array/src/arrays/dict/array.rs b/vortex-array/src/arrays/dict/array.rs index 10108191744..291aa549d91 100644 --- a/vortex-array/src/arrays/dict/array.rs +++ b/vortex-array/src/arrays/dict/array.rs @@ -233,12 +233,19 @@ impl Array { /// Build a new `DictArray` from its components, `codes` and `values`. pub fn try_new(codes: ArrayRef, values: ArrayRef) -> VortexResult { + Array::try_from_parts(Self::try_new_parts(codes, values)?) + } + + /// Build the [`ArrayParts`]. The parts can then be optimized through + /// [`ParentRef::optimize`](crate::array::ParentRef::optimize) or materialized + /// directly with [`ArrayParts::into_array`]. 
+ pub fn try_new_parts(codes: ArrayRef, values: ArrayRef) -> VortexResult> { let dtype = values .dtype() .union_nullability(codes.dtype().nullability()); let len = codes.len(); let data = DictData::try_new(codes.dtype())?; - Array::try_from_parts( + Ok( ArrayParts::new(Dict, dtype, len, data) .with_slots(smallvec![Some(codes), Some(values)]), ) @@ -293,6 +300,8 @@ impl Array { #[cfg(test)] mod test { + use std::sync::LazyLock; + use rand::RngExt; use rand::SeedableRng; use rand::distr::Distribution; @@ -304,12 +313,10 @@ mod test { use vortex_error::VortexResult; use vortex_error::vortex_panic; use vortex_mask::AllOr; + use vortex_session::VortexSession; use crate::ArrayRef; use crate::IntoArray; - use crate::LEGACY_SESSION; - #[expect(deprecated)] - use crate::ToCanonical as _; use crate::VortexSessionExecute; use crate::arrays::ChunkedArray; use crate::arrays::DictArray; @@ -321,8 +328,12 @@ mod test { use crate::dtype::Nullability::NonNullable; use crate::dtype::PType; use crate::dtype::UnsignedPType; + use crate::session::ArraySession; use crate::validity::Validity; + static SESSION: LazyLock = + LazyLock::new(|| VortexSession::empty().with::()); + #[test] fn nullable_codes_validity() { let dict = DictArray::try_new( @@ -338,10 +349,7 @@ mod test { .as_ref() .validity() .unwrap() - .execute_mask( - dict.as_ref().len(), - &mut LEGACY_SESSION.create_execution_ctx(), - ) + .execute_mask(dict.as_ref().len(), &mut SESSION.create_execution_ctx()) .unwrap(); let AllOr::Some(indices) = mask.indices() else { vortex_panic!("Expected indices from mask") @@ -364,10 +372,7 @@ mod test { .as_ref() .validity() .unwrap() - .execute_mask( - dict.as_ref().len(), - &mut LEGACY_SESSION.create_execution_ctx(), - ) + .execute_mask(dict.as_ref().len(), &mut SESSION.create_execution_ctx()) .unwrap(); let AllOr::Some(indices) = mask.indices() else { vortex_panic!("Expected indices from mask") @@ -394,10 +399,7 @@ mod test { .as_ref() .validity() .unwrap() - .execute_mask( - 
dict.as_ref().len(), - &mut LEGACY_SESSION.create_execution_ctx(), - ) + .execute_mask(dict.as_ref().len(), &mut SESSION.create_execution_ctx()) .unwrap(); let AllOr::Some(indices) = mask.indices() else { vortex_panic!("Expected indices from mask") @@ -420,10 +422,7 @@ mod test { .as_ref() .validity() .unwrap() - .execute_mask( - dict.as_ref().len(), - &mut LEGACY_SESSION.create_execution_ctx(), - ) + .execute_mask(dict.as_ref().len(), &mut SESSION.create_execution_ctx()) .unwrap(); let AllOr::Some(indices) = mask.indices() else { vortex_panic!("Expected indices from mask") @@ -470,10 +469,9 @@ mod test { &DType::Primitive(PType::U64, NonNullable), len * chunk_count, ); - array.append_to_builder(builder.as_mut(), &mut LEGACY_SESSION.create_execution_ctx())?; + array.append_to_builder(builder.as_mut(), &mut SESSION.create_execution_ctx())?; - #[expect(deprecated)] - let into_prim = array.to_primitive(); + let into_prim = array.execute::(&mut SESSION.create_execution_ctx())?; let prim_into = builder.finish_into_canonical().into_primitive(); assert_arrays_eq!(into_prim, prim_into); diff --git a/vortex-array/src/arrays/dict/take.rs b/vortex-array/src/arrays/dict/take.rs index b77bef19b39..10b2533fb04 100644 --- a/vortex-array/src/arrays/dict/take.rs +++ b/vortex-array/src/arrays/dict/take.rs @@ -10,9 +10,11 @@ use crate::Canonical; use crate::ExecutionCtx; use crate::IntoArray; use crate::array::ArrayView; +use crate::array::ParentRef; use crate::array::VTable; use crate::arrays::ConstantArray; use crate::arrays::dict::DictArraySlotsExt; +use crate::arrays::dict::DictSlotsView; use crate::expr::stats::Precision; use crate::expr::stats::Stat; use crate::expr::stats::StatsProvider; @@ -106,6 +108,30 @@ where } Ok(result) } + + fn reduce_parent_ref( + &self, + array: ArrayView<'_, V>, + parent: &ParentRef<'_>, + child_idx: usize, + ) -> VortexResult> { + // Only handle the values child (index 1), not the codes child (index 0). 
+ if child_idx != 1 { + return Ok(None); + } + let Some(parent) = parent.as_opt::() else { + return Ok(None); + }; + let codes = DictSlotsView::from_slots(parent.slots()).codes; + if let Some(result) = precondition::(array, codes) { + return Ok(Some(result)); + } + let result = ::take(array, codes)?; + if let Some(taken) = &result { + propagate_take_stats(array.array(), taken, codes)?; + } + Ok(result) + } } #[derive(Default, Debug)] @@ -170,3 +196,28 @@ pub(crate) fn propagate_take_stats( ) }) } + +#[cfg(test)] +mod tests { + use vortex_error::VortexResult; + + use crate::IntoArray; + use crate::ParentRef; + use crate::arrays::Constant; + use crate::arrays::ConstantArray; + use crate::arrays::DictArray; + use crate::arrays::PrimitiveArray; + + #[test] + fn reduce_adaptor_handles_stack_backed_dict_parent() -> VortexResult<()> { + let indices = PrimitiveArray::from_iter([0u32, 0, 0]).into_array(); + let values = ConstantArray::new(7i32, 1).into_array(); + let parts = DictArray::try_new_parts(indices, values)?; + + let reduced = ParentRef::from_parts(&parts).optimize()?; + + assert!(reduced.is::()); + assert_eq!(reduced.len(), 3); + Ok(()) + } +} diff --git a/vortex-array/src/arrays/dict/vtable/mod.rs b/vortex-array/src/arrays/dict/vtable/mod.rs index 33db223de72..8ed45c501c3 100644 --- a/vortex-array/src/arrays/dict/vtable/mod.rs +++ b/vortex-array/src/arrays/dict/vtable/mod.rs @@ -30,6 +30,7 @@ use crate::array::Array; use crate::array::ArrayId; use crate::array::ArrayParts; use crate::array::ArrayView; +use crate::array::ParentRef; use crate::array::VTable; use crate::arrays::ConstantArray; use crate::arrays::Primitive; @@ -199,7 +200,7 @@ impl VTable for Dict { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { PARENT_RULES.evaluate(array, parent, child_idx) diff --git a/vortex-array/src/arrays/extension/vtable/mod.rs b/vortex-array/src/arrays/extension/vtable/mod.rs index 
852593abddb..f8fbf547687 100644 --- a/vortex-array/src/arrays/extension/vtable/mod.rs +++ b/vortex-array/src/arrays/extension/vtable/mod.rs @@ -20,6 +20,7 @@ use crate::array::Array; use crate::array::ArrayId; use crate::array::ArrayParts; use crate::array::ArrayView; +use crate::array::ParentRef; use crate::array::VTable; use crate::array::ValidityVTableFromChild; use crate::arrays::extension::array::SLOT_NAMES; @@ -191,7 +192,7 @@ impl VTable for Extension { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { PARENT_RULES.evaluate(array, parent, child_idx) diff --git a/vortex-array/src/arrays/filter/array.rs b/vortex-array/src/arrays/filter/array.rs index dd811ad24a2..53783c2bac1 100644 --- a/vortex-array/src/arrays/filter/array.rs +++ b/vortex-array/src/arrays/filter/array.rs @@ -56,7 +56,7 @@ impl FilterData { Self { mask } } - fn try_new(array_len: usize, mask: Mask) -> VortexResult { + pub fn try_new(array_len: usize, mask: Mask) -> VortexResult { vortex_ensure_eq!( array_len, mask.len(), @@ -91,25 +91,30 @@ impl FilterData { impl Array { /// Creates a new `FilterArray`. pub fn new(array: ArrayRef, mask: Mask) -> Self { - let dtype = array.dtype().clone(); - let len = mask.true_count(); - let data = FilterData::new(mask); - unsafe { - Array::from_parts_unchecked( - ArrayParts::new(Filter, dtype, len, data).with_slots(smallvec![Some(array)]), - ) - } + unsafe { Array::from_parts_unchecked(Self::new_parts(array, mask)) } } /// Constructs a new `FilterArray`. pub fn try_new(array: ArrayRef, mask: Mask) -> VortexResult { + Ok(unsafe { Array::from_parts_unchecked(Self::try_new_parts(array, mask)?) }) + } + + /// Builds the [`ArrayParts`]. The parts can then be optimized through + /// [`ParentRef::optimize`](crate::array::ParentRef::optimize) or materialized + /// directly with [`ArrayParts::into_array`]. 
+ pub fn try_new_parts(array: ArrayRef, mask: Mask) -> VortexResult> { let dtype = array.dtype().clone(); let len = mask.true_count(); let data = FilterData::try_new(array.len(), mask)?; - Ok(unsafe { - Array::from_parts_unchecked( - ArrayParts::new(Filter, dtype, len, data).with_slots(smallvec![Some(array)]), - ) - }) + Ok(ArrayParts::new(Filter, dtype, len, data).with_slots(smallvec![Some(array)])) + } + + /// Builds the [`ArrayParts`] without checking that the mask length matches + /// the array length. See [`Self::try_new_parts`] for the checked variant. + pub fn new_parts(array: ArrayRef, mask: Mask) -> ArrayParts { + let dtype = array.dtype().clone(); + let len = mask.true_count(); + let data = FilterData::new(mask); + ArrayParts::new(Filter, dtype, len, data).with_slots(smallvec![Some(array)]) } } diff --git a/vortex-array/src/arrays/filter/kernel.rs b/vortex-array/src/arrays/filter/kernel.rs index 8c65fa99724..524bdaa780f 100644 --- a/vortex-array/src/arrays/filter/kernel.rs +++ b/vortex-array/src/arrays/filter/kernel.rs @@ -16,6 +16,7 @@ use crate::Canonical; use crate::ExecutionCtx; use crate::IntoArray; use crate::array::ArrayView; +use crate::array::ParentRef; use crate::array::VTable; use crate::arrays::Filter; use crate::arrays::dict::TakeExecuteAdaptor; @@ -104,6 +105,22 @@ where } ::filter(array, parent.filter_mask()) } + + fn reduce_parent_ref( + &self, + array: ArrayView<'_, V>, + parent: &ParentRef<'_>, + child_idx: usize, + ) -> VortexResult> { + assert_eq!(child_idx, 0); + let Some(parent) = parent.as_opt::() else { + return Ok(None); + }; + if let Some(result) = precondition::(array, parent.filter_mask()) { + return Ok(Some(result)); + } + ::filter(array, parent.filter_mask()) + } } /// Adaptor that wraps a [`FilterKernel`] impl as an [`ExecuteParentKernel`]. 
@@ -130,3 +147,27 @@ where ::filter(array, parent.filter_mask(), ctx) } } + +#[cfg(test)] +mod tests { + use vortex_error::VortexResult; + use vortex_mask::Mask; + + use crate::IntoArray; + use crate::ParentRef; + use crate::arrays::Constant; + use crate::arrays::ConstantArray; + use crate::arrays::FilterArray; + + #[test] + fn reduce_adaptor_handles_stack_backed_filter_parent() -> VortexResult<()> { + let child = ConstantArray::new(7i32, 4).into_array(); + let parts = FilterArray::try_new_parts(child, Mask::from_iter([true, false, true, false]))?; + + let reduced = ParentRef::from_parts(&parts).optimize()?; + + assert!(reduced.is::()); + assert_eq!(reduced.len(), 2); + Ok(()) + } +} diff --git a/vortex-array/src/arrays/filter/vtable.rs b/vortex-array/src/arrays/filter/vtable.rs index 2361c541997..e9b1a69907c 100644 --- a/vortex-array/src/arrays/filter/vtable.rs +++ b/vortex-array/src/arrays/filter/vtable.rs @@ -24,6 +24,7 @@ use crate::array::Array; use crate::array::ArrayId; use crate::array::ArrayView; use crate::array::OperationsVTable; +use crate::array::ParentRef; use crate::array::VTable; use crate::array::ValidityVTable; use crate::arrays::filter::FilterArrayExt; @@ -66,6 +67,7 @@ impl VTable for Filter { type TypedArrayData = FilterData; type OperationsVTable = Self; type ValidityVTable = Self; + fn id(&self) -> ArrayId { static ID: CachedId = CachedId::new("vortex.filter"); *ID @@ -165,7 +167,7 @@ impl VTable for Filter { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { PARENT_RULES.evaluate(array, parent, child_idx) diff --git a/vortex-array/src/arrays/fixed_size_list/vtable/mod.rs b/vortex-array/src/arrays/fixed_size_list/vtable/mod.rs index ca38f3b777b..cc0e4215634 100644 --- a/vortex-array/src/arrays/fixed_size_list/vtable/mod.rs +++ b/vortex-array/src/arrays/fixed_size_list/vtable/mod.rs @@ -22,6 +22,7 @@ use crate::Precision; use crate::array::Array; use 
crate::array::ArrayId; use crate::array::ArrayView; +use crate::array::ParentRef; use crate::array::VTable; use crate::arrays::fixed_size_list::FixedSizeListData; use crate::arrays::fixed_size_list::array::ELEMENTS_SLOT; @@ -61,6 +62,7 @@ impl VTable for FixedSizeList { type OperationsVTable = Self; type ValidityVTable = Self; + fn id(&self) -> ArrayId { static ID: CachedId = CachedId::new("vortex.fixed_size_list"); *ID @@ -80,7 +82,7 @@ impl VTable for FixedSizeList { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { PARENT_RULES.evaluate(array, parent, child_idx) diff --git a/vortex-array/src/arrays/list/vtable/mod.rs b/vortex-array/src/arrays/list/vtable/mod.rs index 1b404e9edbc..02be610e173 100644 --- a/vortex-array/src/arrays/list/vtable/mod.rs +++ b/vortex-array/src/arrays/list/vtable/mod.rs @@ -24,6 +24,7 @@ use crate::array::Array; use crate::array::ArrayId; use crate::array::ArrayParts; use crate::array::ArrayView; +use crate::array::ParentRef; use crate::array::VTable; use crate::arrays::list::ListArrayExt; use crate::arrays::list::ListData; @@ -68,6 +69,7 @@ impl VTable for List { type OperationsVTable = Self; type ValidityVTable = Self; + fn id(&self) -> ArrayId { static ID: CachedId = CachedId::new("vortex.list"); *ID @@ -87,7 +89,7 @@ impl VTable for List { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { PARENT_RULES.evaluate(array, parent, child_idx) diff --git a/vortex-array/src/arrays/listview/vtable/mod.rs b/vortex-array/src/arrays/listview/vtable/mod.rs index ddfa4aa0e6b..88d6fd26707 100644 --- a/vortex-array/src/arrays/listview/vtable/mod.rs +++ b/vortex-array/src/arrays/listview/vtable/mod.rs @@ -23,6 +23,7 @@ use crate::Precision; use crate::array::Array; use crate::array::ArrayId; use crate::array::ArrayView; +use crate::array::ParentRef; use crate::array::VTable; use 
crate::arrays::listview::ListViewArrayExt; use crate::arrays::listview::ListViewData; @@ -75,6 +76,7 @@ impl VTable for ListView { type OperationsVTable = Self; type ValidityVTable = Self; + fn id(&self) -> ArrayId { static ID: CachedId = CachedId::new("vortex.listview"); *ID @@ -214,7 +216,7 @@ impl VTable for ListView { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { PARENT_RULES.evaluate(array, parent, child_idx) diff --git a/vortex-array/src/arrays/masked/vtable/mod.rs b/vortex-array/src/arrays/masked/vtable/mod.rs index 257448c6eec..c95bc4b61d3 100644 --- a/vortex-array/src/arrays/masked/vtable/mod.rs +++ b/vortex-array/src/arrays/masked/vtable/mod.rs @@ -27,6 +27,7 @@ use crate::VortexSessionExecute; use crate::array::Array; use crate::array::ArrayId; use crate::array::ArrayView; +use crate::array::ParentRef; use crate::array::VTable; use crate::array::validity_to_child; use crate::arrays::ConstantArray; @@ -186,7 +187,7 @@ impl VTable for Masked { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { PARENT_RULES.evaluate(array, parent, child_idx) diff --git a/vortex-array/src/arrays/null/mod.rs b/vortex-array/src/arrays/null/mod.rs index dd31ef16853..dfd1a220f38 100644 --- a/vortex-array/src/arrays/null/mod.rs +++ b/vortex-array/src/arrays/null/mod.rs @@ -16,6 +16,7 @@ use crate::array::ArrayParts; use crate::array::ArrayView; use crate::array::EmptyArrayData; use crate::array::OperationsVTable; +use crate::array::ParentRef; use crate::array::VTable; use crate::array::ValidityVTable; use crate::arrays::null::compute::rules::PARENT_RULES; @@ -100,7 +101,7 @@ impl VTable for Null { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { PARENT_RULES.evaluate(array, parent, child_idx) diff --git 
a/vortex-array/src/arrays/patched/vtable/mod.rs b/vortex-array/src/arrays/patched/vtable/mod.rs index f35a13600c5..a89c1539985 100644 --- a/vortex-array/src/arrays/patched/vtable/mod.rs +++ b/vortex-array/src/arrays/patched/vtable/mod.rs @@ -29,6 +29,7 @@ use crate::array::Array; use crate::array::ArrayId; use crate::array::ArrayParts; use crate::array::ArrayView; +use crate::array::ParentRef; use crate::array::VTable; use crate::array::ValidityChild; use crate::array::ValidityVTableFromChild; @@ -312,7 +313,7 @@ impl VTable for Patched { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { PARENT_RULES.evaluate(array, parent, child_idx) diff --git a/vortex-array/src/arrays/primitive/vtable/mod.rs b/vortex-array/src/arrays/primitive/vtable/mod.rs index 5130665cd30..aaaab6378c5 100644 --- a/vortex-array/src/arrays/primitive/vtable/mod.rs +++ b/vortex-array/src/arrays/primitive/vtable/mod.rs @@ -12,6 +12,7 @@ use crate::ExecutionCtx; use crate::ExecutionResult; use crate::array::Array; use crate::array::ArrayView; +use crate::array::ParentRef; use crate::array::VTable; use crate::arrays::primitive::PrimitiveData; use crate::buffer::BufferHandle; @@ -186,7 +187,7 @@ impl VTable for Primitive { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { RULES.evaluate(array, parent, child_idx) diff --git a/vortex-array/src/arrays/scalar_fn/vtable/mod.rs b/vortex-array/src/arrays/scalar_fn/vtable/mod.rs index 5906b3372f4..22b9bca61e4 100644 --- a/vortex-array/src/arrays/scalar_fn/vtable/mod.rs +++ b/vortex-array/src/arrays/scalar_fn/vtable/mod.rs @@ -27,6 +27,7 @@ use crate::array::Array; use crate::array::ArrayId; use crate::array::ArrayParts; use crate::array::ArrayView; +use crate::array::ParentRef; use crate::array::VTable; use crate::arrays::scalar_fn::array::ScalarFnArrayExt; use 
crate::arrays::scalar_fn::array::ScalarFnData; @@ -160,7 +161,7 @@ impl VTable for ScalarFn { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { PARENT_RULES.evaluate(array, parent, child_idx) @@ -208,11 +209,19 @@ impl Matcher for AnyScalarFn { type Match<'a> = ArrayView<'a, ScalarFn>; fn matches(array: &ArrayRef) -> bool { - array.is::() + ScalarFn::matches(array) } fn try_match(array: &ArrayRef) -> Option> { - array.as_opt::() + array.as_typed::() + } + + fn matches_parent(parent: &ParentRef<'_>) -> bool { + ScalarFn::matches_parent(parent) + } + + fn try_match_parent<'a>(parent: &'a ParentRef<'_>) -> Option> { + parent.as_opt::() } } @@ -224,19 +233,34 @@ impl Matcher for ExactScalarFn { type Match<'a> = ScalarFnArrayView<'a, F>; fn matches(array: &ArrayRef) -> bool { - if let Some(scalar_fn_array) = array.as_opt::() { - scalar_fn_array.data().scalar_fn().is::() - } else { - false - } + array + .as_typed::() + .is_some_and(|view| view.scalar_fn().is::()) } fn try_match(array: &ArrayRef) -> Option> { - let scalar_fn_array = array.as_opt::()?; + let scalar_fn_array = array.as_typed::()?; + let scalar_fn_data = scalar_fn_array.data(); + let scalar_fn = scalar_fn_data.scalar_fn().downcast_ref::()?; + Some(ScalarFnArrayView { + array: scalar_fn_array.array(), + vtable: scalar_fn.vtable(), + options: scalar_fn.options(), + }) + } + + fn matches_parent(parent: &ParentRef<'_>) -> bool { + parent + .typed_data::() + .is_some_and(|data| data.scalar_fn().is::()) + } + + fn try_match_parent<'a>(parent: &'a ParentRef<'_>) -> Option> { + let scalar_fn_array = parent.as_opt::()?; let scalar_fn_data = scalar_fn_array.data(); let scalar_fn = scalar_fn_data.scalar_fn().downcast_ref::()?; Some(ScalarFnArrayView { - array, + array: scalar_fn_array.array(), vtable: scalar_fn.vtable(), options: scalar_fn.options(), }) diff --git a/vortex-array/src/arrays/shared/vtable.rs 
b/vortex-array/src/arrays/shared/vtable.rs index f13a262479e..d7719ef5cbd 100644 --- a/vortex-array/src/arrays/shared/vtable.rs +++ b/vortex-array/src/arrays/shared/vtable.rs @@ -52,6 +52,7 @@ impl VTable for Shared { type TypedArrayData = SharedData; type OperationsVTable = Self; type ValidityVTable = Self; + fn id(&self) -> ArrayId { static ID: CachedId = CachedId::new("vortex.shared"); *ID diff --git a/vortex-array/src/arrays/slice/array.rs b/vortex-array/src/arrays/slice/array.rs index 9d7e0bb13ec..3cbb0e3cec4 100644 --- a/vortex-array/src/arrays/slice/array.rs +++ b/vortex-array/src/arrays/slice/array.rs @@ -84,25 +84,30 @@ impl SliceData { impl Array { /// Constructs a new `SliceArray`. pub fn try_new(child: ArrayRef, range: Range) -> VortexResult { + Ok(unsafe { Array::from_parts_unchecked(Self::try_new_parts(child, range)?) }) + } + + /// Constructs a new `SliceArray`. + pub fn new(child: ArrayRef, range: Range) -> Self { + unsafe { Array::from_parts_unchecked(Self::new_parts(child, range)) } + } + + /// Builds the [`ArrayParts`] for a slice. The parts can then be + /// optimized through [`ParentRef::optimize`](crate::array::ParentRef::optimize) + /// or materialized directly with [`ArrayParts::into_array`]. + pub fn try_new_parts(child: ArrayRef, range: Range) -> VortexResult> { let len = range.len(); let dtype = child.dtype().clone(); let data = SliceData::try_new(child.len(), range)?; - Ok(unsafe { - Array::from_parts_unchecked( - ArrayParts::new(Slice, dtype, len, data).with_slots(smallvec![Some(child)]), - ) - }) + Ok(ArrayParts::new(Slice, dtype, len, data).with_slots(smallvec![Some(child)])) } - /// Constructs a new `SliceArray`. - pub fn new(child: ArrayRef, range: Range) -> Self { + /// Builds the [`ArrayParts`] without bounds-checking the range. See + /// [`Self::try_new_parts`] for the checked variant. 
+ pub fn new_parts(child: ArrayRef, range: Range) -> ArrayParts { let len = range.len(); let dtype = child.dtype().clone(); let data = SliceData::new(range); - unsafe { - Array::from_parts_unchecked( - ArrayParts::new(Slice, dtype, len, data).with_slots(smallvec![Some(child)]), - ) - } + ArrayParts::new(Slice, dtype, len, data).with_slots(smallvec![Some(child)]) } } diff --git a/vortex-array/src/arrays/slice/mod.rs b/vortex-array/src/arrays/slice/mod.rs index 8eec9d85310..2664598a7c6 100644 --- a/vortex-array/src/arrays/slice/mod.rs +++ b/vortex-array/src/arrays/slice/mod.rs @@ -26,6 +26,7 @@ use crate::Canonical; use crate::ExecutionCtx; use crate::IntoArray; use crate::array::ArrayView; +use crate::array::ParentRef; use crate::array::VTable; use crate::kernel::ExecuteParentKernel; use crate::matcher::Matcher; @@ -96,6 +97,25 @@ where } ::slice(array, parent.range.clone()) } + + /// Override the default `try_match_parent`-driven dispatch so a `SliceArray` parent + /// borrowed from [`ArrayParts`](crate::array::ArrayParts) can drive reduction + /// without first allocating an `Arc>`. + fn reduce_parent_ref( + &self, + array: ArrayView<'_, V>, + parent: &ParentRef<'_>, + child_idx: usize, + ) -> VortexResult> { + assert_eq!(child_idx, 0); + let Some(parent_view) = parent.as_opt::() else { + return Ok(None); + }; + if let Some(result) = precondition::(array, &parent_view.range) { + return Ok(Some(result)); + } + ::slice(array, parent_view.range.clone()) + } } /// Adaptor that wraps a [`SliceKernel`] impl as an [`ExecuteParentKernel`]. 
diff --git a/vortex-array/src/arrays/slice/vtable.rs b/vortex-array/src/arrays/slice/vtable.rs index ac0ecc18039..71860ca22d7 100644 --- a/vortex-array/src/arrays/slice/vtable.rs +++ b/vortex-array/src/arrays/slice/vtable.rs @@ -24,6 +24,7 @@ use crate::array::Array; use crate::array::ArrayId; use crate::array::ArrayView; use crate::array::OperationsVTable; +use crate::array::ParentRef; use crate::array::VTable; use crate::array::ValidityVTable; use crate::arrays::slice::SliceArrayExt; @@ -63,6 +64,7 @@ impl VTable for Slice { type TypedArrayData = SliceData; type OperationsVTable = Self; type ValidityVTable = Self; + fn id(&self) -> ArrayId { static ID: CachedId = CachedId::new("vortex.slice"); *ID @@ -153,7 +155,7 @@ impl VTable for Slice { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { PARENT_RULES.evaluate(array, parent, child_idx) diff --git a/vortex-array/src/arrays/struct_/compute/cast.rs b/vortex-array/src/arrays/struct_/compute/cast.rs index f954dbe2fea..c044b11854c 100644 --- a/vortex-array/src/arrays/struct_/compute/cast.rs +++ b/vortex-array/src/arrays/struct_/compute/cast.rs @@ -9,6 +9,7 @@ use crate::ArrayRef; use crate::ArrayView; use crate::ExecutionCtx; use crate::IntoArray; +use crate::ParentRef; use crate::arrays::ConstantArray; use crate::arrays::Struct; use crate::arrays::StructArray; @@ -23,14 +24,14 @@ use crate::scalar_fn::fns::cast::Cast; pub(crate) fn struct_cast_execute_parent( child: &ArrayRef, - parent: &ArrayRef, + parent: &ParentRef, _child_idx: usize, ctx: &mut ExecutionCtx, ) -> VortexResult> { let Some(array) = child.as_opt::() else { return Ok(None); }; - let Some(parent) = ExactScalarFn::::try_match(parent) else { + let Some(parent) = ExactScalarFn::::try_match_parent(parent) else { return Ok(None); }; @@ -124,6 +125,7 @@ mod tests { use crate::ArrayRef; use crate::ExecutionCtx; use crate::IntoArray; + use crate::ParentRef; use 
crate::VortexSessionExecute; use crate::arrays::ConstantArray; use crate::arrays::PrimitiveArray; @@ -153,7 +155,7 @@ mod tests { fn null_struct_cast_execute_parent( child: &ArrayRef, - parent: &ArrayRef, + parent: &ParentRef<'_>, _child_idx: usize, _ctx: &mut ExecutionCtx, ) -> VortexResult> { diff --git a/vortex-array/src/arrays/struct_/compute/rules.rs b/vortex-array/src/arrays/struct_/compute/rules.rs index ae3a67b50e3..1323b4f9e97 100644 --- a/vortex-array/src/arrays/struct_/compute/rules.rs +++ b/vortex-array/src/arrays/struct_/compute/rules.rs @@ -6,6 +6,7 @@ use vortex_error::vortex_err; use crate::ArrayRef; use crate::IntoArray; +use crate::ParentRef; use crate::array::ArrayView; use crate::arrays::ConstantArray; use crate::arrays::Struct; @@ -35,13 +36,13 @@ pub(crate) const PARENT_RULES: ParentRuleSet = ParentRuleSet::new(&[ pub(crate) fn struct_cast_reduce_parent( child: &ArrayRef, - parent: &ArrayRef, + parent: &ParentRef<'_>, _child_idx: usize, ) -> VortexResult> { let Some(array) = child.as_opt::() else { return Ok(None); }; - let Some(parent) = ExactScalarFn::::try_match(parent) else { + let Some(parent) = ExactScalarFn::::try_match_parent(parent) else { return Ok(None); }; @@ -131,6 +132,7 @@ mod tests { use crate::ArrayRef; use crate::IntoArray; + use crate::ParentRef; use crate::array::ArrayPlugin; use crate::arrays::ScalarFn; use crate::arrays::Struct; @@ -153,12 +155,13 @@ mod tests { use crate::scalar_fn::ScalarFnVTable; use crate::scalar_fn::fns::cast::Cast; use crate::validity::Validity; + static SESSION: LazyLock = LazyLock::new(|| VortexSession::empty().with::()); fn no_struct_cast_plugin( _child: &ArrayRef, - _parent: &ArrayRef, + _parent: &ParentRef<'_>, _child_idx: usize, ) -> VortexResult> { Ok(None) diff --git a/vortex-array/src/arrays/struct_/vtable/mod.rs b/vortex-array/src/arrays/struct_/vtable/mod.rs index 71ec8a8e2df..5a59a1536a5 100644 --- a/vortex-array/src/arrays/struct_/vtable/mod.rs +++ 
b/vortex-array/src/arrays/struct_/vtable/mod.rs @@ -16,6 +16,7 @@ use crate::array::Array; use crate::array::ArrayParts; use crate::array::ArrayView; use crate::array::EmptyArrayData; +use crate::array::ParentRef; use crate::array::VTable; use crate::array::child_to_validity; use crate::arrays::struct_::array::FIELDS_OFFSET; @@ -42,6 +43,7 @@ impl VTable for Struct { type OperationsVTable = Self; type ValidityVTable = Self; + fn id(&self) -> ArrayId { static ID: CachedId = CachedId::new("vortex.struct"); *ID @@ -187,7 +189,7 @@ impl VTable for Struct { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { PARENT_RULES.evaluate(array, parent, child_idx) diff --git a/vortex-array/src/arrays/varbin/vtable/mod.rs b/vortex-array/src/arrays/varbin/vtable/mod.rs index c1a0941e720..05eb7786277 100644 --- a/vortex-array/src/arrays/varbin/vtable/mod.rs +++ b/vortex-array/src/arrays/varbin/vtable/mod.rs @@ -18,6 +18,7 @@ use crate::IntoArray; use crate::array::Array; use crate::array::ArrayId; use crate::array::ArrayView; +use crate::array::ParentRef; use crate::array::VTable; use crate::arrays::varbin::VarBinArrayExt; use crate::arrays::varbin::VarBinData; @@ -70,6 +71,7 @@ impl VTable for VarBin { type OperationsVTable = Self; type ValidityVTable = Self; + fn id(&self) -> ArrayId { static ID: CachedId = CachedId::new("vortex.varbin"); *ID @@ -176,7 +178,7 @@ impl VTable for VarBin { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { PARENT_RULES.evaluate(array, parent, child_idx) diff --git a/vortex-array/src/arrays/varbinview/vtable/mod.rs b/vortex-array/src/arrays/varbinview/vtable/mod.rs index 45db5d904ac..fa541e2e839 100644 --- a/vortex-array/src/arrays/varbinview/vtable/mod.rs +++ b/vortex-array/src/arrays/varbinview/vtable/mod.rs @@ -22,6 +22,7 @@ use crate::Precision; use crate::array::Array; use 
crate::array::ArrayId; use crate::array::ArrayView; +use crate::array::ParentRef; use crate::array::VTable; use crate::arrays::varbinview::BinaryView; use crate::arrays::varbinview::VarBinViewData; @@ -211,7 +212,7 @@ impl VTable for VarBinView { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { PARENT_RULES.evaluate(array, parent, child_idx) diff --git a/vortex-array/src/arrays/variant/vtable/mod.rs b/vortex-array/src/arrays/variant/vtable/mod.rs index 3c0dd76c575..cc6195b7ead 100644 --- a/vortex-array/src/arrays/variant/vtable/mod.rs +++ b/vortex-array/src/arrays/variant/vtable/mod.rs @@ -24,6 +24,7 @@ use crate::array::ArrayId; use crate::array::ArrayParts; use crate::array::ArrayView; use crate::array::EmptyArrayData; +use crate::array::ParentRef; use crate::array::VTable; use crate::arrays::variant::CORE_STORAGE_SLOT; use crate::arrays::variant::NUM_SLOTS; @@ -187,7 +188,7 @@ impl VTable for Variant { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { RULES.evaluate(array, parent, child_idx) diff --git a/vortex-array/src/canonical.rs b/vortex-array/src/canonical.rs index b1773d453f2..35894b96090 100644 --- a/vortex-array/src/canonical.rs +++ b/vortex-array/src/canonical.rs @@ -18,6 +18,7 @@ use crate::Executable; use crate::ExecutionCtx; use crate::IntoArray; use crate::array::ArrayView; +use crate::array::ParentRef; use crate::array::child_to_validity; use crate::arrays::Bool; use crate::arrays::BoolArray; @@ -1062,6 +1063,11 @@ pub struct AnyCanonical; impl Matcher for AnyCanonical { type Match<'a> = CanonicalView<'a>; + /// Fast encoding-id check that skips [`ParentRef`] construction. This is the + /// hot path for [`ArrayRef::is_canonical`](crate::ArrayRef::is_canonical), so + /// each canonical encoding is checked via the cheap `ArrayRef::is::()` + /// direct downcast. 
+ #[inline] fn matches(array: &ArrayRef) -> bool { array.is::() || array.is::() @@ -1075,6 +1081,8 @@ impl Matcher for AnyCanonical { || array.is::() } + /// Direct heap-array downcasts; mirrors [`Self::try_match_parent`] but skips + /// the [`ParentRef`] construction that would otherwise wrap each call. fn try_match(array: &ArrayRef) -> Option> { if let Some(a) = array.as_opt::() { Some(CanonicalView::Null(a)) @@ -1098,6 +1106,43 @@ impl Matcher for AnyCanonical { array.as_opt::().map(CanonicalView::Extension) } } + + fn matches_parent(parent: &ParentRef<'_>) -> bool { + Null::matches_parent(parent) + || Bool::matches_parent(parent) + || Primitive::matches_parent(parent) + || Decimal::matches_parent(parent) + || Struct::matches_parent(parent) + || ListView::matches_parent(parent) + || FixedSizeList::matches_parent(parent) + || VarBinView::matches_parent(parent) + || Variant::matches_parent(parent) + || Extension::matches_parent(parent) + } + + fn try_match_parent<'a>(parent: &'a ParentRef<'_>) -> Option> { + if let Some(a) = parent.as_opt::() { + Some(CanonicalView::Null(a)) + } else if let Some(a) = parent.as_opt::() { + Some(CanonicalView::Bool(a)) + } else if let Some(a) = parent.as_opt::() { + Some(CanonicalView::Primitive(a)) + } else if let Some(a) = parent.as_opt::() { + Some(CanonicalView::Decimal(a)) + } else if let Some(a) = parent.as_opt::() { + Some(CanonicalView::Struct(a)) + } else if let Some(a) = parent.as_opt::() { + Some(CanonicalView::List(a)) + } else if let Some(a) = parent.as_opt::() { + Some(CanonicalView::FixedSizeList(a)) + } else if let Some(a) = parent.as_opt::() { + Some(CanonicalView::VarBinView(a)) + } else if let Some(a) = parent.as_opt::() { + Some(CanonicalView::Variant(a)) + } else { + parent.as_opt::().map(CanonicalView::Extension) + } + } } #[cfg(test)] diff --git a/vortex-array/src/columnar.rs b/vortex-array/src/columnar.rs index 2e4bdc328fd..02b9463216e 100644 --- a/vortex-array/src/columnar.rs +++ 
b/vortex-array/src/columnar.rs @@ -12,6 +12,7 @@ use crate::Executable; use crate::ExecutionCtx; use crate::IntoArray; use crate::array::ArrayView; +use crate::array::ParentRef; use crate::arrays::Constant; use crate::arrays::ConstantArray; use crate::dtype::DType; @@ -93,11 +94,32 @@ pub struct AnyColumnar; impl Matcher for AnyColumnar { type Match<'a> = ColumnarView<'a>; + /// Fast encoding-id check that skips [`ParentRef`] construction. Mirror of + /// [`AnyCanonical::matches`](crate::AnyCanonical::matches) for the same reason. + #[inline] + fn matches(array: &ArrayRef) -> bool { + array.is::() || AnyCanonical::matches(array) + } + + /// Direct heap-array downcasts; skips the [`ParentRef`] construction that the + /// default [`Self::try_match`] would otherwise do. fn try_match(array: &ArrayRef) -> Option> { if let Some(constant) = array.as_opt::() { Some(ColumnarView::Constant(constant)) } else { - array.as_opt::().map(ColumnarView::Canonical) + AnyCanonical::try_match(array).map(ColumnarView::Canonical) + } + } + + fn matches_parent(parent: &ParentRef<'_>) -> bool { + Constant::matches_parent(parent) || AnyCanonical::matches_parent(parent) + } + + fn try_match_parent<'a>(parent: &'a ParentRef<'_>) -> Option> { + if let Some(constant) = parent.as_opt::() { + Some(ColumnarView::Constant(constant)) + } else { + AnyCanonical::try_match_parent(parent).map(ColumnarView::Canonical) } } } diff --git a/vortex-array/src/executor.rs b/vortex-array/src/executor.rs index d6070ac1a4d..2ef87e85ce0 100644 --- a/vortex-array/src/executor.rs +++ b/vortex-array/src/executor.rs @@ -35,6 +35,7 @@ use crate::ArrayRef; use crate::Canonical; use crate::IntoArray; use crate::array::ArrayId; +use crate::array::ParentRef; use crate::builders::ArrayBuilder; use crate::builders::builder_with_capacity_in; use crate::dtype::DType; @@ -409,9 +410,10 @@ impl Executable for ArrayRef { return Ok(reduced); } + let parent_ref = ParentRef::from_array_ref(&array); for (slot_idx, slot) in 
array.slots().iter().enumerate() { let Some(child) = slot else { continue }; - if let Some(reduced_parent) = child.reduce_parent(&array, slot_idx)? { + if let Some(reduced_parent) = child.reduce_parent(&parent_ref, slot_idx)? { ctx.log(format_args!( "reduce_parent: slot[{}]({}) rewrote {} -> {}", slot_idx, @@ -549,8 +551,9 @@ fn execute_parent_for_child( && let Some(plugins) = kernels.find_execute_parent(parent.encoding_id(), child.encoding_id()) { + let parent_ref = ParentRef::from_array_ref(parent); for plugin in plugins.as_ref() { - if let Some(result) = plugin(child, parent, slot_idx, ctx)? { + if let Some(result) = plugin(child, &parent_ref, slot_idx, ctx)? { return Ok(Some(result)); } } diff --git a/vortex-array/src/matcher.rs b/vortex-array/src/matcher.rs index 532931df083..b33f30a7f86 100644 --- a/vortex-array/src/matcher.rs +++ b/vortex-array/src/matcher.rs @@ -2,34 +2,45 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors use crate::ArrayRef; +use crate::array::ParentRef; /// Trait for matching array types. pub trait Matcher { type Match<'a>; - /// Check if the given array matches this matcher type + /// Check if the given array matches this matcher type. + /// + /// The default implementation delegates through `try_match`, but matchers that + /// can answer cheaply (encoding-id checks, no view construction) should override + /// this directly so hot callers like `ArrayRef::is::()` don't pay the + /// `try_match` cost. fn matches(array: &ArrayRef) -> bool { Self::try_match(array).is_some() } /// Try to match the given array, returning the matched view type if successful. + /// + /// Both heap-array entry points (`matches`, `try_match`) and parent entry points + /// (`matches_parent`, `try_match_parent`) are required because they borrow from + /// different things: heap entries borrow from `&ArrayRef`, parent entries borrow + /// from `&ParentRef`. 
Most stack-friendly matchers should return the matched + /// view without forcing a materialization — defer the heap allocation to + /// [`ArrayView::array`](crate::array::ArrayView::array) on the cold path. fn try_match(array: &ArrayRef) -> Option>; -} - -/// Matches any array type (wildcard matcher) -#[derive(Debug)] -pub struct AnyArray; -impl Matcher for AnyArray { - type Match<'a> = &'a ArrayRef; - - #[inline(always)] - fn matches(_array: &ArrayRef) -> bool { - true + /// Check if the given parent matches this matcher type. + /// + /// The default implementation delegates through `try_match_parent`. Override + /// when a cheaper check (e.g. an encoding-id comparison) suffices. + fn matches_parent(parent: &ParentRef<'_>) -> bool { + Self::try_match_parent(parent).is_some() } - #[inline(always)] - fn try_match(array: &ArrayRef) -> Option> { - Some(array) - } + /// Try to match a [`ParentRef`]. + /// + /// The returned `Match` borrows from `parent`, so matchers can return a + /// stack-backed [`ArrayView`](crate::array::ArrayView) without forcing the + /// parent to materialize. Implementations typically delegate to + /// [`ParentRef::as_opt`]. 
+ fn try_match_parent<'a>(parent: &'a ParentRef<'_>) -> Option>; } diff --git a/vortex-array/src/optimizer/kernels.rs b/vortex-array/src/optimizer/kernels.rs index d38bc9402d1..73f0ce15ef3 100644 --- a/vortex-array/src/optimizer/kernels.rs +++ b/vortex-array/src/optimizer/kernels.rs @@ -31,6 +31,7 @@ use std::sync::Arc; use std::sync::LazyLock; use arc_swap::ArcSwap; +use vortex_array::arrays::Struct; use vortex_error::VortexResult; use vortex_session::Ref; use vortex_session::SessionExt; @@ -39,13 +40,13 @@ use vortex_session::registry::Id; use vortex_utils::aliases::DefaultHashBuilder; use vortex_utils::aliases::hash_map::HashMap; +use crate::ArrayPlugin; use crate::ArrayRef; use crate::ExecutionCtx; -use crate::array::VTable; -use crate::arrays::Struct; +use crate::array::ParentRef; use crate::arrays::struct_::compute::cast::struct_cast_execute_parent; use crate::arrays::struct_::compute::rules::struct_cast_reduce_parent; -use crate::scalar_fn::ScalarFnVTable; +use crate::scalar_fn::ScalarFnPlugin; use crate::scalar_fn::fns::cast::Cast; /// Shared hasher used to combine `(outer, child)` tuples into registry keys. @@ -59,8 +60,11 @@ static FN_HASHER: LazyLock = LazyLock::new(DefaultHashBuilde /// /// Implementations must preserve the parent's logical length and dtype, matching the invariant /// required of static parent-reduce rules. -pub type ReduceParentFn = - fn(child: &ArrayRef, parent: &ArrayRef, child_idx: usize) -> VortexResult>; +pub type ReduceParentFn = fn( + child: &ArrayRef, + parent: &ParentRef<'_>, + child_idx: usize, +) -> VortexResult>; #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd)] #[repr(transparent)] @@ -88,7 +92,7 @@ impl Borrow for ReduceParentFnId { /// required of static `execute_parent` kernels. 
pub type ExecuteParentFn = fn( child: &ArrayRef, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ctx: &mut ExecutionCtx, ) -> VortexResult>; diff --git a/vortex-array/src/optimizer/mod.rs b/vortex-array/src/optimizer/mod.rs index d6e93ca0561..794ceee5f07 100644 --- a/vortex-array/src/optimizer/mod.rs +++ b/vortex-array/src/optimizer/mod.rs @@ -22,6 +22,7 @@ use vortex_session::SessionExt; use vortex_session::VortexSession; use crate::ArrayRef; +use crate::array::ParentRef; use crate::optimizer::kernels::ArrayKernels; pub mod kernels; @@ -87,6 +88,7 @@ fn try_optimize( // Apply parent reduction rules to each slot in the context of the current array. // Its important to take all slots here, as `current_array` can change inside the loop. + let parent_ref = ParentRef::from_array_ref(&current_array); for (slot_idx, slot) in current_array.slots().iter().enumerate() { let Some(child) = slot else { continue }; @@ -96,7 +98,7 @@ fn try_optimize( array_ref.find_reduce_parent(current_array.encoding_id(), child.encoding_id()) { for plugin in plugins.as_ref() { - if let Some(new_array) = plugin(child, &current_array, slot_idx)? { + if let Some(new_array) = plugin(child, &parent_ref, slot_idx)? { current_array = new_array; any_optimizations = true; continue 'outer; @@ -104,7 +106,7 @@ fn try_optimize( } } - if let Some(new_array) = child.reduce_parent(&current_array, slot_idx)? { + if let Some(new_array) = child.reduce_parent(&parent_ref, slot_idx)? { // If the parent was replaced, then we attempt to reduce it again. 
current_array = new_array; any_optimizations = true; diff --git a/vortex-array/src/optimizer/rules.rs b/vortex-array/src/optimizer/rules.rs index e505b21a199..a5595e999e6 100644 --- a/vortex-array/src/optimizer/rules.rs +++ b/vortex-array/src/optimizer/rules.rs @@ -26,6 +26,7 @@ use vortex_error::VortexResult; use crate::ArrayRef; use crate::array::ArrayView; +use crate::array::ParentRef; use crate::array::VTable; use crate::matcher::Matcher; @@ -48,6 +49,16 @@ pub trait ArrayReduceRule: Debug + Send + Sync + 'static { /// The child sees the parent's type via the associated `Parent` [`Matcher`] and can return /// a replacement for the parent. This enables optimizations like pushing operations through /// compression layers (e.g., pushing a scalar function into dictionary values). +/// +/// # Stack-backed parents +/// +/// Construction-side callers borrow `ArrayParts` as a [`ParentRef`] via +/// [`ParentRef::optimize`](crate::array::ParentRef::optimize). [`Matcher::try_match_parent`] +/// returns a stack-backed [`ArrayView`] without materializing an +/// `Arc>`, so rules that consume only the typed metadata (e.g. +/// `view.dtype()`, `view.data()`, `view.slots()`) can fire without forcing a +/// heap allocation. Materialization only happens if the rule reaches for +/// [`ArrayView::array`]. pub trait ArrayParentReduceRule: Debug + Send + Sync + 'static { /// The parent array type this rule matches against. type Parent: Matcher; @@ -58,23 +69,59 @@ pub trait ArrayParentReduceRule: Debug + Send + Sync + 'static { /// - `Ok(Some(new_array))` if the rule applied successfully /// - `Ok(None)` if the rule doesn't apply /// - `Err(e)` if an error occurred + /// + /// # Stack-backed parents + /// + /// This method receives the parent through [`Matcher::Match`]. For the blanket + /// `impl Matcher for V`, that is an [`ArrayView`] borrowed from + /// the parent — no `Arc>` is allocated unless the rule reaches + /// for [`ArrayView::array`]. 
fn reduce_parent( &self, array: ArrayView<'_, V>, parent: ::Match<'_>, child_idx: usize, ) -> VortexResult>; + + /// Attempt to rewrite the child given a [`ParentRef`]. + /// + /// This is the dispatch entry point used by [`ParentRuleSet`]. The default + /// implementation extracts the parent's typed view via + /// [`Matcher::try_match_parent`] and then delegates to + /// [`reduce_parent`](Self::reduce_parent). + /// + /// Override this when the rule wants direct access to the [`ParentRef`] — + /// for example to call [`ParentRef::as_opt`] for a different encoding than + /// `Self::Parent`, or to inspect the parent's encoding id before paying for + /// the typed-view construction. + /// + /// # Stability + /// + /// **Unstable.** This is the new `ParentRef`-based dispatch entry; the signature + /// and contract are expected to change as more rules migrate off + /// [`reduce_parent`](Self::reduce_parent). Treat overrides as opt-in for now. + fn reduce_parent_ref( + &self, + array: ArrayView<'_, V>, + parent: &ParentRef<'_>, + child_idx: usize, + ) -> VortexResult> { + let Some(parent_view) = ::try_match_parent(parent) else { + return Ok(None); + }; + self.reduce_parent(array, parent_view, child_idx) + } } /// Type-erased version of [`ArrayParentReduceRule`] used for dynamic dispatch within /// [`ParentRuleSet`]. 
pub trait DynArrayParentReduceRule: Debug + Send + Sync { - fn matches(&self, parent: &ArrayRef) -> bool; + fn matches(&self, parent: &ParentRef<'_>) -> bool; fn reduce_parent( &self, array: ArrayView<'_, V>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult>; } @@ -98,20 +145,17 @@ impl> Debug for ParentReduceRuleAdapter> DynArrayParentReduceRule for ParentReduceRuleAdapter { - fn matches(&self, parent: &ArrayRef) -> bool { - K::Parent::matches(parent) + fn matches(&self, parent: &ParentRef<'_>) -> bool { + K::Parent::matches_parent(parent) } fn reduce_parent( &self, child: ArrayView<'_, V>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { - let Some(parent_view) = K::Parent::try_match(parent) else { - return Ok(None); - }; - self.rule.reduce_parent(child, parent_view, child_idx) + self.rule.reduce_parent_ref(child, parent, child_idx) } } @@ -171,7 +215,7 @@ impl ParentRuleSet { pub fn evaluate( &self, child: ArrayView<'_, V>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { for rule in self.rules.iter() { diff --git a/vortex-cuda/src/dynamic_dispatch/plan_builder.rs b/vortex-cuda/src/dynamic_dispatch/plan_builder.rs index d20af9f7d21..d17635e0c55 100644 --- a/vortex-cuda/src/dynamic_dispatch/plan_builder.rs +++ b/vortex-cuda/src/dynamic_dispatch/plan_builder.rs @@ -12,6 +12,7 @@ use itertools::zip_eq; use tracing::trace; use vortex::array::ArrayRef; use vortex::array::ArrayVTable; +use vortex::array::ParentRef; use vortex::array::arrays::Dict; use vortex::array::arrays::Primitive; use vortex::array::arrays::Slice; @@ -520,7 +521,8 @@ impl FusedPlan { let slice_arr = array.as_::(); let child = slice_arr.child().clone(); - if let Some(reduced) = child.reduce_parent(&array, 0)? { + let parent_ref = ParentRef::from_array_ref(&array); + if let Some(reduced) = child.reduce_parent(&parent_ref, 0)? 
{ return self.walk(reduced, pending_subtrees); } From b281cee4d580b44ffbe5fb66beff2559ee7460b2 Mon Sep 17 00:00:00 2001 From: Robert Kruszewski Date: Thu, 28 May 2026 18:16:41 +0100 Subject: [PATCH 02/17] less Signed-off-by: Robert Kruszewski --- vortex-array/src/array/parent.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vortex-array/src/array/parent.rs b/vortex-array/src/array/parent.rs index 06de10b48b3..c5d956374e9 100644 --- a/vortex-array/src/array/parent.rs +++ b/vortex-array/src/array/parent.rs @@ -230,7 +230,7 @@ impl<'a> ParentRef<'a> { pub(crate) fn is_encoding(&self) -> bool { match self.data { ParentData::Heap { data, .. } => data.is::>(), - ParentData::Parts { vtable, .. } => return vtable.is::(), + ParentData::Parts { vtable, .. } => vtable.is::(), } } From ce14c70987228f8b4c747c3e0934e09016b3f79f Mon Sep 17 00:00:00 2001 From: Robert Kruszewski Date: Mon, 18 May 2026 20:54:37 +0100 Subject: [PATCH 03/17] simpler Signed-off-by: Robert Kruszewski --- vortex-array/src/arrays/dict/take.rs | 26 ----------------- vortex-array/src/arrays/filter/kernel.rs | 17 ----------- vortex-array/src/arrays/slice/mod.rs | 20 ------------- vortex-array/src/optimizer/rules.rs | 36 ++++-------------------- 4 files changed, 5 insertions(+), 94 deletions(-) diff --git a/vortex-array/src/arrays/dict/take.rs b/vortex-array/src/arrays/dict/take.rs index 10b2533fb04..7ff1b659337 100644 --- a/vortex-array/src/arrays/dict/take.rs +++ b/vortex-array/src/arrays/dict/take.rs @@ -10,11 +10,9 @@ use crate::Canonical; use crate::ExecutionCtx; use crate::IntoArray; use crate::array::ArrayView; -use crate::array::ParentRef; use crate::array::VTable; use crate::arrays::ConstantArray; use crate::arrays::dict::DictArraySlotsExt; -use crate::arrays::dict::DictSlotsView; use crate::expr::stats::Precision; use crate::expr::stats::Stat; use crate::expr::stats::StatsProvider; @@ -108,30 +106,6 @@ where } Ok(result) } - - fn reduce_parent_ref( - &self, - array: ArrayView<'_, 
V>, - parent: &ParentRef<'_>, - child_idx: usize, - ) -> VortexResult> { - // Only handle the values child (index 1), not the codes child (index 0). - if child_idx != 1 { - return Ok(None); - } - let Some(parent) = parent.as_opt::() else { - return Ok(None); - }; - let codes = DictSlotsView::from_slots(parent.slots()).codes; - if let Some(result) = precondition::(array, codes) { - return Ok(Some(result)); - } - let result = ::take(array, codes)?; - if let Some(taken) = &result { - propagate_take_stats(array.array(), taken, codes)?; - } - Ok(result) - } } #[derive(Default, Debug)] diff --git a/vortex-array/src/arrays/filter/kernel.rs b/vortex-array/src/arrays/filter/kernel.rs index 524bdaa780f..0f75770999d 100644 --- a/vortex-array/src/arrays/filter/kernel.rs +++ b/vortex-array/src/arrays/filter/kernel.rs @@ -16,7 +16,6 @@ use crate::Canonical; use crate::ExecutionCtx; use crate::IntoArray; use crate::array::ArrayView; -use crate::array::ParentRef; use crate::array::VTable; use crate::arrays::Filter; use crate::arrays::dict::TakeExecuteAdaptor; @@ -105,22 +104,6 @@ where } ::filter(array, parent.filter_mask()) } - - fn reduce_parent_ref( - &self, - array: ArrayView<'_, V>, - parent: &ParentRef<'_>, - child_idx: usize, - ) -> VortexResult> { - assert_eq!(child_idx, 0); - let Some(parent) = parent.as_opt::() else { - return Ok(None); - }; - if let Some(result) = precondition::(array, parent.filter_mask()) { - return Ok(Some(result)); - } - ::filter(array, parent.filter_mask()) - } } /// Adaptor that wraps a [`FilterKernel`] impl as an [`ExecuteParentKernel`]. 
diff --git a/vortex-array/src/arrays/slice/mod.rs b/vortex-array/src/arrays/slice/mod.rs index 2664598a7c6..8eec9d85310 100644 --- a/vortex-array/src/arrays/slice/mod.rs +++ b/vortex-array/src/arrays/slice/mod.rs @@ -26,7 +26,6 @@ use crate::Canonical; use crate::ExecutionCtx; use crate::IntoArray; use crate::array::ArrayView; -use crate::array::ParentRef; use crate::array::VTable; use crate::kernel::ExecuteParentKernel; use crate::matcher::Matcher; @@ -97,25 +96,6 @@ where } ::slice(array, parent.range.clone()) } - - /// Override the default `try_match_parent`-driven dispatch so a `SliceArray` parent - /// borrowed from [`ArrayParts`](crate::array::ArrayParts) can drive reduction - /// without first allocating an `Arc>`. - fn reduce_parent_ref( - &self, - array: ArrayView<'_, V>, - parent: &ParentRef<'_>, - child_idx: usize, - ) -> VortexResult> { - assert_eq!(child_idx, 0); - let Some(parent_view) = parent.as_opt::() else { - return Ok(None); - }; - if let Some(result) = precondition::(array, &parent_view.range) { - return Ok(Some(result)); - } - ::slice(array, parent_view.range.clone()) - } } /// Adaptor that wraps a [`SliceKernel`] impl as an [`ExecuteParentKernel`]. diff --git a/vortex-array/src/optimizer/rules.rs b/vortex-array/src/optimizer/rules.rs index a5595e999e6..fe21f6c0746 100644 --- a/vortex-array/src/optimizer/rules.rs +++ b/vortex-array/src/optimizer/rules.rs @@ -72,7 +72,7 @@ pub trait ArrayParentReduceRule: Debug + Send + Sync + 'static { /// /// # Stack-backed parents /// - /// This method receives the parent through [`Matcher::Match`]. For the blanket + /// The parent is received through [`Matcher::Match`]. For the blanket /// `impl Matcher for V`, that is an [`ArrayView`] borrowed from /// the parent — no `Arc>` is allocated unless the rule reaches /// for [`ArrayView::array`]. 
@@ -82,35 +82,6 @@ pub trait ArrayParentReduceRule: Debug + Send + Sync + 'static { parent: ::Match<'_>, child_idx: usize, ) -> VortexResult>; - - /// Attempt to rewrite the child given a [`ParentRef`]. - /// - /// This is the dispatch entry point used by [`ParentRuleSet`]. The default - /// implementation extracts the parent's typed view via - /// [`Matcher::try_match_parent`] and then delegates to - /// [`reduce_parent`](Self::reduce_parent). - /// - /// Override this when the rule wants direct access to the [`ParentRef`] — - /// for example to call [`ParentRef::as_opt`] for a different encoding than - /// `Self::Parent`, or to inspect the parent's encoding id before paying for - /// the typed-view construction. - /// - /// # Stability - /// - /// **Unstable.** This is the new `ParentRef`-based dispatch entry; the signature - /// and contract are expected to change as more rules migrate off - /// [`reduce_parent`](Self::reduce_parent). Treat overrides as opt-in for now. - fn reduce_parent_ref( - &self, - array: ArrayView<'_, V>, - parent: &ParentRef<'_>, - child_idx: usize, - ) -> VortexResult> { - let Some(parent_view) = ::try_match_parent(parent) else { - return Ok(None); - }; - self.reduce_parent(array, parent_view, child_idx) - } } /// Type-erased version of [`ArrayParentReduceRule`] used for dynamic dispatch within @@ -155,7 +126,10 @@ impl> DynArrayParentReduceRule parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { - self.rule.reduce_parent_ref(child, parent, child_idx) + let Some(parent_view) = ::try_match_parent(parent) else { + return Ok(None); + }; + self.rule.reduce_parent(child, parent_view, child_idx) } } From 730da8c70903b6d5a6bd3f2fe2ba41bef46a7a11 Mon Sep 17 00:00:00 2001 From: Robert Kruszewski Date: Tue, 19 May 2026 00:42:36 +0100 Subject: [PATCH 04/17] more Signed-off-by: Robert Kruszewski --- vortex-array/src/array/erased.rs | 3 +- vortex-array/src/array/parent.rs | 20 +-- .../src/arrays/scalar_fn/vtable/mod.rs | 119 
++++++++++++------ .../src/scalar_fn/fns/between/kernel.rs | 19 +-- .../src/scalar_fn/fns/binary/compare.rs | 10 +- .../src/scalar_fn/fns/fill_null/kernel.rs | 12 +- vortex-array/src/scalar_fn/fns/like/kernel.rs | 13 +- .../src/scalar_fn/fns/list_contains/kernel.rs | 13 +- vortex-array/src/scalar_fn/fns/mask/kernel.rs | 14 +-- vortex-array/src/scalar_fn/fns/zip/kernel.rs | 17 +-- vortex-tensor/src/scalar_fns/inner_product.rs | 4 +- vortex-tensor/src/scalar_fns/l2_denorm.rs | 7 +- vortex-tensor/src/scalar_fns/l2_norm.rs | 5 +- .../src/scalar_fns/sorf_transform/vtable.rs | 5 +- vortex-tensor/src/utils.rs | 13 +- 15 files changed, 120 insertions(+), 154 deletions(-) diff --git a/vortex-array/src/array/erased.rs b/vortex-array/src/array/erased.rs index 1ce88a23eb2..c0eafd4eb0c 100644 --- a/vortex-array/src/array/erased.rs +++ b/vortex-array/src/array/erased.rs @@ -228,7 +228,8 @@ impl ArrayRef { return Ok(Canonical::empty(self.dtype()).into_array()); } - let sliced = SliceArray::try_new_parts(self.clone(), range)?.optimize()?; + let sliced = SliceArray::try_new_parts(self.clone(), range)?; + let sliced = ParentRef::from_parts(&sliced).optimize()?; // Propagate some stats from the original array to the sliced array. if !sliced.is::() { diff --git a/vortex-array/src/array/parent.rs b/vortex-array/src/array/parent.rs index c5d956374e9..cb5901f9a91 100644 --- a/vortex-array/src/array/parent.rs +++ b/vortex-array/src/array/parent.rs @@ -159,13 +159,21 @@ impl<'a> ParentRef<'a> { { for plugin in plugins.as_ref() { if let Some(reduced) = plugin(child, self, slot_idx)? { - return cascade(reduced, session).map(Some); + return match session { + Some(s) => reduced.optimize_ctx(s), + None => reduced.optimize(), + } + .map(Some); } } } if let Some(reduced) = child.reduce_parent(self, slot_idx)? 
{ - return cascade(reduced, session).map(Some); + return match session { + Some(s) => reduced.optimize_ctx(s), + None => reduced.optimize(), + } + .map(Some); } } @@ -316,14 +324,6 @@ impl ArrayBacking for ParentRef<'_> { } } -#[inline] -fn cascade(reduced: ArrayRef, session: Option<&VortexSession>) -> VortexResult { - match session { - Some(s) => reduced.optimize_ctx(s), - None => reduced.optimize(), - } -} - /// Materializes stack-borrowed parts of encoding `V` into an owned [`ArrayRef`]. /// /// Used as the function pointer stored inside [`ParentData::Parts`]. The diff --git a/vortex-array/src/arrays/scalar_fn/vtable/mod.rs b/vortex-array/src/arrays/scalar_fn/vtable/mod.rs index 22b9bca61e4..99ef99bb98e 100644 --- a/vortex-array/src/arrays/scalar_fn/vtable/mod.rs +++ b/vortex-array/src/arrays/scalar_fn/vtable/mod.rs @@ -10,6 +10,7 @@ use std::marker::PhantomData; use std::ops::Deref; use itertools::Itertools; +use vortex_error::VortexExpect; use vortex_error::VortexResult; use vortex_error::vortex_bail; use vortex_error::vortex_ensure; @@ -208,18 +209,10 @@ pub struct AnyScalarFn; impl Matcher for AnyScalarFn { type Match<'a> = ArrayView<'a, ScalarFn>; - fn matches(array: &ArrayRef) -> bool { - ScalarFn::matches(array) - } - fn try_match(array: &ArrayRef) -> Option> { array.as_typed::() } - fn matches_parent(parent: &ParentRef<'_>) -> bool { - ScalarFn::matches_parent(parent) - } - fn try_match_parent<'a>(parent: &'a ParentRef<'_>) -> Option> { parent.as_opt::() } @@ -229,55 +222,103 @@ impl Matcher for AnyScalarFn { #[derive(Debug, Default)] pub struct ExactScalarFn(PhantomData); -impl Matcher for ExactScalarFn { - type Match<'a> = ScalarFnArrayView<'a, F>; - - fn matches(array: &ArrayRef) -> bool { - array - .as_typed::() - .is_some_and(|view| view.scalar_fn().is::()) - } - - fn try_match(array: &ArrayRef) -> Option> { - let scalar_fn_array = array.as_typed::()?; - let scalar_fn_data = scalar_fn_array.data(); - let scalar_fn = 
scalar_fn_data.scalar_fn().downcast_ref::()?; +impl ExactScalarFn { + /// Promote an [`ArrayView<'_, ScalarFn>`] to a [`ScalarFnArrayView<'_, F>`] if the + /// inner scalar function is `F`. + /// + /// Shared by [`Matcher::try_match`] and [`Matcher::try_match_parent`]: both paths + /// produce an `ArrayView<'_, ScalarFn>` first and then go through this helper. + #[inline] + fn from_view(view: ArrayView<'_, ScalarFn>) -> Option> { + let scalar_fn = view.data().scalar_fn().downcast_ref::()?; Some(ScalarFnArrayView { - array: scalar_fn_array.array(), + view, vtable: scalar_fn.vtable(), options: scalar_fn.options(), }) } +} + +impl Matcher for ExactScalarFn { + type Match<'a> = ScalarFnArrayView<'a, F>; - fn matches_parent(parent: &ParentRef<'_>) -> bool { - parent - .typed_data::() - .is_some_and(|data| data.scalar_fn().is::()) + fn try_match(array: &ArrayRef) -> Option> { + Self::from_view(array.as_typed::()?) } fn try_match_parent<'a>(parent: &'a ParentRef<'_>) -> Option> { - let scalar_fn_array = parent.as_opt::()?; - let scalar_fn_data = scalar_fn_array.data(); - let scalar_fn = scalar_fn_data.scalar_fn().downcast_ref::()?; - Some(ScalarFnArrayView { - array: scalar_fn_array.array(), - vtable: scalar_fn.vtable(), - options: scalar_fn.options(), - }) + Self::from_view(parent.as_opt::()?) } } +/// A typed view over a [`ScalarFn`] array exposing the concrete `F`-typed `vtable` +/// and `options`. +/// +/// Wraps an [`ArrayView<'_, ScalarFn>`] so all metadata accessors (`dtype`, `len`, +/// `slots`, `encoding_id`) and the cold `array()` materialization go through the +/// same flat-field / [`ArrayBacking`](crate::array::ArrayBacking) path as +/// [`ArrayView`] — stack-backed parents stay on the stack until a consumer reaches +/// for the underlying [`ArrayRef`]. 
pub struct ScalarFnArrayView<'a, F: scalar_fn::ScalarFnVTable> { - array: &'a ArrayRef, + view: ArrayView<'a, ScalarFn>, pub vtable: &'a F, pub options: &'a F::Options, } -impl Deref for ScalarFnArrayView<'_, F> { - type Target = ArrayRef; +impl<'a, F: scalar_fn::ScalarFnVTable> ScalarFnArrayView<'a, F> { + /// Returns the underlying [`ScalarFn`]-typed array view. + #[inline] + pub fn view(&self) -> ArrayView<'a, ScalarFn> { + self.view + } + + /// Returns the child array at the given slot. + /// + /// Reads from `slots()` directly without forcing stack-backed parents to + /// materialize. + pub fn child_at(&self, idx: usize) -> &'a ArrayRef { + self.view.slots()[idx] + .as_ref() + .vortex_expect("ScalarFnArray child slot") + } + + /// Alias for [`Self::child_at`]. + #[inline] + pub fn get_child(&self, idx: usize) -> &'a ArrayRef { + self.child_at(idx) + } + + /// Returns the number of child slots. + #[inline] + pub fn child_count(&self) -> usize { + self.view.slots().len() + } + + /// Iterates over the array's children. + pub fn iter_children(&self) -> impl Iterator + '_ { + (0..self.child_count()).map(|idx| self.child_at(idx)) + } + + /// Collects the children into a `Vec` of cloned `ArrayRef`s. 
+ pub fn children(&self) -> Vec { + self.iter_children().cloned().collect() + } +} + +impl Copy for ScalarFnArrayView<'_, F> {} + +impl Clone for ScalarFnArrayView<'_, F> { + fn clone(&self) -> Self { + *self + } +} + +impl<'a, F: scalar_fn::ScalarFnVTable> Deref for ScalarFnArrayView<'a, F> { + type Target = ArrayView<'a, ScalarFn>; - fn deref(&self) -> &Self::Target { - self.array + #[inline] + fn deref(&self) -> &ArrayView<'a, ScalarFn> { + &self.view } } diff --git a/vortex-array/src/scalar_fn/fns/between/kernel.rs b/vortex-array/src/scalar_fn/fns/between/kernel.rs index ee4fb688982..fedd5a7ae38 100644 --- a/vortex-array/src/scalar_fn/fns/between/kernel.rs +++ b/vortex-array/src/scalar_fn/fns/between/kernel.rs @@ -1,7 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use vortex_error::VortexExpect; use vortex_error::VortexResult; use super::Between; @@ -11,9 +10,7 @@ use crate::ArrayRef; use crate::ExecutionCtx; use crate::array::ArrayView; use crate::array::VTable; -use crate::arrays::ScalarFn; use crate::arrays::scalar_fn::ExactScalarFn; -use crate::arrays::scalar_fn::ScalarFnArrayExt; use crate::arrays::scalar_fn::ScalarFnArrayView; use crate::kernel::ExecuteParentKernel; use crate::optimizer::rules::ArrayParentReduceRule; @@ -63,12 +60,8 @@ where if child_idx != 0 { return Ok(None); } - let scalar_fn_array = parent - .as_opt::() - .vortex_expect("ExactScalarFn matcher confirmed ScalarFnArray"); - let children = scalar_fn_array.children(); - let lower = &children[1]; - let upper = &children[2]; + let lower = parent.get_child(1); + let upper = parent.get_child(2); let arr = array.array().clone(); if let Some(result) = precondition(&arr, lower, upper)? 
{ return Ok(Some(result)); @@ -98,12 +91,8 @@ where if child_idx != 0 { return Ok(None); } - let scalar_fn_array = parent - .as_opt::() - .vortex_expect("ExactScalarFn matcher confirmed ScalarFnArray"); - let children = scalar_fn_array.children(); - let lower = &children[1]; - let upper = &children[2]; + let lower = parent.get_child(1); + let upper = parent.get_child(2); let arr = array.array().clone(); if let Some(result) = precondition(&arr, lower, upper)? { return Ok(Some(result)); diff --git a/vortex-array/src/scalar_fn/fns/binary/compare.rs b/vortex-array/src/scalar_fn/fns/binary/compare.rs index a09a28164bd..950c3687343 100644 --- a/vortex-array/src/scalar_fn/fns/binary/compare.rs +++ b/vortex-array/src/scalar_fn/fns/binary/compare.rs @@ -20,9 +20,7 @@ use crate::array::ArrayView; use crate::array::VTable; use crate::arrays::Constant; use crate::arrays::ConstantArray; -use crate::arrays::ScalarFn; use crate::arrays::scalar_fn::ExactScalarFn; -use crate::arrays::scalar_fn::ScalarFnArrayExt; use crate::arrays::scalar_fn::ScalarFnArrayView; use crate::arrow::ArrowSessionExt; use crate::arrow::Datum; @@ -74,15 +72,11 @@ where return Ok(None); }; - // Get the ScalarFnArray to access children - let Some(scalar_fn_array) = parent.as_opt::() else { - return Ok(None); - }; // Normalize so `array` is always LHS, swapping the operator if needed // TODO(joe): should be go this here or in the Rule/Kernel let (cmp_op, other) = match child_idx { - 0 => (cmp_op, scalar_fn_array.get_child(1)), - 1 => (cmp_op.swap(), scalar_fn_array.get_child(0)), + 0 => (cmp_op, parent.get_child(1)), + 1 => (cmp_op.swap(), parent.get_child(0)), _ => return Ok(None), }; diff --git a/vortex-array/src/scalar_fn/fns/fill_null/kernel.rs b/vortex-array/src/scalar_fn/fns/fill_null/kernel.rs index eea3dd6ef7b..b80c43814be 100644 --- a/vortex-array/src/scalar_fn/fns/fill_null/kernel.rs +++ b/vortex-array/src/scalar_fn/fns/fill_null/kernel.rs @@ -12,9 +12,7 @@ use crate::array::ArrayView; use 
crate::array::VTable; use crate::arrays::Constant; use crate::arrays::ConstantArray; -use crate::arrays::ScalarFn; use crate::arrays::scalar_fn::ExactScalarFn; -use crate::arrays::scalar_fn::ScalarFnArrayExt; use crate::arrays::scalar_fn::ScalarFnArrayView; use crate::builtins::ArrayBuiltins; use crate::kernel::ExecuteParentKernel; @@ -122,10 +120,7 @@ where if child_idx != 0 { return Ok(None); } - let scalar_fn_array = parent - .as_opt::() - .vortex_expect("ExactScalarFn matcher confirmed ScalarFnArray"); - let fill_value = scalar_fn_array + let fill_value = parent .get_child(1) .as_constant() .vortex_expect("fill_null fill_value must be constant"); @@ -158,10 +153,7 @@ where if child_idx != 0 { return Ok(None); } - let scalar_fn_array = parent - .as_opt::() - .vortex_expect("ExactScalarFn matcher confirmed ScalarFnArray"); - let fill_value = scalar_fn_array + let fill_value = parent .get_child(1) .as_constant() .vortex_expect("fill_null fill_value must be constant"); diff --git a/vortex-array/src/scalar_fn/fns/like/kernel.rs b/vortex-array/src/scalar_fn/fns/like/kernel.rs index b3b683212ff..72cbbae3377 100644 --- a/vortex-array/src/scalar_fn/fns/like/kernel.rs +++ b/vortex-array/src/scalar_fn/fns/like/kernel.rs @@ -1,16 +1,13 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use vortex_error::VortexExpect; use vortex_error::VortexResult; use crate::ArrayRef; use crate::ExecutionCtx; use crate::array::ArrayView; use crate::array::VTable; -use crate::arrays::ScalarFn; use crate::arrays::scalar_fn::ExactScalarFn; -use crate::arrays::scalar_fn::ScalarFnArrayExt; use crate::arrays::scalar_fn::ScalarFnArrayView; use crate::kernel::ExecuteParentKernel; use crate::optimizer::rules::ArrayParentReduceRule; @@ -68,10 +65,7 @@ where if child_idx != 0 { return Ok(None); } - let scalar_fn_array = parent - .as_opt::() - .vortex_expect("ExactScalarFn matcher confirmed ScalarFnArray"); - let pattern = 
scalar_fn_array.get_child(1); + let pattern = parent.get_child(1); let options = *parent.options; ::like(array, pattern, options) } @@ -97,10 +91,7 @@ where if child_idx != 0 { return Ok(None); } - let scalar_fn_array = parent - .as_opt::() - .vortex_expect("ExactScalarFn matcher confirmed ScalarFnArray"); - let pattern = scalar_fn_array.get_child(1); + let pattern = parent.get_child(1); let options = *parent.options; ::like(array, pattern, options, ctx) } diff --git a/vortex-array/src/scalar_fn/fns/list_contains/kernel.rs b/vortex-array/src/scalar_fn/fns/list_contains/kernel.rs index 563600bfeee..38f82e84b8c 100644 --- a/vortex-array/src/scalar_fn/fns/list_contains/kernel.rs +++ b/vortex-array/src/scalar_fn/fns/list_contains/kernel.rs @@ -1,16 +1,13 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use vortex_error::VortexExpect; use vortex_error::VortexResult; use crate::ArrayRef; use crate::ExecutionCtx; use crate::array::ArrayView; use crate::array::VTable; -use crate::arrays::ScalarFn; use crate::arrays::scalar_fn::ExactScalarFn; -use crate::arrays::scalar_fn::ScalarFnArrayExt; use crate::arrays::scalar_fn::ScalarFnArrayView; use crate::kernel::ExecuteParentKernel; use crate::optimizer::rules::ArrayParentReduceRule; @@ -66,10 +63,7 @@ where if child_idx != 1 { return Ok(None); } - let scalar_fn_array = parent - .as_opt::() - .vortex_expect("ExactScalarFn matcher confirmed ScalarFnArray"); - let list = scalar_fn_array.get_child(0); + let list = parent.get_child(0); ::list_contains(list, array) } } @@ -95,10 +89,7 @@ where if child_idx != 1 { return Ok(None); } - let scalar_fn_array = parent - .as_opt::() - .vortex_expect("ExactScalarFn matcher confirmed ScalarFnArray"); - let list = scalar_fn_array.get_child(0); + let list = parent.get_child(0); ::list_contains(list, array, ctx) } } diff --git a/vortex-array/src/scalar_fn/fns/mask/kernel.rs b/vortex-array/src/scalar_fn/fns/mask/kernel.rs index 
5346e4e236c..02d3349fc88 100644 --- a/vortex-array/src/scalar_fn/fns/mask/kernel.rs +++ b/vortex-array/src/scalar_fn/fns/mask/kernel.rs @@ -2,7 +2,6 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors use vortex_error::VortexResult; -use vortex_error::vortex_err; use crate::ArrayRef; use crate::ExecutionCtx; @@ -72,14 +71,11 @@ where } // The mask child (child 1) is a non-nullable BoolArray where true=keep. // If it's not yet a BoolArray, we can't reduce without execution. - let parent_ref: ArrayRef = (*parent).clone(); - let mask_child = parent_ref - .nth_child(1) - .ok_or_else(|| vortex_err!("Mask expression must have 2 children"))?; + let mask_child = parent.get_child(1); if mask_child.as_opt::().is_none() { return Ok(None); }; - ::mask(array, &mask_child) + ::mask(array, mask_child) } } @@ -104,9 +100,7 @@ where if child_idx != 0 { return Ok(None); } - let mask_child = parent - .nth_child(1) - .ok_or_else(|| vortex_err!("Mask expression must have 2 children"))?; - ::mask(array, &mask_child, ctx) + let mask_child = parent.get_child(1); + ::mask(array, mask_child, ctx) } } diff --git a/vortex-array/src/scalar_fn/fns/zip/kernel.rs b/vortex-array/src/scalar_fn/fns/zip/kernel.rs index 575c2f4c55a..0ca31190184 100644 --- a/vortex-array/src/scalar_fn/fns/zip/kernel.rs +++ b/vortex-array/src/scalar_fn/fns/zip/kernel.rs @@ -1,16 +1,13 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use vortex_error::VortexExpect; use vortex_error::VortexResult; use crate::ArrayRef; use crate::ExecutionCtx; use crate::array::ArrayView; use crate::array::VTable; -use crate::arrays::ScalarFn; use crate::arrays::scalar_fn::ExactScalarFn; -use crate::arrays::scalar_fn::ScalarFnArrayExt; use crate::arrays::scalar_fn::ScalarFnArrayView; use crate::kernel::ExecuteParentKernel; use crate::optimizer::rules::ArrayParentReduceRule; @@ -67,11 +64,8 @@ where if child_idx != 0 { return Ok(None); } - let scalar_fn_array = parent - 
.as_opt::() - .vortex_expect("ExactScalarFn matcher confirmed ScalarFnArray"); - let if_false = scalar_fn_array.get_child(1); - let mask_array = scalar_fn_array.get_child(2); + let if_false = parent.get_child(1); + let mask_array = parent.get_child(2); ::zip(array, if_false, mask_array) } } @@ -96,11 +90,8 @@ where if child_idx != 0 { return Ok(None); } - let scalar_fn_array = parent - .as_opt::() - .vortex_expect("ExactScalarFn matcher confirmed ScalarFnArray"); - let if_false = scalar_fn_array.get_child(1); - let mask_array = scalar_fn_array.get_child(2); + let if_false = parent.get_child(1); + let mask_array = parent.get_child(2); ::zip(array, if_false, mask_array, ctx) } } diff --git a/vortex-tensor/src/scalar_fns/inner_product.rs b/vortex-tensor/src/scalar_fns/inner_product.rs index 197d5cc2fb3..b5df68a0b9b 100644 --- a/vortex-tensor/src/scalar_fns/inner_product.rs +++ b/vortex-tensor/src/scalar_fns/inner_product.rs @@ -361,9 +361,7 @@ impl InnerProduct { let new_constant = Vector::constant_array(&rotated_query, len)?; // Extract the SorfTransform child (the already-padded Vector). - let sorf_child = sorf_view - .nth_child(0) - .vortex_expect("SorfTransform must have exactly one child"); + let sorf_child = sorf_view.get_child(0).clone(); // Recursively execute the rewritten inner product. This allows case 2 to fire on // the rewritten tree if the sorf child is `Vector[FSL(Dict)]`. 
Termination is diff --git a/vortex-tensor/src/scalar_fns/l2_denorm.rs b/vortex-tensor/src/scalar_fns/l2_denorm.rs index 00ad7d75e8a..670f8f5ca76 100644 --- a/vortex-tensor/src/scalar_fns/l2_denorm.rs +++ b/vortex-tensor/src/scalar_fns/l2_denorm.rs @@ -16,12 +16,10 @@ use vortex_array::arrays::Extension; use vortex_array::arrays::ExtensionArray; use vortex_array::arrays::FixedSizeListArray; use vortex_array::arrays::PrimitiveArray; -use vortex_array::arrays::ScalarFn as ScalarFnArrayEncoding; use vortex_array::arrays::ScalarFnArray; use vortex_array::arrays::extension::ExtensionArrayExt; use vortex_array::arrays::fixed_size_list::FixedSizeListArrayExt; use vortex_array::arrays::scalar_fn::ExactScalarFn; -use vortex_array::arrays::scalar_fn::ScalarFnArrayExt; use vortex_array::arrays::scalar_fn::ScalarFnArrayView; use vortex_array::arrays::scalar_fn::plugin::ScalarFnArrayParts; use vortex_array::arrays::scalar_fn::plugin::ScalarFnArrayVTable; @@ -288,9 +286,8 @@ impl ScalarFnArrayVTable for L2Denorm { view: &ScalarFnArrayView, _session: &VortexSession, ) -> VortexResult>> { - let scalar_fn_array = view.as_::(); - let normalized_dtype = Some(scalar_fn_array.child_at(0).dtype().try_into()?); - let norms_dtype = Some(scalar_fn_array.child_at(1).dtype().try_into()?); + let normalized_dtype = Some(view.child_at(0).dtype().try_into()?); + let norms_dtype = Some(view.child_at(1).dtype().try_into()?); Ok(Some( L2DenormMetadata { normalized_dtype, diff --git a/vortex-tensor/src/scalar_fns/l2_norm.rs b/vortex-tensor/src/scalar_fns/l2_norm.rs index d760c3429bd..306b2b8f597 100644 --- a/vortex-tensor/src/scalar_fns/l2_norm.rs +++ b/vortex-tensor/src/scalar_fns/l2_norm.rs @@ -12,11 +12,9 @@ use vortex_array::arrays::Constant; use vortex_array::arrays::ConstantArray; use vortex_array::arrays::ExtensionArray; use vortex_array::arrays::PrimitiveArray; -use vortex_array::arrays::ScalarFn as ScalarFnArrayEncoding; use vortex_array::arrays::ScalarFnArray; use 
vortex_array::arrays::extension::ExtensionArrayExt; use vortex_array::arrays::scalar_fn::ExactScalarFn; -use vortex_array::arrays::scalar_fn::ScalarFnArrayExt; use vortex_array::arrays::scalar_fn::ScalarFnArrayView; use vortex_array::arrays::scalar_fn::plugin::ScalarFnArrayParts; use vortex_array::arrays::scalar_fn::plugin::ScalarFnArrayVTable; @@ -209,8 +207,7 @@ impl ScalarFnArrayVTable for L2Norm { view: &ScalarFnArrayView, _session: &VortexSession, ) -> VortexResult>> { - let scalar_fn_array = view.as_::(); - let input_dtype = Some(scalar_fn_array.child_at(0).dtype().try_into()?); + let input_dtype = Some(view.child_at(0).dtype().try_into()?); Ok(Some(L2NormMetadata { input_dtype }.encode_to_vec())) } diff --git a/vortex-tensor/src/scalar_fns/sorf_transform/vtable.rs b/vortex-tensor/src/scalar_fns/sorf_transform/vtable.rs index 76648decae2..f045a406d8c 100644 --- a/vortex-tensor/src/scalar_fns/sorf_transform/vtable.rs +++ b/vortex-tensor/src/scalar_fns/sorf_transform/vtable.rs @@ -14,10 +14,8 @@ use vortex_array::IntoArray; use vortex_array::arrays::ExtensionArray; use vortex_array::arrays::FixedSizeListArray; use vortex_array::arrays::PrimitiveArray; -use vortex_array::arrays::ScalarFn as ScalarFnArrayEncoding; use vortex_array::arrays::extension::ExtensionArrayExt; use vortex_array::arrays::fixed_size_list::FixedSizeListArrayExt; -use vortex_array::arrays::scalar_fn::ScalarFnArrayExt; use vortex_array::arrays::scalar_fn::ScalarFnArrayView; use vortex_array::arrays::scalar_fn::plugin::ScalarFnArrayParts; use vortex_array::arrays::scalar_fn::plugin::ScalarFnArrayVTable; @@ -210,8 +208,7 @@ impl ScalarFnArrayVTable for SorfTransform { view: &ScalarFnArrayView, _session: &VortexSession, ) -> VortexResult>> { - let scalar_fn_array = view.as_::(); - let child_dtype = Some(scalar_fn_array.child_at(0).dtype().try_into()?); + let child_dtype = Some(view.child_at(0).dtype().try_into()?); let metadata = SorfTransformMetadata { child_dtype, 
..SorfTransformMetadata::from(view.options) diff --git a/vortex-tensor/src/utils.rs b/vortex-tensor/src/utils.rs index e6d2cce453b..0f46131ee81 100644 --- a/vortex-tensor/src/utils.rs +++ b/vortex-tensor/src/utils.rs @@ -10,11 +10,9 @@ use vortex_array::arrays::Constant; use vortex_array::arrays::ConstantArray; use vortex_array::arrays::FixedSizeListArray; use vortex_array::arrays::PrimitiveArray; -use vortex_array::arrays::ScalarFn; use vortex_array::arrays::fixed_size_list::FixedSizeListArrayExt; use vortex_array::arrays::primitive::PrimitiveArrayExt; use vortex_array::arrays::scalar_fn::ExactScalarFn; -use vortex_array::arrays::scalar_fn::ScalarFnArrayExt; use vortex_array::arrays::scalar_fn::ScalarFnArrayView; use vortex_array::dtype::DType; use vortex_array::dtype::NativePType; @@ -67,11 +65,7 @@ pub fn extract_l2_denorm_children(array: &ArrayRef) -> (ArrayRef, ArrayRef) { let sfn = array .as_opt::>() .vortex_expect("expected ScalarFnArray wrapping L2Denorm"); - ( - sfn.nth_child(0) - .vortex_expect("L2Denorm missing normalized array"), - sfn.nth_child(1).vortex_expect("L2Denorm missing norms"), - ) + (sfn.get_child(0).clone(), sfn.get_child(1).clone()) } /// Validates that `input_dtype` is a float-valued tensor-like extension dtype. 
@@ -273,9 +267,8 @@ impl BinaryTensorOpMetadata { pub(crate) fn encode_from_view( view: &ScalarFnArrayView, ) -> VortexResult> { - let scalar_fn_array = view.as_::(); - let lhs_dtype = Some(scalar_fn_array.child_at(0).dtype().try_into()?); - let rhs_dtype = Some(scalar_fn_array.child_at(1).dtype().try_into()?); + let lhs_dtype = Some(view.child_at(0).dtype().try_into()?); + let rhs_dtype = Some(view.child_at(1).dtype().try_into()?); Ok(Self { lhs_dtype, rhs_dtype, From 2f5f777fe8de68c2624377a36ca99cb5ba5e97d0 Mon Sep 17 00:00:00 2001 From: Robert Kruszewski Date: Tue, 19 May 2026 11:15:14 +0100 Subject: [PATCH 05/17] rename Signed-off-by: Robert Kruszewski --- vortex-array/src/array/erased.rs | 48 +++++++------- vortex-array/src/array/parent.rs | 32 +++++++++- .../src/arrays/primitive/compute/slice.rs | 12 ++-- .../src/arrays/scalar_fn/vtable/mod.rs | 18 +++--- .../src/arrays/struct_/compute/cast.rs | 3 +- .../src/arrays/struct_/compute/rules.rs | 3 +- vortex-array/src/canonical.rs | 32 +++++----- vortex-array/src/columnar.rs | 30 ++++----- vortex-array/src/executor.rs | 6 +- vortex-array/src/kernel.rs | 4 +- vortex-array/src/matcher.rs | 64 +++++++++++-------- vortex-array/src/optimizer/rules.rs | 6 +- 12 files changed, 151 insertions(+), 107 deletions(-) diff --git a/vortex-array/src/array/erased.rs b/vortex-array/src/array/erased.rs index c0eafd4eb0c..cb365d15fe4 100644 --- a/vortex-array/src/array/erased.rs +++ b/vortex-array/src/array/erased.rs @@ -386,7 +386,7 @@ impl ArrayRef { /// Does the array match the given matcher. pub fn is(&self) -> bool { - M::matches(self) + M::matches_ref(self) } /// Returns the array downcast by the given matcher. @@ -396,11 +396,11 @@ impl ArrayRef { /// Returns the array downcast by the given matcher. 
/// - /// Routes through the heap-array entry points (`Matcher::matches` / - /// `Matcher::try_match`) so matchers with a cheap, direct downcast — like the - /// blanket `VTable` matcher — don't pay for a [`ParentRef`] construction here. + /// Routes through the heap-array entry points (`Matcher::matches_ref` / + /// `Matcher::try_match_ref`) so matchers with a cheap, direct downcast — like + /// the blanket `VTable` matcher — don't pay for a [`ParentRef`] construction here. pub fn as_opt(&self) -> Option> { - M::try_match(self) + M::try_match_ref(self) } /// Returns the array downcast to the given `Array` as an owned typed handle. @@ -728,25 +728,10 @@ impl IntoArray for ArrayRef { impl Matcher for V { type Match<'a> = ArrayView<'a, V>; - /// Fast encoding-id check that skips [`ParentRef`] construction. The hot - /// `ArrayRef::is::()` path goes through here, so any extra work shows up in - /// downstream micro-benchmarks (`patches_lookup`, `chunk_array_builder`, ...). - #[inline] - fn matches(array: &ArrayRef) -> bool { - array.dyn_array().as_any().is::>() - } - - /// Direct downcast — same fast path as [`Matcher::matches`] but also produces - /// the [`ArrayView`] when it matches. - #[inline] - fn try_match(array: &ArrayRef) -> Option> { - array.as_typed::() - } - /// Match by encoding id (no materialization). Equivalent to - /// [`Matcher::try_match_parent`].is_some() but avoids constructing an + /// [`Matcher::try_match`].is_some() but avoids constructing an /// [`ArrayView`] for parents that do not need one. - fn matches_parent(parent: &ParentRef<'_>) -> bool { + fn matches(parent: &ParentRef<'_>) -> bool { parent.is_encoding::() } @@ -755,7 +740,22 @@ impl Matcher for V { /// The returned [`ArrayView`] is stack-backed when the parent is stack-backed, /// so no `Arc>` is allocated until a downstream consumer reaches /// for [`ArrayView::array`]. 
- fn try_match_parent<'a>(parent: &'a ParentRef<'_>) -> Option> { - parent.as_opt::() + fn try_match<'a>(parent: &'a ParentRef<'_>) -> Option> { + parent.as_view::() + } + + /// Fast encoding-id check that skips [`ParentRef`] construction. The hot + /// `ArrayRef::is::()` path goes through here, so any extra work shows up in + /// downstream micro-benchmarks (`patches_lookup`, `chunk_array_builder`, ...). + #[inline] + fn matches_ref(array: &ArrayRef) -> bool { + array.dyn_array().as_any().is::>() + } + + /// Direct downcast — same fast path as [`Matcher::matches_ref`] but also produces + /// the [`ArrayView`] when it matches. + #[inline] + fn try_match_ref(array: &ArrayRef) -> Option> { + array.as_typed::() } } diff --git a/vortex-array/src/array/parent.rs b/vortex-array/src/array/parent.rs index cb5901f9a91..f210df2b8bf 100644 --- a/vortex-array/src/array/parent.rs +++ b/vortex-array/src/array/parent.rs @@ -30,6 +30,7 @@ use crate::array::ArraySlots; use crate::array::ArrayView; use crate::array::VTable; use crate::dtype::DType; +use crate::matcher::Matcher; use crate::optimizer::ArrayOptimizer; use crate::optimizer::kernels::ArrayKernels; @@ -258,12 +259,41 @@ impl<'a> ParentRef<'a> { /// stack-backed when the parent is stack-backed — no materialization happens /// up front. Materialization is deferred to [`ArrayView::array`], which goes /// through [`ArrayBacking::array_ref`] on the parent's internal cache. - pub fn as_opt(&self) -> Option> { + /// + /// This is the low-level entry point used by the blanket `VTable` matcher + /// implementation. Prefer [`Self::as_opt`] for matcher-based downcasts. + pub fn as_view(&self) -> Option> { let data = self.typed_data::()?; // SAFETY: `typed_data::()` returned Some, so the parent's encoding is // `V` and `data` is the `V::TypedArrayData` reachable through `self`. Some(unsafe { ArrayView::new_from_parent(self, data) }) } + + /// Does the parent match the given matcher. 
+ /// + /// Mirrors [`ArrayRef::is`](crate::ArrayRef::is) for the parent-side dispatch + /// chain. Routes through [`Matcher::matches`] so matchers that can answer with + /// a cheap encoding-id check don't force a downcast. + pub fn is(&self) -> bool { + M::matches(self) + } + + /// Returns the parent downcast by the given matcher, or `None` if it doesn't match. + /// + /// Mirrors [`ArrayRef::as_opt`](crate::ArrayRef::as_opt) for the parent-side + /// dispatch chain. The returned `Match` borrows from `self`, so stack-backed + /// parents stay on the stack until a consumer reaches for + /// [`ArrayView::array`]. + pub fn as_opt(&self) -> Option> { + M::try_match(self) + } + + /// Returns the parent downcast by the given matcher, panicking if it doesn't match. + /// + /// Mirrors [`ArrayRef::as_`](crate::ArrayRef::as_). + pub fn as_(&self) -> M::Match<'_> { + self.as_opt::().vortex_expect("Failed to downcast") + } } impl Debug for ParentRef<'_> { diff --git a/vortex-array/src/arrays/primitive/compute/slice.rs b/vortex-array/src/arrays/primitive/compute/slice.rs index 1830ef3f8c6..32218f9a1f2 100644 --- a/vortex-array/src/arrays/primitive/compute/slice.rs +++ b/vortex-array/src/arrays/primitive/compute/slice.rs @@ -17,11 +17,13 @@ use crate::match_each_native_ptype; impl SliceReduce for Primitive { fn slice(array: ArrayView<'_, Self>, range: Range) -> VortexResult> { let result = match_each_native_ptype!(array.ptype(), |T| { - PrimitiveArray::from_buffer_handle( - array.buffer_handle().slice_typed::(range.clone()), - T::PTYPE, - array.validity()?.slice(range)?, - ) + unsafe { + PrimitiveArray::new_unchecked_from_handle( + array.buffer_handle().slice_typed::(range.clone()), + T::PTYPE, + array.validity()?.slice(range)?, + ) + } .into_array() }); Ok(Some(result)) diff --git a/vortex-array/src/arrays/scalar_fn/vtable/mod.rs b/vortex-array/src/arrays/scalar_fn/vtable/mod.rs index 99ef99bb98e..6038b141105 100644 --- a/vortex-array/src/arrays/scalar_fn/vtable/mod.rs +++ 
b/vortex-array/src/arrays/scalar_fn/vtable/mod.rs @@ -209,12 +209,12 @@ pub struct AnyScalarFn; impl Matcher for AnyScalarFn { type Match<'a> = ArrayView<'a, ScalarFn>; - fn try_match(array: &ArrayRef) -> Option> { - array.as_typed::() + fn try_match<'a>(parent: &'a ParentRef<'_>) -> Option> { + parent.as_opt::() } - fn try_match_parent<'a>(parent: &'a ParentRef<'_>) -> Option> { - parent.as_opt::() + fn try_match_ref(array: &ArrayRef) -> Option> { + array.as_typed::() } } @@ -226,7 +226,7 @@ impl ExactScalarFn { /// Promote an [`ArrayView<'_, ScalarFn>`] to a [`ScalarFnArrayView<'_, F>`] if the /// inner scalar function is `F`. /// - /// Shared by [`Matcher::try_match`] and [`Matcher::try_match_parent`]: both paths + /// Shared by [`Matcher::try_match`] and [`Matcher::try_match_ref`]: both paths /// produce an `ArrayView<'_, ScalarFn>` first and then go through this helper. #[inline] fn from_view(view: ArrayView<'_, ScalarFn>) -> Option> { @@ -242,12 +242,12 @@ impl ExactScalarFn { impl Matcher for ExactScalarFn { type Match<'a> = ScalarFnArrayView<'a, F>; - fn try_match(array: &ArrayRef) -> Option> { - Self::from_view(array.as_typed::()?) + fn try_match<'a>(parent: &'a ParentRef<'_>) -> Option> { + Self::from_view(parent.as_opt::()?) } - fn try_match_parent<'a>(parent: &'a ParentRef<'_>) -> Option> { - Self::from_view(parent.as_opt::()?) + fn try_match_ref(array: &ArrayRef) -> Option> { + Self::from_view(array.as_typed::()?) 
} } diff --git a/vortex-array/src/arrays/struct_/compute/cast.rs b/vortex-array/src/arrays/struct_/compute/cast.rs index c044b11854c..f1e51e3ebb2 100644 --- a/vortex-array/src/arrays/struct_/compute/cast.rs +++ b/vortex-array/src/arrays/struct_/compute/cast.rs @@ -18,7 +18,6 @@ use crate::arrays::struct_::StructArrayExt; use crate::builtins::ArrayBuiltins; use crate::dtype::DType; use crate::dtype::StructFields; -use crate::matcher::Matcher; use crate::scalar::Scalar; use crate::scalar_fn::fns::cast::Cast; @@ -31,7 +30,7 @@ pub(crate) fn struct_cast_execute_parent( let Some(array) = child.as_opt::() else { return Ok(None); }; - let Some(parent) = ExactScalarFn::::try_match_parent(parent) else { + let Some(parent) = parent.as_opt::>() else { return Ok(None); }; diff --git a/vortex-array/src/arrays/struct_/compute/rules.rs b/vortex-array/src/arrays/struct_/compute/rules.rs index 1323b4f9e97..4df7d9a574a 100644 --- a/vortex-array/src/arrays/struct_/compute/rules.rs +++ b/vortex-array/src/arrays/struct_/compute/rules.rs @@ -18,7 +18,6 @@ use crate::arrays::slice::SliceReduceAdaptor; use crate::arrays::struct_::StructArrayExt; use crate::arrays::struct_::compute::cast::struct_cast_fields; use crate::builtins::ArrayBuiltins; -use crate::matcher::Matcher; use crate::optimizer::rules::ArrayParentReduceRule; use crate::optimizer::rules::ParentRuleSet; use crate::scalar::Scalar; @@ -42,7 +41,7 @@ pub(crate) fn struct_cast_reduce_parent( let Some(array) = child.as_opt::() else { return Ok(None); }; - let Some(parent) = ExactScalarFn::::try_match_parent(parent) else { + let Some(parent) = parent.as_opt::>() else { return Ok(None); }; diff --git a/vortex-array/src/canonical.rs b/vortex-array/src/canonical.rs index 35894b96090..8e14225f1a5 100644 --- a/vortex-array/src/canonical.rs +++ b/vortex-array/src/canonical.rs @@ -1068,7 +1068,7 @@ impl Matcher for AnyCanonical { /// each canonical encoding is checked via the cheap `ArrayRef::is::()` /// direct downcast. 
#[inline] - fn matches(array: &ArrayRef) -> bool { + fn matches_ref(array: &ArrayRef) -> bool { array.is::() || array.is::() || array.is::() @@ -1081,9 +1081,9 @@ impl Matcher for AnyCanonical { || array.is::() } - /// Direct heap-array downcasts; mirrors [`Self::try_match_parent`] but skips - /// the [`ParentRef`] construction that would otherwise wrap each call. - fn try_match(array: &ArrayRef) -> Option> { + /// Direct heap-array downcasts; mirrors [`Self::try_match`] but skips the + /// [`ParentRef`] construction that would otherwise wrap each call. + fn try_match_ref(array: &ArrayRef) -> Option> { if let Some(a) = array.as_opt::() { Some(CanonicalView::Null(a)) } else if let Some(a) = array.as_opt::() { @@ -1107,20 +1107,20 @@ impl Matcher for AnyCanonical { } } - fn matches_parent(parent: &ParentRef<'_>) -> bool { - Null::matches_parent(parent) - || Bool::matches_parent(parent) - || Primitive::matches_parent(parent) - || Decimal::matches_parent(parent) - || Struct::matches_parent(parent) - || ListView::matches_parent(parent) - || FixedSizeList::matches_parent(parent) - || VarBinView::matches_parent(parent) - || Variant::matches_parent(parent) - || Extension::matches_parent(parent) + fn matches(parent: &ParentRef<'_>) -> bool { + Null::matches(parent) + || Bool::matches(parent) + || Primitive::matches(parent) + || Decimal::matches(parent) + || Struct::matches(parent) + || ListView::matches(parent) + || FixedSizeList::matches(parent) + || VarBinView::matches(parent) + || Variant::matches(parent) + || Extension::matches(parent) } - fn try_match_parent<'a>(parent: &'a ParentRef<'_>) -> Option> { + fn try_match<'a>(parent: &'a ParentRef<'_>) -> Option> { if let Some(a) = parent.as_opt::() { Some(CanonicalView::Null(a)) } else if let Some(a) = parent.as_opt::() { diff --git a/vortex-array/src/columnar.rs b/vortex-array/src/columnar.rs index 02b9463216e..34ad48d9510 100644 --- a/vortex-array/src/columnar.rs +++ b/vortex-array/src/columnar.rs @@ -94,32 +94,32 @@ pub 
struct AnyColumnar; impl Matcher for AnyColumnar { type Match<'a> = ColumnarView<'a>; - /// Fast encoding-id check that skips [`ParentRef`] construction. Mirror of - /// [`AnyCanonical::matches`](crate::AnyCanonical::matches) for the same reason. - #[inline] - fn matches(array: &ArrayRef) -> bool { - array.is::() || AnyCanonical::matches(array) + fn matches(parent: &ParentRef<'_>) -> bool { + Constant::matches(parent) || AnyCanonical::matches(parent) } - /// Direct heap-array downcasts; skips the [`ParentRef`] construction that the - /// default [`Self::try_match`] would otherwise do. - fn try_match(array: &ArrayRef) -> Option> { - if let Some(constant) = array.as_opt::() { + fn try_match<'a>(parent: &'a ParentRef<'_>) -> Option> { + if let Some(constant) = parent.as_opt::() { Some(ColumnarView::Constant(constant)) } else { - AnyCanonical::try_match(array).map(ColumnarView::Canonical) + AnyCanonical::try_match(parent).map(ColumnarView::Canonical) } } - fn matches_parent(parent: &ParentRef<'_>) -> bool { - Constant::matches_parent(parent) || AnyCanonical::matches_parent(parent) + /// Fast encoding-id check that skips [`ParentRef`] construction. Mirror of + /// [`AnyCanonical::matches_ref`](crate::AnyCanonical::matches_ref) for the same reason. + #[inline] + fn matches_ref(array: &ArrayRef) -> bool { + array.is::() || AnyCanonical::matches_ref(array) } - fn try_match_parent<'a>(parent: &'a ParentRef<'_>) -> Option> { - if let Some(constant) = parent.as_opt::() { + /// Direct heap-array downcasts; skips the [`ParentRef`] construction that the + /// default [`Self::try_match`] would otherwise do. 
+ fn try_match_ref(array: &ArrayRef) -> Option> { + if let Some(constant) = array.as_opt::() { Some(ColumnarView::Constant(constant)) } else { - AnyCanonical::try_match_parent(parent).map(ColumnarView::Canonical) + AnyCanonical::try_match_ref(array).map(ColumnarView::Canonical) } } } diff --git a/vortex-array/src/executor.rs b/vortex-array/src/executor.rs index 2ef87e85ce0..670fc31d5e0 100644 --- a/vortex-array/src/executor.rs +++ b/vortex-array/src/executor.rs @@ -168,9 +168,9 @@ impl ArrayRef { for _ in 0..max_iterations { let is_done = stack .last() - .map_or(M::matches as DonePredicate, |frame| frame.done); + .map_or(M::matches_ref as DonePredicate, |frame| frame.done); - if is_done(¤t_array) || AnyCanonical::matches(¤t_array) { + if is_done(¤t_array) || current_array.is::() { match stack.pop() { None => { debug_assert!( @@ -682,7 +682,7 @@ impl ExecutionResult { pub fn execute_slot(array: impl IntoArray, slot_idx: usize) -> Self { Self { array: array.into_array(), - step: ExecutionStep::ExecuteSlot(slot_idx, M::matches), + step: ExecutionStep::ExecuteSlot(slot_idx, M::matches_ref), } } diff --git a/vortex-array/src/kernel.rs b/vortex-array/src/kernel.rs index f5b75471437..e86ce0e92b8 100644 --- a/vortex-array/src/kernel.rs +++ b/vortex-array/src/kernel.rs @@ -134,7 +134,7 @@ impl> Debug for ParentKernelAdapter { impl> DynParentKernel for ParentKernelAdapter { fn matches(&self, parent: &ArrayRef) -> bool { - K::Parent::matches(parent) + parent.is::() } fn execute_parent( @@ -144,7 +144,7 @@ impl> DynParentKernel for ParentKernelAd child_idx: usize, ctx: &mut ExecutionCtx, ) -> VortexResult> { - let Some(parent_view) = K::Parent::try_match(parent) else { + let Some(parent_view) = parent.as_opt::() else { return Ok(None); }; self.kernel diff --git a/vortex-array/src/matcher.rs b/vortex-array/src/matcher.rs index b33f30a7f86..535fea62a21 100644 --- a/vortex-array/src/matcher.rs +++ b/vortex-array/src/matcher.rs @@ -5,35 +5,31 @@ use crate::ArrayRef; use 
crate::array::ParentRef; /// Trait for matching array types. +/// +/// Matchers expose two parallel entry points: +/// +/// - [`matches`](Self::matches) / [`try_match`](Self::try_match) take a [`ParentRef`]. +/// This is the more general path because a `ParentRef` can borrow either a +/// heap-allocated [`ArrayRef`] or stack-allocated construction parts, so it works +/// uniformly for the optimizer's parent-reduce dispatch. +/// - [`matches_ref`](Self::matches_ref) / [`try_match_ref`](Self::try_match_ref) take +/// an [`ArrayRef`] directly. They exist as a fast path for callers that already +/// hold a heap-allocated array (e.g. `ArrayRef::is::()`, `ArrayRef::as_opt::()`) +/// so they don't pay for [`ParentRef`] construction. +/// +/// Both pairs are required because the result types borrow from different things: +/// the heap entries borrow from `&ArrayRef`, the parent entries borrow from +/// `&ParentRef`, and the lifetimes can't be unified without either materializing +/// stack-backed parts or routing every call through a parent allocation. pub trait Matcher { type Match<'a>; - /// Check if the given array matches this matcher type. - /// - /// The default implementation delegates through `try_match`, but matchers that - /// can answer cheaply (encoding-id checks, no view construction) should override - /// this directly so hot callers like `ArrayRef::is::()` don't pay the - /// `try_match` cost. - fn matches(array: &ArrayRef) -> bool { - Self::try_match(array).is_some() - } - - /// Try to match the given array, returning the matched view type if successful. - /// - /// Both heap-array entry points (`matches`, `try_match`) and parent entry points - /// (`matches_parent`, `try_match_parent`) are required because they borrow from - /// different things: heap entries borrow from `&ArrayRef`, parent entries borrow - /// from `&ParentRef`. 
Most stack-friendly matchers should return the matched - /// view without forcing a materialization — defer the heap allocation to - /// [`ArrayView::array`](crate::array::ArrayView::array) on the cold path. - fn try_match(array: &ArrayRef) -> Option>; - /// Check if the given parent matches this matcher type. /// - /// The default implementation delegates through `try_match_parent`. Override - /// when a cheaper check (e.g. an encoding-id comparison) suffices. - fn matches_parent(parent: &ParentRef<'_>) -> bool { - Self::try_match_parent(parent).is_some() + /// The default implementation delegates through [`try_match`](Self::try_match). + /// Override when a cheaper check (e.g. an encoding-id comparison) suffices. + fn matches(parent: &ParentRef<'_>) -> bool { + Self::try_match(parent).is_some() } /// Try to match a [`ParentRef`]. @@ -42,5 +38,23 @@ pub trait Matcher { /// stack-backed [`ArrayView`](crate::array::ArrayView) without forcing the /// parent to materialize. Implementations typically delegate to /// [`ParentRef::as_opt`]. - fn try_match_parent<'a>(parent: &'a ParentRef<'_>) -> Option>; + fn try_match<'a>(parent: &'a ParentRef<'_>) -> Option>; + + /// Check if the given heap-allocated array matches this matcher type. + /// + /// The default implementation delegates through + /// [`try_match_ref`](Self::try_match_ref), but matchers that can answer cheaply + /// (encoding-id checks, no view construction) should override this directly so + /// hot callers like `ArrayRef::is::()` don't pay the `try_match_ref` cost. + fn matches_ref(array: &ArrayRef) -> bool { + Self::try_match_ref(array).is_some() + } + + /// Try to match a heap-allocated [`ArrayRef`], returning the matched view type + /// if successful. + /// + /// This is the heap-only fast path: callers that already hold an `ArrayRef` + /// skip `ParentRef` construction. Implementations typically delegate to + /// [`ArrayRef::as_typed`](crate::ArrayRef::as_typed). 
+ fn try_match_ref(array: &ArrayRef) -> Option>; } diff --git a/vortex-array/src/optimizer/rules.rs b/vortex-array/src/optimizer/rules.rs index fe21f6c0746..134e24ca833 100644 --- a/vortex-array/src/optimizer/rules.rs +++ b/vortex-array/src/optimizer/rules.rs @@ -53,7 +53,7 @@ pub trait ArrayReduceRule: Debug + Send + Sync + 'static { /// # Stack-backed parents /// /// Construction-side callers borrow `ArrayParts` as a [`ParentRef`] via -/// [`ParentRef::optimize`](crate::array::ParentRef::optimize). [`Matcher::try_match_parent`] +/// [`ParentRef::optimize`](crate::array::ParentRef::optimize). [`Matcher::try_match`] /// returns a stack-backed [`ArrayView`] without materializing an /// `Arc>`, so rules that consume only the typed metadata (e.g. /// `view.dtype()`, `view.data()`, `view.slots()`) can fire without forcing a @@ -117,7 +117,7 @@ impl> DynArrayParentReduceRule for ParentReduceRuleAdapter { fn matches(&self, parent: &ParentRef<'_>) -> bool { - K::Parent::matches_parent(parent) + parent.is::() } fn reduce_parent( @@ -126,7 +126,7 @@ impl> DynArrayParentReduceRule parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { - let Some(parent_view) = ::try_match_parent(parent) else { + let Some(parent_view) = parent.as_opt::() else { return Ok(None); }; self.rule.reduce_parent(child, parent_view, child_idx) From 01a2f4c71fa115be1f166ed54bc2b64b5c4e1a4e Mon Sep 17 00:00:00 2001 From: Robert Kruszewski Date: Tue, 19 May 2026 11:39:02 +0100 Subject: [PATCH 06/17] simpler Signed-off-by: Robert Kruszewski --- vortex-array/src/array/erased.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vortex-array/src/array/erased.rs b/vortex-array/src/array/erased.rs index cb365d15fe4..e6498ea2b1d 100644 --- a/vortex-array/src/array/erased.rs +++ b/vortex-array/src/array/erased.rs @@ -749,7 +749,7 @@ impl Matcher for V { /// downstream micro-benchmarks (`patches_lookup`, `chunk_array_builder`, ...). 
#[inline] fn matches_ref(array: &ArrayRef) -> bool { - array.dyn_array().as_any().is::>() + array.0.data.as_any().is::>() } /// Direct downcast — same fast path as [`Matcher::matches_ref`] but also produces From 8ddbc54ba6057bbdbbe36ba796e2a72d24dd7973 Mon Sep 17 00:00:00 2001 From: Robert Kruszewski Date: Tue, 19 May 2026 11:45:56 +0100 Subject: [PATCH 07/17] nits Signed-off-by: Robert Kruszewski --- vortex-array/src/canonical.rs | 20 ++++++++++---------- vortex-array/src/columnar.rs | 8 ++++---- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/vortex-array/src/canonical.rs b/vortex-array/src/canonical.rs index 8e14225f1a5..a1794957ac1 100644 --- a/vortex-array/src/canonical.rs +++ b/vortex-array/src/canonical.rs @@ -1108,16 +1108,16 @@ impl Matcher for AnyCanonical { } fn matches(parent: &ParentRef<'_>) -> bool { - Null::matches(parent) - || Bool::matches(parent) - || Primitive::matches(parent) - || Decimal::matches(parent) - || Struct::matches(parent) - || ListView::matches(parent) - || FixedSizeList::matches(parent) - || VarBinView::matches(parent) - || Variant::matches(parent) - || Extension::matches(parent) + parent.is::() + || parent.is::() + || parent.is::() + || parent.is::() + || parent.is::() + || parent.is::() + || parent.is::() + || parent.is::() + || parent.is::() + || parent.is::() } fn try_match<'a>(parent: &'a ParentRef<'_>) -> Option> { diff --git a/vortex-array/src/columnar.rs b/vortex-array/src/columnar.rs index 34ad48d9510..45b35ad014b 100644 --- a/vortex-array/src/columnar.rs +++ b/vortex-array/src/columnar.rs @@ -95,14 +95,14 @@ impl Matcher for AnyColumnar { type Match<'a> = ColumnarView<'a>; fn matches(parent: &ParentRef<'_>) -> bool { - Constant::matches(parent) || AnyCanonical::matches(parent) + parent.is::() || parent.is::() } fn try_match<'a>(parent: &'a ParentRef<'_>) -> Option> { if let Some(constant) = parent.as_opt::() { Some(ColumnarView::Constant(constant)) } else { - 
AnyCanonical::try_match(parent).map(ColumnarView::Canonical) + parent.as_opt::().map(ColumnarView::Canonical) } } @@ -110,7 +110,7 @@ impl Matcher for AnyColumnar { /// [`AnyCanonical::matches_ref`](crate::AnyCanonical::matches_ref) for the same reason. #[inline] fn matches_ref(array: &ArrayRef) -> bool { - array.is::() || AnyCanonical::matches_ref(array) + array.is::() || array.is::() } /// Direct heap-array downcasts; skips the [`ParentRef`] construction that the @@ -119,7 +119,7 @@ impl Matcher for AnyColumnar { if let Some(constant) = array.as_opt::() { Some(ColumnarView::Constant(constant)) } else { - AnyCanonical::try_match_ref(array).map(ColumnarView::Canonical) + array.as_opt::().map(ColumnarView::Canonical) } } } From f777c819b98aaf3c9caf923d2365667dbcb43eb2 Mon Sep 17 00:00:00 2001 From: Robert Kruszewski Date: Tue, 19 May 2026 12:24:53 +0100 Subject: [PATCH 08/17] less Signed-off-by: Robert Kruszewski --- vortex-array/src/array/erased.rs | 6 ++++-- vortex-array/src/array/typed.rs | 21 --------------------- 2 files changed, 4 insertions(+), 23 deletions(-) diff --git a/vortex-array/src/array/erased.rs b/vortex-array/src/array/erased.rs index e6498ea2b1d..0d48a955421 100644 --- a/vortex-array/src/array/erased.rs +++ b/vortex-array/src/array/erased.rs @@ -254,12 +254,14 @@ impl ArrayRef { /// Wraps the array in a [`FilterArray`] such that it is logically filtered by the given mask. pub fn filter(&self, mask: Mask) -> VortexResult { - FilterArray::try_new_parts(self.clone(), mask)?.optimize() + let parts = FilterArray::try_new_parts(self.clone(), mask)?; + ParentRef::from_parts(&parts).optimize() } /// Wraps the array in a [`DictArray`] such that it is logically taken by the given indices. pub fn take(&self, indices: ArrayRef) -> VortexResult { - DictArray::try_new_parts(indices, self.clone())?.optimize() + let parts = DictArray::try_new_parts(indices, self.clone())?; + ParentRef::from_parts(&parts).optimize() } /// Fetch the scalar at the given index. 
diff --git a/vortex-array/src/array/typed.rs b/vortex-array/src/array/typed.rs index 98605542779..16539e8e6ad 100644 --- a/vortex-array/src/array/typed.rs +++ b/vortex-array/src/array/typed.rs @@ -13,7 +13,6 @@ use std::ops::DerefMut; use std::sync::Arc; use vortex_error::VortexResult; -use vortex_session::VortexSession; use crate::ArrayRef; use crate::ArraySlots; @@ -23,7 +22,6 @@ use crate::LEGACY_SESSION; use crate::VortexSessionExecute; use crate::array::ArrayId; use crate::array::ArrayView; -use crate::array::ParentRef; use crate::array::VTable; use crate::dtype::DType; use crate::stats::ArrayStats; @@ -81,25 +79,6 @@ impl ArrayParts { pub fn into_array(self) -> ArrayRef { unsafe { Array::::from_parts_unchecked(self).into_array() } } - - /// Optimize the parts directly, without ever requiring an [`ArrayRef`]. - /// - /// Builds a [`ParentRef`] borrowing `self` and runs the parent-reduce dispatch - /// chain on it. If a child rule fires before materialization the wrapper - /// allocation is skipped entirely; otherwise the parts are materialized and run - /// through the full optimizer. - /// - /// This is the entry point that lets construction-side callers hand a - /// stack-allocated array over to the optimizer without first paying for - /// `Arc>`. - pub fn optimize(self) -> VortexResult { - ParentRef::from_parts(&self).optimize() - } - - /// Same as [`Self::optimize`] but also consults session-registered kernels. 
- pub fn optimize_ctx(self, session: &VortexSession) -> VortexResult { - ParentRef::from_parts(&self).optimize_ctx(session) - } } /// Shared bound for helpers that should work over both owned [`Array`] and borrowed From 9ea88180d154beb76cc9a735816bc4bf2a753b8e Mon Sep 17 00:00:00 2001 From: Robert Kruszewski Date: Tue, 19 May 2026 13:35:47 +0100 Subject: [PATCH 09/17] less Signed-off-by: Robert Kruszewski --- vortex-array/src/array/parent.rs | 55 +++---------------- vortex-array/src/arrays/dict/array.rs | 5 +- vortex-array/src/arrays/filter/array.rs | 24 ++++---- .../src/arrays/scalar_fn/vtable/mod.rs | 16 ++++++ vortex-array/src/arrays/slice/array.rs | 23 ++++---- 5 files changed, 50 insertions(+), 73 deletions(-) diff --git a/vortex-array/src/array/parent.rs b/vortex-array/src/array/parent.rs index f210df2b8bf..8ecd262f700 100644 --- a/vortex-array/src/array/parent.rs +++ b/vortex-array/src/array/parent.rs @@ -19,8 +19,6 @@ use std::sync::OnceLock; use vortex_error::VortexExpect; use vortex_error::VortexResult; -use vortex_session::SessionExt; -use vortex_session::VortexSession; use crate::ArrayRef; use crate::array::ArrayData; @@ -32,7 +30,6 @@ use crate::array::VTable; use crate::dtype::DType; use crate::matcher::Matcher; use crate::optimizer::ArrayOptimizer; -use crate::optimizer::kernels::ArrayKernels; /// A parent array, possibly stack-allocated, used by the `reduce_parent` dispatch chain. /// @@ -57,7 +54,7 @@ pub struct ParentRef<'a> { /// /// Carries `&dyn Any` rather than `&V`/`&V::TypedArrayData` so [`ParentRef`] is not /// itself generic over `V`. The `+ Send + Sync` bound mirrors the bounds on -/// [`VTable`](crate::array::VTable) and `V::TypedArrayData`, keeping [`ParentRef`] +/// [`VTable`](array::VTable) and `V::TypedArrayData`, keeping [`ParentRef`] /// and the [`ArrayView`] built on top of it `Send + Sync`. 
type AnyRef<'a> = &'a (dyn Any + Send + Sync); @@ -132,53 +129,15 @@ impl<'a> ParentRef<'a> { /// full optimizer is run on it so legacy rules whose matchers still require an /// [`ArrayView`] also get a chance to fire. pub fn optimize(self) -> VortexResult { - match self.try_reduce_parent(None)? { - Some(reduced) => Ok(reduced), - None => self.into_array_ref().optimize(), - } - } - - /// Same as [`Self::optimize`] but also consults [`ArrayKernels`] from `session`. - pub fn optimize_ctx(self, session: &VortexSession) -> VortexResult { - match self.try_reduce_parent(Some(session))? { - Some(reduced) => Ok(reduced), - None => self.into_array_ref().optimize_ctx(session), - } - } - - fn try_reduce_parent(&self, session: Option<&VortexSession>) -> VortexResult> { - let kernels = session.and_then(|s| s.get_opt::()); - for (slot_idx, slot) in self.slots.iter().enumerate() { let Some(child) = slot else { continue }; - // Session kernels take precedence over static `PARENT_RULES`, matching - // the existing optimizer's ordering. - if let Some(kernels) = &kernels - && let Some(plugins) = - kernels.find_reduce_parent(self.encoding_id, child.encoding_id()) - { - for plugin in plugins.as_ref() { - if let Some(reduced) = plugin(child, self, slot_idx)? { - return match session { - Some(s) => reduced.optimize_ctx(s), - None => reduced.optimize(), - } - .map(Some); - } - } - } - - if let Some(reduced) = child.reduce_parent(self, slot_idx)? { - return match session { - Some(s) => reduced.optimize_ctx(s), - None => reduced.optimize(), - } - .map(Some); + if let Some(reduced) = child.reduce_parent(&self, slot_idx)? { + return reduced.optimize(); } } - Ok(None) + Ok(self.into_array_ref()) } /// Returns the encoding id of the parent. @@ -271,7 +230,7 @@ impl<'a> ParentRef<'a> { /// Does the parent match the given matcher. 
/// - /// Mirrors [`ArrayRef::is`](crate::ArrayRef::is) for the parent-side dispatch + /// Mirrors [`ArrayRef::is`](ArrayRef::is) for the parent-side dispatch /// chain. Routes through [`Matcher::matches`] so matchers that can answer with /// a cheap encoding-id check don't force a downcast. pub fn is(&self) -> bool { @@ -280,7 +239,7 @@ impl<'a> ParentRef<'a> { /// Returns the parent downcast by the given matcher, or `None` if it doesn't match. /// - /// Mirrors [`ArrayRef::as_opt`](crate::ArrayRef::as_opt) for the parent-side + /// Mirrors [`ArrayRef::as_opt`](ArrayRef::as_opt) for the parent-side /// dispatch chain. The returned `Match` borrows from `self`, so stack-backed /// parents stay on the stack until a consumer reaches for /// [`ArrayView::array`]. @@ -290,7 +249,7 @@ impl<'a> ParentRef<'a> { /// Returns the parent downcast by the given matcher, panicking if it doesn't match. /// - /// Mirrors [`ArrayRef::as_`](crate::ArrayRef::as_). + /// Mirrors [`ArrayRef::as_`](ArrayRef::as_). pub fn as_(&self) -> M::Match<'_> { self.as_opt::().vortex_expect("Failed to downcast") } diff --git a/vortex-array/src/arrays/dict/array.rs b/vortex-array/src/arrays/dict/array.rs index 291aa549d91..148040b9251 100644 --- a/vortex-array/src/arrays/dict/array.rs +++ b/vortex-array/src/arrays/dict/array.rs @@ -239,7 +239,10 @@ impl Array { /// Build the [`ArrayParts`]. The parts can then be optimized through /// [`ParentRef::optimize`](crate::array::ParentRef::optimize) or materialized /// directly with [`ArrayParts::into_array`]. 
- pub fn try_new_parts(codes: ArrayRef, values: ArrayRef) -> VortexResult> { + pub(crate) fn try_new_parts( + codes: ArrayRef, + values: ArrayRef, + ) -> VortexResult> { let dtype = values .dtype() .union_nullability(codes.dtype().nullability()); diff --git a/vortex-array/src/arrays/filter/array.rs b/vortex-array/src/arrays/filter/array.rs index 53783c2bac1..f5a0f776089 100644 --- a/vortex-array/src/arrays/filter/array.rs +++ b/vortex-array/src/arrays/filter/array.rs @@ -52,11 +52,11 @@ pub trait FilterArrayExt: TypedArrayRef { impl> FilterArrayExt for T {} impl FilterData { - pub fn new(mask: Mask) -> Self { + fn new(mask: Mask) -> Self { Self { mask } } - pub fn try_new(array_len: usize, mask: Mask) -> VortexResult { + fn try_new(array_len: usize, mask: Mask) -> VortexResult { vortex_ensure_eq!( array_len, mask.len(), @@ -91,7 +91,14 @@ impl FilterData { impl Array { /// Creates a new `FilterArray`. pub fn new(array: ArrayRef, mask: Mask) -> Self { - unsafe { Array::from_parts_unchecked(Self::new_parts(array, mask)) } + let dtype = array.dtype().clone(); + let len = mask.true_count(); + let data = FilterData::new(mask); + unsafe { + Array::from_parts_unchecked( + ArrayParts::new(Filter, dtype, len, data).with_slots(smallvec![Some(array)]), + ) + } } /// Constructs a new `FilterArray`. @@ -102,19 +109,10 @@ impl Array { /// Builds the [`ArrayParts`]. The parts can then be optimized through /// [`ParentRef::optimize`](crate::array::ParentRef::optimize) or materialized /// directly with [`ArrayParts::into_array`]. - pub fn try_new_parts(array: ArrayRef, mask: Mask) -> VortexResult> { + pub(crate) fn try_new_parts(array: ArrayRef, mask: Mask) -> VortexResult> { let dtype = array.dtype().clone(); let len = mask.true_count(); let data = FilterData::try_new(array.len(), mask)?; Ok(ArrayParts::new(Filter, dtype, len, data).with_slots(smallvec![Some(array)])) } - - /// Builds the [`ArrayParts`] without checking that the mask length matches - /// the array length. 
See [`Self::try_new_parts`] for the checked variant. - pub fn new_parts(array: ArrayRef, mask: Mask) -> ArrayParts { - let dtype = array.dtype().clone(); - let len = mask.true_count(); - let data = FilterData::new(mask); - ArrayParts::new(Filter, dtype, len, data).with_slots(smallvec![Some(array)]) - } } diff --git a/vortex-array/src/arrays/scalar_fn/vtable/mod.rs b/vortex-array/src/arrays/scalar_fn/vtable/mod.rs index 6038b141105..c12e391199c 100644 --- a/vortex-array/src/arrays/scalar_fn/vtable/mod.rs +++ b/vortex-array/src/arrays/scalar_fn/vtable/mod.rs @@ -242,10 +242,26 @@ impl ExactScalarFn { impl Matcher for ExactScalarFn { type Match<'a> = ScalarFnArrayView<'a, F>; + /// Skip the `ArrayView` + `ScalarFnArrayView` construction that the default + /// `try_match(...).is_some()` would do. Two cheap downcasts suffice: encoding + /// id, then scalar function id. + fn matches(parent: &ParentRef<'_>) -> bool { + parent + .typed_data::() + .is_some_and(|data| data.scalar_fn().is::()) + } + fn try_match<'a>(parent: &'a ParentRef<'_>) -> Option> { Self::from_view(parent.as_opt::()?) } + /// Heap-side mirror of [`Self::matches`], same reasoning. + fn matches_ref(array: &ArrayRef) -> bool { + array + .as_typed::() + .is_some_and(|view| view.data().scalar_fn().is::()) + } + fn try_match_ref(array: &ArrayRef) -> Option> { Self::from_view(array.as_typed::()?) } diff --git a/vortex-array/src/arrays/slice/array.rs b/vortex-array/src/arrays/slice/array.rs index 3cbb0e3cec4..9a353937456 100644 --- a/vortex-array/src/arrays/slice/array.rs +++ b/vortex-array/src/arrays/slice/array.rs @@ -89,25 +89,26 @@ impl Array { /// Constructs a new `SliceArray`. 
pub fn new(child: ArrayRef, range: Range) -> Self { - unsafe { Array::from_parts_unchecked(Self::new_parts(child, range)) } + let len = range.len(); + let dtype = child.dtype().clone(); + let data = SliceData::new(range); + unsafe { + Array::from_parts_unchecked( + ArrayParts::new(Slice, dtype, len, data).with_slots(smallvec![Some(child)]), + ) + } } /// Builds the [`ArrayParts`] for a slice. The parts can then be /// optimized through [`ParentRef::optimize`](crate::array::ParentRef::optimize) /// or materialized directly with [`ArrayParts::into_array`]. - pub fn try_new_parts(child: ArrayRef, range: Range) -> VortexResult> { + pub(crate) fn try_new_parts( + child: ArrayRef, + range: Range, + ) -> VortexResult> { let len = range.len(); let dtype = child.dtype().clone(); let data = SliceData::try_new(child.len(), range)?; Ok(ArrayParts::new(Slice, dtype, len, data).with_slots(smallvec![Some(child)])) } - - /// Builds the [`ArrayParts`] without bounds-checking the range. See - /// [`Self::try_new_parts`] for the checked variant. 
- pub fn new_parts(child: ArrayRef, range: Range) -> ArrayParts { - let len = range.len(); - let dtype = child.dtype().clone(); - let data = SliceData::new(range); - ArrayParts::new(Slice, dtype, len, data).with_slots(smallvec![Some(child)]) - } } From 204e2d7295c0a246b68976350a4b9728fbe11f35 Mon Sep 17 00:00:00 2001 From: Robert Kruszewski Date: Fri, 29 May 2026 00:03:17 +0100 Subject: [PATCH 10/17] more methods Signed-off-by: Robert Kruszewski --- vortex-array/src/array/parent.rs | 122 +++++++++++++++++- .../src/arrays/chunked/compute/rules.rs | 18 +-- vortex-array/src/arrays/dict/compute/like.rs | 15 ++- vortex-array/src/arrays/dict/compute/rules.rs | 16 +-- vortex-array/src/arrays/scalar_fn/array.rs | 22 +++- vortex-array/src/builtins.rs | 85 +++++++----- vortex-array/src/validity.rs | 16 ++- 7 files changed, 219 insertions(+), 75 deletions(-) diff --git a/vortex-array/src/array/parent.rs b/vortex-array/src/array/parent.rs index 8ecd262f700..5ca1c03538e 100644 --- a/vortex-array/src/array/parent.rs +++ b/vortex-array/src/array/parent.rs @@ -19,6 +19,7 @@ use std::sync::OnceLock; use vortex_error::VortexExpect; use vortex_error::VortexResult; +use vortex_error::vortex_ensure; use crate::ArrayRef; use crate::array::ArrayData; @@ -67,6 +68,7 @@ enum ParentData<'a> { vtable: AnyRef<'a>, data: AnyRef<'a>, materialize: MaterializeFn, + reduce: ReduceFn, }, } @@ -83,6 +85,15 @@ type MaterializeFn = fn( slots: &[Option], ) -> ArrayRef; +/// Function pointer that runs encoding `V`'s self-reduce rules against a (possibly +/// stack-borrowed) parent. +/// +/// Stored alongside [`MaterializeFn`] in [`ParentData::Parts`] so [`ParentRef::optimize`] +/// can dispatch `V::reduce` without being generic over `V`. The implementation builds a +/// stack-backed [`ArrayView`] over the borrowed parts, so a rule that only inspects +/// metadata never forces a materialization. 
+type ReduceFn = fn(parent: &ParentRef<'_>) -> VortexResult>; + impl<'a> ParentRef<'a> { /// Build a [`ParentRef`] borrowing a heap-allocated [`ArrayRef`]. #[inline] @@ -116,6 +127,7 @@ impl<'a> ParentRef<'a> { vtable: &parts.vtable, data: &parts.data, materialize: materialize_parts::, + reduce: reduce_parts::, }, cache: OnceLock::new(), } @@ -123,12 +135,20 @@ impl<'a> ParentRef<'a> { /// Optimize this parent, materializing the parts if no stack reduction fires. /// - /// Tries `reduce_parent` on each child slot first, which can match against the - /// stack-borrowed parent without ever allocating an `Arc>`. If - /// nothing matches, the parent is materialized into a real [`ArrayRef`] and the - /// full optimizer is run on it so legacy rules whose matchers still require an - /// [`ArrayView`] also get a chance to fire. + /// Mirrors one iteration of [`ArrayRef::optimize`](crate::optimizer::ArrayOptimizer): + /// the parent's own `reduce` rules are tried first, then `reduce_parent` on each child + /// slot. Both run against the (possibly stack-borrowed) parent, so a reduction that + /// only inspects metadata never allocates an `Arc>`. When a rule fires + /// the result is re-driven through the full [`ArrayRef::optimize`] fixpoint. + /// + /// Running `reduce` first is what makes this equivalent to materializing the parts and + /// calling `ArrayRef::optimize`: the two paths differ only in whether the wrapper is + /// heap-allocated when no reduction applies. pub fn optimize(self) -> VortexResult { + if let Some(reduced) = self.reduce()? { + return reduced.optimize(); + } + for (slot_idx, slot) in self.slots.iter().enumerate() { let Some(child) = slot else { continue }; @@ -140,6 +160,36 @@ impl<'a> ParentRef<'a> { Ok(self.into_array_ref()) } + /// Run the parent encoding's self-reduce rules against the parent. + /// + /// Mirrors [`ArrayRef::reduce`](crate::ArrayRef::reduce) for the `ParentRef` dispatch + /// chain. 
Heap-backed parents delegate to the existing array; stack-backed parents + /// dispatch through the stored [`ReduceFn`] so the borrowed parts only materialize if a + /// rule reaches for an [`ArrayRef`]. The reduced array is validated to preserve the + /// parent's len and dtype, matching the heap path. + fn reduce(&self) -> VortexResult> { + let reduced = match self.data { + ParentData::Heap { array, .. } => return array.reduce(), + ParentData::Parts { reduce, .. } => reduce(self)?, + }; + let Some(reduced) = reduced else { + return Ok(None); + }; + vortex_ensure!( + reduced.len() == self.len, + "Reduced array length mismatch from {} to {}", + self.encoding_id, + reduced.encoding_id() + ); + vortex_ensure!( + reduced.dtype() == self.dtype, + "Reduced array dtype mismatch from {} to {}", + self.encoding_id, + reduced.encoding_id() + ); + Ok(Some(reduced)) + } + /// Returns the encoding id of the parent. #[inline] pub fn encoding_id(&self) -> ArrayId { @@ -186,6 +236,7 @@ impl<'a> ParentRef<'a> { vtable, data, materialize, + .. } => materialize(vtable, data, self.dtype, self.len, self.slots), } } @@ -306,6 +357,7 @@ impl ArrayBacking for ParentRef<'_> { vtable, data, materialize, + .. } => self .cache .get_or_init(|| materialize(vtable, data, self.dtype, self.len, self.slots)), @@ -339,6 +391,18 @@ fn materialize_parts( .into_array() } +/// Runs encoding `V`'s self-reduce rules against a (possibly stack-borrowed) parent. +/// +/// Used as the [`ReduceFn`] stored inside [`ParentData::Parts`]. Builds a stack-backed +/// [`ArrayView`] over the borrowed parts and dispatches to [`VTable::reduce`]; the view +/// only materializes if a rule reaches for an [`ArrayRef`]. 
+fn reduce_parts(parent: &ParentRef<'_>) -> VortexResult> { + let view = parent + .as_view::() + .vortex_expect("ParentRef reduce: encoding mismatch"); + V::reduce(view) +} + #[cfg(test)] mod tests { use vortex_error::VortexResult; @@ -346,8 +410,17 @@ mod tests { use super::ParentRef; use crate::IntoArray; use crate::arrays::BoolArray; + use crate::arrays::PrimitiveArray; + use crate::arrays::ScalarFnArray; use crate::arrays::Slice; use crate::arrays::SliceArray; + use crate::arrays::Struct; + use crate::assert_arrays_eq; + use crate::dtype::Nullability; + use crate::optimizer::ArrayOptimizer; + use crate::scalar_fn::ScalarFnVTableExt; + use crate::scalar_fn::fns::pack::Pack; + use crate::scalar_fn::fns::pack::PackOptions; #[test] fn parts_parent_ref_exposes_array_view() -> VortexResult<()> { @@ -385,4 +458,43 @@ mod tests { Ok(()) } + + /// Optimizing borrowed parts must produce the same array as materializing them and + /// calling [`ArrayRef::optimize`](crate::optimizer::ArrayOptimizer) — the two paths + /// differ only in whether the wrapper is heap-allocated. + /// + /// Regression test for [`ParentRef::optimize`] skipping the parent's own `reduce` + /// rules. A `Pack` scalar function collapses to a `StructArray` via the `ScalarFn` + /// encoding's self-`reduce`. No `reduce_parent` rule mirrors this, so the reduction is + /// only reachable through self-`reduce`: before `optimize` ran `reduce` first the stack + /// path returned the `ScalarFn` wrapper while materialize-then-optimize returned the + /// struct. + #[test] + fn optimize_matches_heap_path() -> VortexResult<()> { + let a = PrimitiveArray::from_iter([1i32, 2, 3]).into_array(); + let b = PrimitiveArray::from_iter([4i32, 5, 6]).into_array(); + let len = a.len(); + let pack = Pack.bind(PackOptions { + names: ["a", "b"].into(), + nullability: Nullability::NonNullable, + }); + + let heap = ScalarFnArray::try_new(pack.clone(), vec![a.clone(), b.clone()], len)? 
+ .into_array() + .optimize()?; + let parts = ScalarFnArray::try_new_parts(pack, vec![a, b], len)?; + let stack = ParentRef::from_parts(&parts).optimize()?; + + assert!( + heap.is::(), + "heap path should collapse Pack to a struct" + ); + assert!( + stack.is::(), + "stack path should collapse Pack to a struct" + ); + assert_arrays_eq!(stack, heap); + + Ok(()) + } } diff --git a/vortex-array/src/arrays/chunked/compute/rules.rs b/vortex-array/src/arrays/chunked/compute/rules.rs index d8d324a8e86..973879c1590 100644 --- a/vortex-array/src/arrays/chunked/compute/rules.rs +++ b/vortex-array/src/arrays/chunked/compute/rules.rs @@ -6,6 +6,7 @@ use vortex_error::VortexResult; use crate::ArrayRef; use crate::IntoArray; +use crate::ParentRef; use crate::array::ArrayView; use crate::arrays::Chunked; use crate::arrays::ChunkedArray; @@ -16,7 +17,6 @@ use crate::arrays::ScalarFnArray; use crate::arrays::chunked::ChunkedArrayExt; use crate::arrays::scalar_fn::AnyScalarFn; use crate::arrays::scalar_fn::ScalarFnArrayExt; -use crate::optimizer::ArrayOptimizer; use crate::optimizer::rules::ArrayParentReduceRule; use crate::optimizer::rules::ParentRuleSet; use crate::scalar_fn::fns::cast::CastReduceAdaptor; @@ -48,13 +48,12 @@ impl ArrayParentReduceRule for ChunkedUnaryScalarFnPushDownRule { let new_chunks: Vec<_> = array .iter_chunks() .map(|chunk| { - ScalarFnArray::try_new( + let parts = ScalarFnArray::try_new_parts( parent.scalar_fn().clone(), vec![chunk.clone()], chunk.len(), - )? - .into_array() - .optimize() + )?; + ParentRef::from_parts(&parts).optimize() }) .try_collect()?; @@ -104,9 +103,12 @@ impl ArrayParentReduceRule for ChunkedConstantScalarFnPushDownRule { }) .collect(); - ScalarFnArray::try_new(parent.scalar_fn().clone(), new_children, chunk.len())? 
- .into_array() - .optimize() + let parts = ScalarFnArray::try_new_parts( + parent.scalar_fn().clone(), + new_children, + chunk.len(), + )?; + ParentRef::from_parts(&parts).optimize() }) .try_collect()?; diff --git a/vortex-array/src/arrays/dict/compute/like.rs b/vortex-array/src/arrays/dict/compute/like.rs index 40bcea8fcf1..183570394a7 100644 --- a/vortex-array/src/arrays/dict/compute/like.rs +++ b/vortex-array/src/arrays/dict/compute/like.rs @@ -7,12 +7,13 @@ use super::Dict; use super::DictArray; use crate::ArrayRef; use crate::IntoArray; +use crate::ParentRef; use crate::array::ArrayView; use crate::arrays::ConstantArray; +use crate::arrays::ScalarFnArray; use crate::arrays::dict::DictArrayExt; use crate::arrays::dict::DictArraySlotsExt; -use crate::arrays::scalar_fn::ScalarFnFactoryExt; -use crate::optimizer::ArrayOptimizer; +use crate::scalar_fn::ScalarFnVTableExt; use crate::scalar_fn::fns::like::Like; use crate::scalar_fn::fns::like::LikeOptions; use crate::scalar_fn::fns::like::LikeReduce; @@ -30,9 +31,13 @@ impl LikeReduce for Dict { if let Some(pattern) = pattern.as_constant() { let pattern = ConstantArray::new(pattern, array.values().len()).into_array(); - let values = Like - .try_new_array(pattern.len(), options, [array.values().clone(), pattern])? - .optimize()?; + let len = pattern.len(); + let parts = ScalarFnArray::try_new_parts( + Like.bind(options), + vec![array.values().clone(), pattern], + len, + )?; + let values = ParentRef::from_parts(&parts).optimize()?; // SAFETY: LIKE preserves the len of the values, so codes are still pointing at // valid positions. 
diff --git a/vortex-array/src/arrays/dict/compute/rules.rs b/vortex-array/src/arrays/dict/compute/rules.rs index f6fe816a6cc..a18f7e17d37 100644 --- a/vortex-array/src/arrays/dict/compute/rules.rs +++ b/vortex-array/src/arrays/dict/compute/rules.rs @@ -6,6 +6,7 @@ use vortex_error::VortexResult; use crate::ArrayEq; use crate::ArrayRef; use crate::IntoArray; +use crate::ParentRef; use crate::Precision; use crate::array::ArrayView; use crate::array::VTable; @@ -21,7 +22,6 @@ use crate::arrays::scalar_fn::AnyScalarFn; use crate::arrays::scalar_fn::ScalarFnArrayExt; use crate::arrays::slice::SliceReduceAdaptor; use crate::builtins::ArrayBuiltins; -use crate::optimizer::ArrayOptimizer; use crate::optimizer::rules::ArrayParentReduceRule; use crate::optimizer::rules::ParentRuleSet; use crate::scalar_fn::fns::cast::Cast; @@ -126,10 +126,9 @@ impl ArrayParentReduceRule for DictionaryScalarFnValuesPushDownRule { } } - let new_values = - ScalarFnArray::try_new(parent.scalar_fn().clone(), new_children, values_len)? - .into_array() - .optimize()?; + let parts = + ScalarFnArray::try_new_parts(parent.scalar_fn().clone(), new_children, values_len)?; + let new_values = ParentRef::from_parts(&parts).optimize()?; // We can only push down null-sensitive functions when we have all-valid codes. // In these cases, we cannot have the codes influence the nullability of the output DType. @@ -193,13 +192,12 @@ impl ArrayParentReduceRule for DictionaryScalarFnCodesPullUpRule { } } - let new_values = ScalarFnArray::try_new( + let parts = ScalarFnArray::try_new_parts( parent.scalar_fn().clone(), new_children, array.values().len(), - )? 
- .into_array() - .optimize()?; + )?; + let new_values = ParentRef::from_parts(&parts).optimize()?; let new_dict = unsafe { DictArray::new_unchecked(array.codes().clone(), new_values) }.into_array(); diff --git a/vortex-array/src/arrays/scalar_fn/array.rs b/vortex-array/src/arrays/scalar_fn/array.rs index 2c77be3c329..65f378967b1 100644 --- a/vortex-array/src/arrays/scalar_fn/array.rs +++ b/vortex-array/src/arrays/scalar_fn/array.rs @@ -90,15 +90,25 @@ impl Array { children: Vec, len: usize, ) -> VortexResult { + Ok(unsafe { Array::from_parts_unchecked(Self::try_new_parts(scalar_fn, children, len)?) }) + } + + /// Build the [`ArrayParts`] for a ScalarFnArray without materializing it. + /// + /// Mirrors [`try_new`](Self::try_new) but stops short of allocating the backing + /// `ArrayRef`, so callers can drive the parts through + /// [`ParentRef::from_parts`](crate::ParentRef::from_parts)`.optimize()` and only pay + /// the wrapper allocation when no reduction fires. + pub(crate) fn try_new_parts( + scalar_fn: ScalarFnRef, + children: Vec, + len: usize, + ) -> VortexResult> { let arg_dtypes: Vec<_> = children.iter().map(|c| c.dtype().clone()).collect(); let dtype = scalar_fn.return_dtype(&arg_dtypes)?; let data = ScalarFnData::build(scalar_fn.clone(), children.clone(), len)?; let vtable = ScalarFn { id: scalar_fn.id() }; - Ok(unsafe { - Array::from_parts_unchecked( - ArrayParts::new(vtable, dtype, len, data) - .with_slots(children.into_iter().map(Some).collect::()), - ) - }) + Ok(ArrayParts::new(vtable, dtype, len, data) + .with_slots(children.into_iter().map(Some).collect::())) } } diff --git a/vortex-array/src/builtins.rs b/vortex-array/src/builtins.rs index dcbe934097e..6177edf965a 100644 --- a/vortex-array/src/builtins.rs +++ b/vortex-array/src/builtins.rs @@ -13,12 +13,13 @@ use vortex_error::VortexResult; use crate::ArrayRef; use crate::IntoArray; +use crate::ParentRef; use crate::arrays::ConstantArray; +use crate::arrays::ScalarFnArray; use 
crate::arrays::scalar_fn::ScalarFnFactoryExt; use crate::dtype::DType; use crate::dtype::FieldName; use crate::expr::Expression; -use crate::optimizer::ArrayOptimizer; use crate::scalar::Scalar; use crate::scalar_fn::EmptyOptions; use crate::scalar_fn::ScalarFnVTableExt; @@ -160,8 +161,8 @@ impl ArrayBuiltins for ArrayRef { if self.dtype() == &dtype { return Ok(self.clone()); } - Cast.try_new_array(self.len(), dtype, [self.clone()])? - .optimize() + let parts = ScalarFnArray::try_new_parts(Cast.bind(dtype), vec![self.clone()], self.len())?; + ParentRef::from_parts(&parts).optimize() } fn fill_null(&self, fill_value: impl Into) -> VortexResult { @@ -169,44 +170,54 @@ impl ArrayBuiltins for ArrayRef { if !self.dtype().is_nullable() { return self.cast(fill_value.dtype().clone()); } - FillNull - .try_new_array( - self.len(), - EmptyOptions, - [ - self.clone(), - ConstantArray::new(fill_value, self.len()).into_array(), - ], - )? - .optimize() + let parts = ScalarFnArray::try_new_parts( + FillNull.bind(EmptyOptions), + vec![ + self.clone(), + ConstantArray::new(fill_value, self.len()).into_array(), + ], + self.len(), + )?; + ParentRef::from_parts(&parts).optimize() } fn get_item(&self, field_name: impl Into) -> VortexResult { - GetItem - .try_new_array(self.len(), field_name.into(), [self.clone()])? - .optimize() + let parts = ScalarFnArray::try_new_parts( + GetItem.bind(field_name.into()), + vec![self.clone()], + self.len(), + )?; + ParentRef::from_parts(&parts).optimize() } fn is_null(&self) -> VortexResult { - IsNull - .try_new_array(self.len(), EmptyOptions, [self.clone()])? - .optimize() + let parts = ScalarFnArray::try_new_parts( + IsNull.bind(EmptyOptions), + vec![self.clone()], + self.len(), + )?; + ParentRef::from_parts(&parts).optimize() } fn is_not_null(&self) -> VortexResult { - IsNotNull - .try_new_array(self.len(), EmptyOptions, [self.clone()])? 
- .optimize() + let parts = ScalarFnArray::try_new_parts( + IsNotNull.bind(EmptyOptions), + vec![self.clone()], + self.len(), + )?; + ParentRef::from_parts(&parts).optimize() } fn mask(self, mask: ArrayRef) -> VortexResult { - Mask.try_new_array(self.len(), EmptyOptions, [self, mask])? - .optimize() + let len = self.len(); + let parts = ScalarFnArray::try_new_parts(Mask.bind(EmptyOptions), vec![self, mask], len)?; + ParentRef::from_parts(&parts).optimize() } fn not(&self) -> VortexResult { - Not.try_new_array(self.len(), EmptyOptions, [self.clone()])? - .optimize() + let parts = + ScalarFnArray::try_new_parts(Not.bind(EmptyOptions), vec![self.clone()], self.len())?; + ParentRef::from_parts(&parts).optimize() } fn zip(&self, if_true: ArrayRef, if_false: ArrayRef) -> VortexResult { @@ -214,15 +225,18 @@ impl ArrayBuiltins for ArrayRef { } fn list_contains(&self, value: ArrayRef) -> VortexResult { - ListContains - .try_new_array(self.len(), EmptyOptions, [self.clone(), value])? - .optimize() + let parts = ScalarFnArray::try_new_parts( + ListContains.bind(EmptyOptions), + vec![self.clone(), value], + self.len(), + )?; + ParentRef::from_parts(&parts).optimize() } fn binary(&self, rhs: ArrayRef, op: Operator) -> VortexResult { - Binary - .try_new_array(self.len(), op, [self.clone(), rhs])? - .optimize() + let parts = + ScalarFnArray::try_new_parts(Binary.bind(op), vec![self.clone(), rhs], self.len())?; + ParentRef::from_parts(&parts).optimize() } fn between( @@ -231,8 +245,9 @@ impl ArrayBuiltins for ArrayRef { upper: ArrayRef, options: BetweenOptions, ) -> VortexResult { - Between - .try_new_array(self.len(), options, [self, lower, upper])? 
- .optimize() + let len = self.len(); + let parts = + ScalarFnArray::try_new_parts(Between.bind(options), vec![self, lower, upper], len)?; + ParentRef::from_parts(&parts).optimize() } } diff --git a/vortex-array/src/validity.rs b/vortex-array/src/validity.rs index 204205d1f51..6f20748cb33 100644 --- a/vortex-array/src/validity.rs +++ b/vortex-array/src/validity.rs @@ -20,17 +20,18 @@ use crate::Canonical; use crate::ExecutionCtx; use crate::IntoArray; use crate::LEGACY_SESSION; +use crate::ParentRef; use crate::VortexSessionExecute; use crate::arrays::BoolArray; use crate::arrays::ChunkedArray; use crate::arrays::ConstantArray; -use crate::arrays::scalar_fn::ScalarFnFactoryExt; +use crate::arrays::ScalarFnArray; use crate::builtins::ArrayBuiltins; use crate::dtype::DType; use crate::dtype::Nullability; -use crate::optimizer::ArrayOptimizer; use crate::patches::Patches; use crate::scalar::Scalar; +use crate::scalar_fn::ScalarFnVTableExt; use crate::scalar_fn::fns::binary::Binary; use crate::scalar_fn::fns::operators::Operator; @@ -265,11 +266,12 @@ impl Validity { | (Validity::AllValid, Validity::NonNullable) | (Validity::AllValid, Validity::AllValid) => Validity::AllValid, // Here we actually have to do some work - (Validity::Array(lhs), Validity::Array(rhs)) => Validity::Array( - Binary - .try_new_array(lhs.len(), Operator::And, [lhs, rhs])? - .optimize()?, - ), + (Validity::Array(lhs), Validity::Array(rhs)) => { + let len = lhs.len(); + let parts = + ScalarFnArray::try_new_parts(Binary.bind(Operator::And), vec![lhs, rhs], len)?; + Validity::Array(ParentRef::from_parts(&parts).optimize()?) 
+ } }) } From b1d0571cd6ffd92c417e4c5fe00bfe03da70a303 Mon Sep 17 00:00:00 2001 From: Robert Kruszewski Date: Fri, 29 May 2026 00:34:48 +0100 Subject: [PATCH 11/17] less as_ref Signed-off-by: Robert Kruszewski --- encodings/alp/src/alp/array.rs | 4 +- encodings/alp/src/alp_rd/array.rs | 8 +-- encodings/bytebool/src/array.rs | 4 +- .../src/decimal_byte_parts/mod.rs | 2 +- .../fastlanes/src/bitpacking/array/mod.rs | 8 +-- encodings/fastlanes/src/delta/array/mod.rs | 4 +- encodings/fastlanes/src/for/array/mod.rs | 2 +- encodings/fastlanes/src/rle/array/mod.rs | 6 +-- encodings/fsst/src/array.rs | 12 ++--- encodings/parquet-variant/src/array.rs | 10 ++-- encodings/runend/src/array.rs | 4 +- encodings/sparse/src/lib.rs | 6 +-- encodings/zigzag/src/array.rs | 2 +- vortex-array-macros/src/lib.rs | 6 +-- vortex-array/src/array/typed.rs | 54 +++++++++++++++++-- vortex-array/src/arrays/bool/array.rs | 13 ++--- vortex-array/src/arrays/bool/test_harness.rs | 4 +- vortex-array/src/arrays/chunked/array.rs | 14 ++--- .../src/arrays/chunked/paired_chunks.rs | 4 +- vortex-array/src/arrays/decimal/array.rs | 11 ++-- vortex-array/src/arrays/extension/array.rs | 2 +- vortex-array/src/arrays/filter/array.rs | 2 +- .../src/arrays/fixed_size_list/array.rs | 10 ++-- vortex-array/src/arrays/list/array.rs | 17 +++--- vortex-array/src/arrays/listview/array.rs | 21 ++++---- vortex-array/src/arrays/masked/array.rs | 4 +- vortex-array/src/arrays/patched/array.rs | 6 +-- .../src/arrays/primitive/array/mod.rs | 13 ++--- .../src/arrays/primitive/array/top_value.rs | 4 +- vortex-array/src/arrays/scalar_fn/array.rs | 4 +- vortex-array/src/arrays/shared/array.rs | 2 +- vortex-array/src/arrays/slice/array.rs | 2 +- vortex-array/src/arrays/struct_/array.rs | 15 +++--- vortex-array/src/arrays/varbin/array.rs | 17 +++--- vortex-array/src/arrays/varbinview/array.rs | 7 +-- vortex-array/src/arrays/varbinview/compact.rs | 4 +- vortex-array/src/arrays/variant/mod.rs | 4 +- vortex-array/src/iter.rs | 2 +- 
vortex-array/src/serde.rs | 2 +- 39 files changed, 169 insertions(+), 147 deletions(-) diff --git a/encodings/alp/src/alp/array.rs b/encodings/alp/src/alp/array.rs index 8dd43935644..f3ed1831d6a 100644 --- a/encodings/alp/src/alp/array.rs +++ b/encodings/alp/src/alp/array.rs @@ -422,8 +422,8 @@ pub trait ALPArrayExt: ALPArraySlotsExt { fn patches(&self) -> Option { PatchesData::patches_from_slots( self.patches_data.as_ref(), - self.as_ref().len(), - self.as_ref().slots(), + self.len(), + self.slots(), PATCH_SLOTS, ) } diff --git a/encodings/alp/src/alp_rd/array.rs b/encodings/alp/src/alp_rd/array.rs index 5539d285367..d948921b6f4 100644 --- a/encodings/alp/src/alp_rd/array.rs +++ b/encodings/alp/src/alp_rd/array.rs @@ -579,11 +579,11 @@ fn validate_parts( pub trait ALPRDArrayExt: TypedArrayRef { fn left_parts(&self) -> &ArrayRef { - left_parts_from_slots(self.as_ref().slots()) + left_parts_from_slots(self.slots()) } fn right_parts(&self) -> &ArrayRef { - right_parts_from_slots(self.as_ref().slots()) + right_parts_from_slots(self.slots()) } fn right_bit_width(&self) -> u8 { @@ -592,9 +592,9 @@ pub trait ALPRDArrayExt: TypedArrayRef { fn left_parts_patches(&self) -> Option { patches_from_slots( - self.as_ref().slots(), + self.slots(), self.patches_data.as_ref(), - self.as_ref().len(), + self.len(), ) } diff --git a/encodings/bytebool/src/array.rs b/encodings/bytebool/src/array.rs index 28656a6f46d..a574bd24516 100644 --- a/encodings/bytebool/src/array.rs +++ b/encodings/bytebool/src/array.rs @@ -188,8 +188,8 @@ impl Display for ByteBoolData { pub trait ByteBoolArrayExt: TypedArrayRef { fn validity(&self) -> Validity { child_to_validity( - self.as_ref().slots()[VALIDITY_SLOT].as_ref(), - self.as_ref().dtype().nullability(), + self.slots()[VALIDITY_SLOT].as_ref(), + self.dtype().nullability(), ) } } diff --git a/encodings/decimal-byte-parts/src/decimal_byte_parts/mod.rs b/encodings/decimal-byte-parts/src/decimal_byte_parts/mod.rs index c6f7f5ded94..e56f5cbb02b 100644 
--- a/encodings/decimal-byte-parts/src/decimal_byte_parts/mod.rs +++ b/encodings/decimal-byte-parts/src/decimal_byte_parts/mod.rs @@ -207,7 +207,7 @@ pub struct DecimalBytePartsDataParts { pub trait DecimalBytePartsArrayExt: TypedArrayRef { fn msp(&self) -> &ArrayRef { - self.as_ref().slots()[MSP_SLOT] + self.slots()[MSP_SLOT] .as_ref() .vortex_expect("DecimalBytePartsArray msp slot") } diff --git a/encodings/fastlanes/src/bitpacking/array/mod.rs b/encodings/fastlanes/src/bitpacking/array/mod.rs index e5c64252fbc..2c13ea38363 100644 --- a/encodings/fastlanes/src/bitpacking/array/mod.rs +++ b/encodings/fastlanes/src/bitpacking/array/mod.rs @@ -295,15 +295,15 @@ pub trait BitPackedArrayExt: BitPackedArraySlotsExt { fn patches(&self) -> Option { PatchesData::patches_from_slots( self.patches_data.as_ref(), - self.as_ref().len(), - self.as_ref().slots(), + self.len(), + self.slots(), PATCH_SLOTS, ) } #[inline] fn validity(&self) -> Validity { - child_to_validity(self.validity_child(), self.as_ref().dtype().nullability()) + child_to_validity(self.validity_child(), self.dtype().nullability()) } #[inline] @@ -313,7 +313,7 @@ pub trait BitPackedArrayExt: BitPackedArraySlotsExt { #[inline] fn unpacked_chunks(&self) -> VortexResult> { - BitPackedData::unpacked_chunks::(self, self.as_ref().dtype(), self.as_ref().len()) + BitPackedData::unpacked_chunks::(self, self.dtype(), self.len()) } } diff --git a/encodings/fastlanes/src/delta/array/mod.rs b/encodings/fastlanes/src/delta/array/mod.rs index 33ece0deddd..ffc8dfc49be 100644 --- a/encodings/fastlanes/src/delta/array/mod.rs +++ b/encodings/fastlanes/src/delta/array/mod.rs @@ -90,13 +90,13 @@ impl Display for DeltaData { pub trait DeltaArrayExt: TypedArrayRef { fn bases(&self) -> &ArrayRef { - self.as_ref().slots()[BASES_SLOT] + self.slots()[BASES_SLOT] .as_ref() .vortex_expect("DeltaArray bases slot") } fn deltas(&self) -> &ArrayRef { - self.as_ref().slots()[DELTAS_SLOT] + self.slots()[DELTAS_SLOT] .as_ref() 
.vortex_expect("DeltaArray deltas slot") } diff --git a/encodings/fastlanes/src/for/array/mod.rs b/encodings/fastlanes/src/for/array/mod.rs index 691888ef5da..9554402abec 100644 --- a/encodings/fastlanes/src/for/array/mod.rs +++ b/encodings/fastlanes/src/for/array/mod.rs @@ -31,7 +31,7 @@ pub struct FoRData { pub trait FoRArrayExt: TypedArrayRef { fn encoded(&self) -> &ArrayRef { - self.as_ref().slots()[ENCODED_SLOT] + self.slots()[ENCODED_SLOT] .as_ref() .vortex_expect("FoRArray encoded slot") } diff --git a/encodings/fastlanes/src/rle/array/mod.rs b/encodings/fastlanes/src/rle/array/mod.rs index 8793179c7d3..923399a61b8 100644 --- a/encodings/fastlanes/src/rle/array/mod.rs +++ b/encodings/fastlanes/src/rle/array/mod.rs @@ -82,21 +82,21 @@ impl RLEData { pub trait RLEArrayExt: TypedArrayRef { #[inline] fn values(&self) -> &ArrayRef { - self.as_ref().slots()[VALUES_SLOT] + self.slots()[VALUES_SLOT] .as_ref() .vortex_expect("RLEArray values slot must be populated") } #[inline] fn indices(&self) -> &ArrayRef { - self.as_ref().slots()[INDICES_SLOT] + self.slots()[INDICES_SLOT] .as_ref() .vortex_expect("RLEArray indices slot must be populated") } #[inline] fn values_idx_offsets(&self) -> &ArrayRef { - self.as_ref().slots()[VALUES_IDX_OFFSETS_SLOT] + self.slots()[VALUES_IDX_OFFSETS_SLOT] .as_ref() .vortex_expect("RLEArray values_idx_offsets slot must be populated") } diff --git a/encodings/fsst/src/array.rs b/encodings/fsst/src/array.rs index 4a6b19f9acf..26349cec8f6 100644 --- a/encodings/fsst/src/array.rs +++ b/encodings/fsst/src/array.rs @@ -707,7 +707,7 @@ fn uncompressed_lengths_from_slots(slots: &[Option]) -> &ArrayRef { pub trait FSSTArrayExt: TypedArrayRef { fn uncompressed_lengths(&self) -> &ArrayRef { - uncompressed_lengths_from_slots(self.as_ref().slots()) + uncompressed_lengths_from_slots(self.slots()) } fn uncompressed_lengths_dtype(&self) -> &DType { @@ -717,13 +717,13 @@ pub trait FSSTArrayExt: TypedArrayRef { /// Reconstruct a [`VarBinArray`] for the 
compressed codes by combining the bytes /// from [`FSSTData`] with the offsets and validity stored in the array's slots. fn codes(&self) -> VarBinArray { - let offsets = self.as_ref().slots()[CODES_OFFSETS_SLOT] + let offsets = self.slots()[CODES_OFFSETS_SLOT] .as_ref() .vortex_expect("FSSTArray codes_offsets slot") .clone(); let validity = child_to_validity( - self.as_ref().slots()[CODES_VALIDITY_SLOT].as_ref(), - self.as_ref().dtype().nullability(), + self.slots()[CODES_VALIDITY_SLOT].as_ref(), + self.dtype().nullability(), ); let codes_bytes = self.codes_bytes_handle().clone(); // SAFETY: components were validated at construction time. @@ -731,7 +731,7 @@ pub trait FSSTArrayExt: TypedArrayRef { VarBinArray::new_unchecked_from_handle( offsets, codes_bytes, - DType::Binary(self.as_ref().dtype().nullability()), + DType::Binary(self.dtype().nullability()), validity, ) } @@ -739,7 +739,7 @@ pub trait FSSTArrayExt: TypedArrayRef { /// Get the DType of the codes array. fn codes_dtype(&self) -> DType { - DType::Binary(self.as_ref().dtype().nullability()) + DType::Binary(self.dtype().nullability()) } } diff --git a/encodings/parquet-variant/src/array.rs b/encodings/parquet-variant/src/array.rs index d7004af4ef8..aea9b48b7d9 100644 --- a/encodings/parquet-variant/src/array.rs +++ b/encodings/parquet-variant/src/array.rs @@ -311,7 +311,7 @@ fn inferred_shredded_field_validity( pub trait ParquetVariantArrayExt: TypedArrayRef { /// Returns the non-nullable Parquet Variant metadata child. fn metadata_array(&self) -> &ArrayRef { - self.as_ref().slots()[METADATA_SLOT] + self.slots()[METADATA_SLOT] .as_ref() .vortex_expect("ParquetVariantArray metadata slot") } @@ -319,19 +319,19 @@ pub trait ParquetVariantArrayExt: TypedArrayRef { /// Returns the outer row validity for the Variant values. 
fn validity(&self) -> Validity { child_to_validity( - self.as_ref().slots()[VALIDITY_SLOT].as_ref(), - self.as_ref().dtype().nullability(), + self.slots()[VALIDITY_SLOT].as_ref(), + self.dtype().nullability(), ) } /// Returns the optional raw Parquet Variant `value` child. fn value_array(&self) -> Option<&ArrayRef> { - self.as_ref().slots()[VALUE_SLOT].as_ref() + self.slots()[VALUE_SLOT].as_ref() } /// Returns the optional shredded Parquet Variant `typed_value` child. fn typed_value_array(&self) -> Option<&ArrayRef> { - self.as_ref().slots()[TYPED_VALUE_SLOT].as_ref() + self.slots()[TYPED_VALUE_SLOT].as_ref() } /// Converts this storage array to Arrow's canonical Parquet Variant extension storage. diff --git a/encodings/runend/src/array.rs b/encodings/runend/src/array.rs index e91c290cfa0..68384559a1b 100644 --- a/encodings/runend/src/array.rs +++ b/encodings/runend/src/array.rs @@ -217,13 +217,13 @@ pub trait RunEndArrayExt: TypedArrayRef { } fn ends(&self) -> &ArrayRef { - self.as_ref().slots()[ENDS_SLOT] + self.slots()[ENDS_SLOT] .as_ref() .vortex_expect("RunEndArray ends slot") } fn values(&self) -> &ArrayRef { - self.as_ref().slots()[VALUES_SLOT] + self.slots()[VALUES_SLOT] .as_ref() .vortex_expect("RunEndArray values slot") } diff --git a/encodings/sparse/src/lib.rs b/encodings/sparse/src/lib.rs index 5e63e600e92..ccb8e9daadd 100644 --- a/encodings/sparse/src/lib.rs +++ b/encodings/sparse/src/lib.rs @@ -141,13 +141,13 @@ impl SparseOwnedExt for Array { let patches = Patches::new( self.len(), self.patches().offset(), - self.as_ref().slots()[SparseSlots::PATCH_INDICES] + self.slots()[SparseSlots::PATCH_INDICES] .clone() .vortex_expect("indices"), - self.as_ref().slots()[SparseSlots::PATCH_VALUES] + self.slots()[SparseSlots::PATCH_VALUES] .clone() .vortex_expect("values"), - self.as_ref().slots()[SparseSlots::PATCH_CHUNK_OFFSETS].clone(), + self.slots()[SparseSlots::PATCH_CHUNK_OFFSETS].clone(), )?; Ok(SparseParts { patches, diff --git 
a/encodings/zigzag/src/array.rs b/encodings/zigzag/src/array.rs index d1dad0960d1..90bc9640e49 100644 --- a/encodings/zigzag/src/array.rs +++ b/encodings/zigzag/src/array.rs @@ -181,7 +181,7 @@ impl Display for ZigZagData { pub trait ZigZagArrayExt: TypedArrayRef { fn encoded(&self) -> &ArrayRef { - self.as_ref().slots()[ENCODED_SLOT] + self.slots()[ENCODED_SLOT] .as_ref() .vortex_expect("ZigZagArray encoded slot") } diff --git a/vortex-array-macros/src/lib.rs b/vortex-array-macros/src/lib.rs index 3105dac4add..00ef2ff2678 100644 --- a/vortex-array-macros/src/lib.rs +++ b/vortex-array-macros/src/lib.rs @@ -206,7 +206,7 @@ fn expand_array_slots( #[doc = "Returns a borrowed view of all slots."] fn slots_view(&self) -> #view_ident<'_> { - #view_ident::from_slots(self.as_ref().slots()) + #view_ident::from_slots(self.slots()) } } @@ -347,7 +347,7 @@ impl SlotField { #[inline] fn #field_ident(&self) -> &::vortex_array::ArrayRef { ::vortex_error::VortexExpect::vortex_expect( - self.as_ref().slots()[#struct_ident::#const_ident].as_ref(), + self.slots()[#struct_ident::#const_ident].as_ref(), #expect_message, ) } @@ -355,7 +355,7 @@ impl SlotField { SlotFieldType::Optional => quote! { #[inline] fn #field_ident(&self) -> Option<&::vortex_array::ArrayRef> { - self.as_ref().slots()[#struct_ident::#const_ident].as_ref() + self.slots()[#struct_ident::#const_ident].as_ref() } }, } diff --git a/vortex-array/src/array/typed.rs b/vortex-array/src/array/typed.rs index 16539e8e6ad..9862f8008bb 100644 --- a/vortex-array/src/array/typed.rs +++ b/vortex-array/src/array/typed.rs @@ -86,16 +86,62 @@ impl ArrayParts { /// /// Extension traits use this to share typed array logic while still exposing the backing /// [`ArrayRef`] and the encoding-specific [`VTable::TypedArrayData`]. -pub trait TypedArrayRef: AsRef + Deref { +pub trait TypedArrayRef: Deref { /// Returns an owned [`Array`] from the reference. 
+ fn to_owned(&self) -> Array; + + fn slots(&self) -> &[Option]; + + fn len(&self) -> usize; + + fn dtype(&self) -> &DType; + + fn validity(&self) -> VortexResult; +} + +impl TypedArrayRef for Array { fn to_owned(&self) -> Array { - self.as_ref().clone().downcast() + self.clone() + } + + fn slots(&self) -> &[Option] { + self.slots() + } + + fn len(&self) -> usize { + self.len() + } + + fn dtype(&self) -> &DType { + self.dtype() + } + + fn validity(&self) -> VortexResult { + self.validity() } } -impl TypedArrayRef for Array {} +impl TypedArrayRef for ArrayView<'_, V> { + fn to_owned(&self) -> Array { + self.array().clone().downcast() + } + + fn slots(&self) -> &[Option] { + self.slots() + } + + fn len(&self) -> usize { + self.len() + } + + fn dtype(&self) -> &DType { + self.dtype() + } -impl TypedArrayRef for ArrayView<'_, V> {} + fn validity(&self) -> VortexResult { + self.validity() + } +} // ============================================================================= // ArrayData — the concrete type stored inside Arc // ============================================================================= diff --git a/vortex-array/src/arrays/bool/array.rs b/vortex-array/src/arrays/bool/array.rs index 6585e705899..9e5bc22cba5 100644 --- a/vortex-array/src/arrays/bool/array.rs +++ b/vortex-array/src/arrays/bool/array.rs @@ -86,22 +86,19 @@ pub struct BoolDataParts { pub trait BoolArrayExt: TypedArrayRef { fn nullability(&self) -> crate::dtype::Nullability { - match self.as_ref().dtype() { + match self.dtype() { DType::Bool(nullability) => *nullability, _ => unreachable!("BoolArrayExt requires a bool dtype"), } } fn validity(&self) -> Validity { - child_to_validity( - self.as_ref().slots()[VALIDITY_SLOT].as_ref(), - self.nullability(), - ) + child_to_validity(self.slots()[VALIDITY_SLOT].as_ref(), self.nullability()) } fn to_bit_buffer(&self) -> BitBuffer { let buffer = self.bits.as_host().clone(); - BitBuffer::new_with_offset(buffer, self.as_ref().len(), self.offset) + 
BitBuffer::new_with_offset(buffer, self.len(), self.offset) } fn maybe_execute_mask(&self, ctx: &mut ExecutionCtx) -> VortexResult> { @@ -122,11 +119,11 @@ pub trait BoolArrayExt: TypedArrayRef { fn to_mask_fill_null_false(&self, ctx: &mut ExecutionCtx) -> Mask { let validity_mask = self .validity() - .execute_mask(self.as_ref().len(), ctx) + .execute_mask(self.len(), ctx) .vortex_expect("Failed to compute validity mask"); let buffer = match validity_mask { Mask::AllTrue(_) => self.to_bit_buffer(), - Mask::AllFalse(_) => return Mask::new_false(self.as_ref().len()), + Mask::AllFalse(_) => return Mask::new_false(self.len()), Mask::Values(validity) => validity.bit_buffer() & self.to_bit_buffer(), }; Mask::from_buffer(buffer) diff --git a/vortex-array/src/arrays/bool/test_harness.rs b/vortex-array/src/arrays/bool/test_harness.rs index 4e13d62e3ef..8c5209a04d4 100644 --- a/vortex-array/src/arrays/bool/test_harness.rs +++ b/vortex-array/src/arrays/bool/test_harness.rs @@ -14,7 +14,7 @@ impl BoolArray { self.validity() .vortex_expect("failed to get validity") .execute_mask( - self.as_ref().len(), + self.len(), &mut LEGACY_SESSION.create_execution_ctx(), ) .vortex_expect("Failed to compute validity mask") @@ -29,7 +29,7 @@ impl BoolArray { self.validity() .vortex_expect("failed to get validity") .execute_mask( - self.as_ref().len(), + self.len(), &mut LEGACY_SESSION.create_execution_ctx(), ) .vortex_expect("Failed to compute validity mask") diff --git a/vortex-array/src/arrays/chunked/array.rs b/vortex-array/src/arrays/chunked/array.rs index a0ed3f98d67..89e5acc84c4 100644 --- a/vortex-array/src/arrays/chunked/array.rs +++ b/vortex-array/src/arrays/chunked/array.rs @@ -51,24 +51,24 @@ impl Display for ChunkedData { pub trait ChunkedArrayExt: TypedArrayRef { fn chunk_offsets_array(&self) -> &ArrayRef { - self.as_ref().slots()[CHUNK_OFFSETS_SLOT] + self.slots()[CHUNK_OFFSETS_SLOT] .as_ref() .vortex_expect("validated chunk offsets slot") } fn nchunks(&self) -> usize { - 
self.as_ref().slots().len().saturating_sub(CHUNKS_OFFSET) + self.slots().len().saturating_sub(CHUNKS_OFFSET) } fn chunk(&self, idx: usize) -> &ArrayRef { - self.as_ref().slots()[CHUNKS_OFFSET + idx] + self.slots()[CHUNKS_OFFSET + idx] .as_ref() .vortex_expect("validated chunk slot") } fn iter_chunks<'a>(&'a self) -> Box + 'a> { Box::new( - self.as_ref().slots()[CHUNKS_OFFSET..] + self.slots()[CHUNKS_OFFSET..] .iter() .map(|slot| slot.as_ref().vortex_expect("validated chunk slot")), ) @@ -88,7 +88,7 @@ pub trait ChunkedArrayExt: TypedArrayRef { fn find_chunk_idx(&self, index: usize) -> VortexResult<(usize, usize)> { assert!( - index <= self.as_ref().len(), + index <= self.len(), "Index out of bounds of the array" ); let chunk_offsets = self.chunk_offsets(); @@ -103,14 +103,14 @@ pub trait ChunkedArrayExt: TypedArrayRef { fn array_iterator(&self) -> impl ArrayIterator + '_ { ArrayIteratorAdapter::new( - self.as_ref().dtype().clone(), + self.dtype().clone(), self.iter_chunks().map(|chunk| Ok(chunk.clone())), ) } fn array_stream(&self) -> impl ArrayStream + '_ { ArrayStreamAdapter::new( - self.as_ref().dtype().clone(), + self.dtype().clone(), stream::iter(self.iter_chunks().map(|chunk| Ok(chunk.clone()))), ) } diff --git a/vortex-array/src/arrays/chunked/paired_chunks.rs b/vortex-array/src/arrays/chunked/paired_chunks.rs index 2145c88dbbd..4c9f4fa7691 100644 --- a/vortex-array/src/arrays/chunked/paired_chunks.rs +++ b/vortex-array/src/arrays/chunked/paired_chunks.rs @@ -70,7 +70,7 @@ pub(crate) struct PairedChunks { pub(crate) trait PairedChunksExt: ChunkedArrayExt { fn paired_chunks(&self, other: &T) -> PairedChunks { assert_eq!( - self.as_ref().len(), + self.len(), other.as_ref().len(), "paired_chunks requires arrays of equal length" ); @@ -78,7 +78,7 @@ pub(crate) trait PairedChunksExt: ChunkedArrayExt { left: ChunkCursor::new(self.chunks()), right: ChunkCursor::new(other.chunks()), pos: 0, - total_len: self.as_ref().len(), + total_len: self.len(), } } } diff --git 
a/vortex-array/src/arrays/decimal/array.rs b/vortex-array/src/arrays/decimal/array.rs index 1f4d6c0c277..88e594ed04f 100644 --- a/vortex-array/src/arrays/decimal/array.rs +++ b/vortex-array/src/arrays/decimal/array.rs @@ -130,28 +130,25 @@ pub struct DecimalDataParts { pub trait DecimalArrayExt: TypedArrayRef { fn decimal_dtype(&self) -> DecimalDType { - match self.as_ref().dtype() { + match self.dtype() { DType::Decimal(decimal_dtype, _) => *decimal_dtype, _ => unreachable!("DecimalArrayExt requires a decimal dtype"), } } fn nullability(&self) -> Nullability { - match self.as_ref().dtype() { + match self.dtype() { DType::Decimal(_, nullability) => *nullability, _ => unreachable!("DecimalArrayExt requires a decimal dtype"), } } fn validity_child(&self) -> Option<&ArrayRef> { - self.as_ref().slots()[VALIDITY_SLOT].as_ref() + self.slots()[VALIDITY_SLOT].as_ref() } fn validity(&self) -> Validity { - child_to_validity( - self.as_ref().slots()[VALIDITY_SLOT].as_ref(), - self.nullability(), - ) + child_to_validity(self.slots()[VALIDITY_SLOT].as_ref(), self.nullability()) } fn values_type(&self) -> DecimalType { diff --git a/vortex-array/src/arrays/extension/array.rs b/vortex-array/src/arrays/extension/array.rs index 0fd57030cdd..8bee9b344fa 100644 --- a/vortex-array/src/arrays/extension/array.rs +++ b/vortex-array/src/arrays/extension/array.rs @@ -31,7 +31,7 @@ pub trait ExtensionArrayExt: TypedArrayRef { } fn storage_array(&self) -> &ArrayRef { - self.as_ref().slots()[STORAGE_SLOT] + self.slots()[STORAGE_SLOT] .as_ref() .vortex_expect("ExtensionArray storage slot") } diff --git a/vortex-array/src/arrays/filter/array.rs b/vortex-array/src/arrays/filter/array.rs index f5a0f776089..2c12abda035 100644 --- a/vortex-array/src/arrays/filter/array.rs +++ b/vortex-array/src/arrays/filter/array.rs @@ -44,7 +44,7 @@ pub struct FilterDataParts { pub trait FilterArrayExt: TypedArrayRef { fn child(&self) -> &ArrayRef { - self.as_ref().slots()[CHILD_SLOT] + self.slots()[CHILD_SLOT] 
.as_ref() .vortex_expect("validated filter child slot") } diff --git a/vortex-array/src/arrays/fixed_size_list/array.rs b/vortex-array/src/arrays/fixed_size_list/array.rs index cc82551ab82..8460b2db11c 100644 --- a/vortex-array/src/arrays/fixed_size_list/array.rs +++ b/vortex-array/src/arrays/fixed_size_list/array.rs @@ -205,7 +205,7 @@ impl FixedSizeListData { pub trait FixedSizeListArrayExt: TypedArrayRef { fn dtype_parts(&self) -> (&DType, u32, crate::dtype::Nullability) { - match self.as_ref().dtype() { + match self.dtype() { DType::FixedSizeList(element_dtype, list_size, nullability) => { (element_dtype.as_ref(), *list_size, *nullability) } @@ -214,7 +214,7 @@ pub trait FixedSizeListArrayExt: TypedArrayRef { } fn elements(&self) -> &ArrayRef { - self.as_ref().slots()[ELEMENTS_SLOT] + self.slots()[ELEMENTS_SLOT] .as_ref() .vortex_expect("FixedSizeListArray elements slot") } @@ -226,15 +226,15 @@ pub trait FixedSizeListArrayExt: TypedArrayRef { fn fixed_size_list_validity(&self) -> Validity { let (_, _, nullability) = self.dtype_parts(); - child_to_validity(self.as_ref().slots()[VALIDITY_SLOT].as_ref(), nullability) + child_to_validity(self.slots()[VALIDITY_SLOT].as_ref(), nullability) } fn fixed_size_list_elements_at(&self, index: usize) -> VortexResult { debug_assert!( - index < self.as_ref().len(), + index < self.len(), "index {} out of bounds: the len is {}", index, - self.as_ref().len(), + self.len(), ); debug_assert!( self.fixed_size_list_validity() diff --git a/vortex-array/src/arrays/list/array.rs b/vortex-array/src/arrays/list/array.rs index 3e1c43ea3e9..3ca0c7ec1c9 100644 --- a/vortex-array/src/arrays/list/array.rs +++ b/vortex-array/src/arrays/list/array.rs @@ -268,36 +268,33 @@ impl ListData { pub trait ListArrayExt: TypedArrayRef { fn nullability(&self) -> crate::dtype::Nullability { - match self.as_ref().dtype() { + match self.dtype() { DType::List(_, nullability) => *nullability, _ => unreachable!("ListArrayExt requires a list dtype"), } } fn 
elements(&self) -> &ArrayRef { - self.as_ref().slots()[ELEMENTS_SLOT] + self.slots()[ELEMENTS_SLOT] .as_ref() .vortex_expect("ListArray elements slot") } fn offsets(&self) -> &ArrayRef { - self.as_ref().slots()[OFFSETS_SLOT] + self.slots()[OFFSETS_SLOT] .as_ref() .vortex_expect("ListArray offsets slot") } fn list_validity(&self) -> Validity { - child_to_validity( - self.as_ref().slots()[VALIDITY_SLOT].as_ref(), - self.nullability(), - ) + child_to_validity(self.slots()[VALIDITY_SLOT].as_ref(), self.nullability()) } fn offset_at(&self, index: usize) -> VortexResult { vortex_ensure!( - index <= self.as_ref().len(), + index <= self.len(), "Index {index} out of bounds 0..={}", - self.as_ref().len() + self.len() ); if let Some(p) = self.offsets().as_opt::() { @@ -321,7 +318,7 @@ pub trait ListArrayExt: TypedArrayRef { fn sliced_elements(&self) -> VortexResult { let start = self.offset_at(0)?; - let end = self.offset_at(self.as_ref().len())?; + let end = self.offset_at(self.len())?; self.elements().slice(start..end) } diff --git a/vortex-array/src/arrays/listview/array.rs b/vortex-array/src/arrays/listview/array.rs index 64e59d1687f..9655fbcd4ae 100644 --- a/vortex-array/src/arrays/listview/array.rs +++ b/vortex-array/src/arrays/listview/array.rs @@ -346,42 +346,39 @@ fn fill_referenced_mask( pub trait ListViewArrayExt: TypedArrayRef { fn nullability(&self) -> crate::dtype::Nullability { - match self.as_ref().dtype() { + match self.dtype() { DType::List(_, nullability) => *nullability, _ => unreachable!("ListViewArrayExt requires a list dtype"), } } fn elements(&self) -> &ArrayRef { - self.as_ref().slots()[ELEMENTS_SLOT] + self.slots()[ELEMENTS_SLOT] .as_ref() .vortex_expect("ListViewArray elements slot") } fn offsets(&self) -> &ArrayRef { - self.as_ref().slots()[OFFSETS_SLOT] + self.slots()[OFFSETS_SLOT] .as_ref() .vortex_expect("ListViewArray offsets slot") } fn sizes(&self) -> &ArrayRef { - self.as_ref().slots()[SIZES_SLOT] + self.slots()[SIZES_SLOT] .as_ref() 
.vortex_expect("ListViewArray sizes slot") } fn listview_validity(&self) -> Validity { - child_to_validity( - self.as_ref().slots()[VALIDITY_SLOT].as_ref(), - self.nullability(), - ) + child_to_validity(self.slots()[VALIDITY_SLOT].as_ref(), self.nullability()) } fn offset_at(&self, index: usize) -> usize { assert!( - index < self.as_ref().len(), + index < self.len(), "Index {index} out of bounds 0..{}", - self.as_ref().len() + self.len() ); self.offsets() .as_opt::() @@ -398,10 +395,10 @@ pub trait ListViewArrayExt: TypedArrayRef { fn size_at(&self, index: usize) -> usize { assert!( - index < self.as_ref().len(), + index < self.len(), "Index {} out of bounds 0..{}", index, - self.as_ref().len() + self.len() ); self.sizes() .as_opt::() diff --git a/vortex-array/src/arrays/masked/array.rs b/vortex-array/src/arrays/masked/array.rs index 5ba830cc0e5..af3c19eca78 100644 --- a/vortex-array/src/arrays/masked/array.rs +++ b/vortex-array/src/arrays/masked/array.rs @@ -40,8 +40,8 @@ impl Display for MaskedData { pub trait MaskedArrayExt: TypedArrayRef + MaskedArraySlotsExt { fn masked_validity(&self) -> Validity { child_to_validity( - self.as_ref().slots()[MaskedSlots::VALIDITY].as_ref(), - self.as_ref().dtype().nullability(), + self.slots()[MaskedSlots::VALIDITY].as_ref(), + self.dtype().nullability(), ) } } diff --git a/vortex-array/src/arrays/patched/array.rs b/vortex-array/src/arrays/patched/array.rs index b1e5367607b..127869bbf39 100644 --- a/vortex-array/src/arrays/patched/array.rs +++ b/vortex-array/src/arrays/patched/array.rs @@ -111,7 +111,7 @@ pub trait PatchedArrayExt: PatchedArraySlotsExt { #[inline] fn lane_range(&self, chunk: usize, lane: usize) -> VortexResult> { - assert!(chunk * 1024 <= self.as_ref().len() + self.offset()); + assert!(chunk * 1024 <= self.len() + self.offset()); assert!(lane < self.n_lanes()); let start = self.lane_offsets().execute_scalar( @@ -149,12 +149,12 @@ pub trait PatchedArrayExt: PatchedArraySlotsExt { let begin = (chunks.start * 
1024).saturating_sub(self.offset()); let end = (chunks.end * 1024) .saturating_sub(self.offset()) - .min(self.as_ref().len()); + .min(self.len()); let offset = if chunks.start == 0 { self.offset() } else { 0 }; let inner = self.inner().slice(begin..end)?; let len = inner.len(); - let dtype = self.as_ref().dtype().clone(); + let dtype = self.dtype().clone(); let slots = PatchedSlots { inner, lane_offsets: sliced_lane_offsets, diff --git a/vortex-array/src/arrays/primitive/array/mod.rs b/vortex-array/src/arrays/primitive/array/mod.rs index 7e3dd9b4ce7..f6533fc6adc 100644 --- a/vortex-array/src/arrays/primitive/array/mod.rs +++ b/vortex-array/src/arrays/primitive/array/mod.rs @@ -106,28 +106,25 @@ pub struct PrimitiveDataParts { pub trait PrimitiveArrayExt: TypedArrayRef { fn ptype(&self) -> PType { - match self.as_ref().dtype() { + match self.dtype() { DType::Primitive(ptype, _) => *ptype, _ => unreachable!("PrimitiveArrayExt requires a primitive dtype"), } } fn nullability(&self) -> Nullability { - match self.as_ref().dtype() { + match self.dtype() { DType::Primitive(_, nullability) => *nullability, _ => unreachable!("PrimitiveArrayExt requires a primitive dtype"), } } fn validity_child(&self) -> Option<&ArrayRef> { - self.as_ref().slots()[VALIDITY_SLOT].as_ref() + self.slots()[VALIDITY_SLOT].as_ref() } fn validity(&self) -> Validity { - child_to_validity( - self.as_ref().slots()[VALIDITY_SLOT].as_ref(), - self.nullability(), - ) + child_to_validity(self.slots()[VALIDITY_SLOT].as_ref(), self.nullability()) } fn buffer_handle(&self) -> &BufferHandle { @@ -178,7 +175,7 @@ pub trait PrimitiveArrayExt: TypedArrayRef { return Ok(self.to_owned()); }; - let nullability = self.as_ref().dtype().nullability(); + let nullability = self.dtype().nullability(); if min < 0 || max < 0 { // Signed diff --git a/vortex-array/src/arrays/primitive/array/top_value.rs b/vortex-array/src/arrays/primitive/array/top_value.rs index d3ee5eb5a65..30448b0d8ff 100644 --- 
a/vortex-array/src/arrays/primitive/array/top_value.rs +++ b/vortex-array/src/arrays/primitive/array/top_value.rs @@ -33,8 +33,8 @@ impl PrimitiveArray { match_each_native_ptype!(self.ptype(), |P| { let (top, count) = typed_top_value( self.as_slice::

(), - self.as_ref().validity()?.execute_mask( - self.as_ref().len(), + self.validity()?.execute_mask( + self.len(), &mut LEGACY_SESSION.create_execution_ctx(), )?, ); diff --git a/vortex-array/src/arrays/scalar_fn/array.rs b/vortex-array/src/arrays/scalar_fn/array.rs index 65f378967b1..9b8267923c5 100644 --- a/vortex-array/src/arrays/scalar_fn/array.rs +++ b/vortex-array/src/arrays/scalar_fn/array.rs @@ -56,13 +56,13 @@ pub trait ScalarFnArrayExt: TypedArrayRef { } fn child_at(&self, idx: usize) -> &ArrayRef { - self.as_ref().slots()[idx] + self.slots()[idx] .as_ref() .vortex_expect("ScalarFnArray child slot") } fn child_count(&self) -> usize { - self.as_ref().slots().len() + self.slots().len() } fn nchildren(&self) -> usize { diff --git a/vortex-array/src/arrays/shared/array.rs b/vortex-array/src/arrays/shared/array.rs index 9619018ff89..93b896746ab 100644 --- a/vortex-array/src/arrays/shared/array.rs +++ b/vortex-array/src/arrays/shared/array.rs @@ -45,7 +45,7 @@ impl Display for SharedData { #[expect(async_fn_in_trait)] pub trait SharedArrayExt: TypedArrayRef { fn source(&self) -> &ArrayRef { - self.as_ref().slots()[SOURCE_SLOT] + self.slots()[SOURCE_SLOT] .as_ref() .vortex_expect("validated shared source slot") } diff --git a/vortex-array/src/arrays/slice/array.rs b/vortex-array/src/arrays/slice/array.rs index 9a353937456..bd761808c7b 100644 --- a/vortex-array/src/arrays/slice/array.rs +++ b/vortex-array/src/arrays/slice/array.rs @@ -38,7 +38,7 @@ pub struct SliceDataParts { pub trait SliceArrayExt: TypedArrayRef { fn child(&self) -> &ArrayRef { - self.as_ref().slots()[CHILD_SLOT] + self.slots()[CHILD_SLOT] .as_ref() .vortex_expect("validated slice child slot") } diff --git a/vortex-array/src/arrays/struct_/array.rs b/vortex-array/src/arrays/struct_/array.rs index 7feffb5484c..4167cb4224b 100644 --- a/vortex-array/src/arrays/struct_/array.rs +++ b/vortex-array/src/arrays/struct_/array.rs @@ -174,25 +174,22 @@ pub(super) fn make_struct_slots( pub trait 
StructArrayExt: TypedArrayRef { fn nullability(&self) -> crate::dtype::Nullability { - match self.as_ref().dtype() { + match self.dtype() { DType::Struct(_, nullability) => *nullability, _ => unreachable!("StructArrayExt requires a struct dtype"), } } fn names(&self) -> &FieldNames { - self.as_ref().dtype().as_struct_fields().names() + self.dtype().as_struct_fields().names() } fn struct_validity(&self) -> Validity { - child_to_validity( - self.as_ref().slots()[VALIDITY_SLOT].as_ref(), - self.nullability(), - ) + child_to_validity(self.slots()[VALIDITY_SLOT].as_ref(), self.nullability()) } fn iter_unmasked_fields(&self) -> impl Iterator + '_ { - self.as_ref().slots()[FIELDS_OFFSET..] + self.slots()[FIELDS_OFFSET..] .iter() .map(|s| s.as_ref().vortex_expect("StructArray field slot")) } @@ -202,7 +199,7 @@ pub trait StructArrayExt: TypedArrayRef { } fn unmasked_field(&self, idx: usize) -> &ArrayRef { - self.as_ref().slots()[FIELDS_OFFSET + idx] + self.slots()[FIELDS_OFFSET + idx] .as_ref() .vortex_expect("StructArray field slot") } @@ -225,7 +222,7 @@ pub trait StructArrayExt: TypedArrayRef { } fn struct_fields(&self) -> &StructFields { - self.as_ref().dtype().as_struct_fields() + self.dtype().as_struct_fields() } } impl> StructArrayExt for T {} diff --git a/vortex-array/src/arrays/varbin/array.rs b/vortex-array/src/arrays/varbin/array.rs index deaef952d5e..2a43706aca0 100644 --- a/vortex-array/src/arrays/varbin/array.rs +++ b/vortex-array/src/arrays/varbin/array.rs @@ -292,17 +292,17 @@ impl VarBinData { pub trait VarBinArrayExt: TypedArrayRef { fn offsets(&self) -> &ArrayRef { - self.as_ref().slots()[OFFSETS_SLOT] + self.slots()[OFFSETS_SLOT] .as_ref() .vortex_expect("VarBinArray offsets slot") } fn validity_child(&self) -> Option<&ArrayRef> { - self.as_ref().slots()[VALIDITY_SLOT].as_ref() + self.slots()[VALIDITY_SLOT].as_ref() } fn dtype_parts(&self) -> (bool, Nullability) { - match self.as_ref().dtype() { + match self.dtype() { DType::Utf8(nullability) => (true, 
*nullability), DType::Binary(nullability) => (false, *nullability), _ => unreachable!("VarBinArrayExt requires a utf8 or binary dtype"), @@ -318,17 +318,14 @@ pub trait VarBinArrayExt: TypedArrayRef { } fn varbin_validity(&self) -> Validity { - child_to_validity( - self.as_ref().slots()[VALIDITY_SLOT].as_ref(), - self.nullability(), - ) + child_to_validity(self.slots()[VALIDITY_SLOT].as_ref(), self.nullability()) } fn offset_at(&self, index: usize) -> usize { assert!( - index <= self.as_ref().len(), + index <= self.len(), "Index {index} out of bounds 0..={}", - self.as_ref().len() + self.len() ); (&self @@ -347,7 +344,7 @@ pub trait VarBinArrayExt: TypedArrayRef { fn sliced_bytes(&self) -> ByteBuffer { let first_offset: usize = self.offset_at(0); - let last_offset = self.offset_at(self.as_ref().len()); + let last_offset = self.offset_at(self.len()); self.bytes().slice(first_offset..last_offset) } } diff --git a/vortex-array/src/arrays/varbinview/array.rs b/vortex-array/src/arrays/varbinview/array.rs index e302c9341d2..d7fe2c00440 100644 --- a/vortex-array/src/arrays/varbinview/array.rs +++ b/vortex-array/src/arrays/varbinview/array.rs @@ -534,7 +534,7 @@ impl VarBinViewData { pub trait VarBinViewArrayExt: TypedArrayRef { fn dtype_parts(&self) -> (bool, Nullability) { - match self.as_ref().dtype() { + match self.dtype() { DType::Utf8(nullability) => (true, *nullability), DType::Binary(nullability) => (false, *nullability), _ => unreachable!("VarBinViewArrayExt requires a utf8 or binary dtype"), @@ -542,10 +542,7 @@ pub trait VarBinViewArrayExt: TypedArrayRef { } fn varbinview_validity(&self) -> Validity { - child_to_validity( - self.as_ref().slots()[VALIDITY_SLOT].as_ref(), - self.dtype_parts().1, - ) + child_to_validity(self.slots()[VALIDITY_SLOT].as_ref(), self.dtype_parts().1) } } impl> VarBinViewArrayExt for T {} diff --git a/vortex-array/src/arrays/varbinview/compact.rs b/vortex-array/src/arrays/varbinview/compact.rs index 6effc7c656a..a6a1c1c5bd9 100644 --- 
a/vortex-array/src/arrays/varbinview/compact.rs +++ b/vortex-array/src/arrays/varbinview/compact.rs @@ -65,8 +65,8 @@ impl VarBinViewArray { where F: FnMut(&Ref), { - match self.as_ref().validity()?.execute_mask( - self.as_ref().len(), + match self.validity()?.execute_mask( + self.len(), &mut LEGACY_SESSION.create_execution_ctx(), )? { Mask::AllTrue(_) => { diff --git a/vortex-array/src/arrays/variant/mod.rs b/vortex-array/src/arrays/variant/mod.rs index 7d7f23e34c5..a396d831ead 100644 --- a/vortex-array/src/arrays/variant/mod.rs +++ b/vortex-array/src/arrays/variant/mod.rs @@ -35,7 +35,7 @@ pub(super) const SLOT_NAMES: [&str; NUM_SLOTS] = ["core_storage", "shredded"]; pub trait VariantArrayExt: TypedArrayRef { /// Returns the logical variant storage that preserves the full value for every row. fn core_storage(&self) -> &ArrayRef { - self.as_ref().slots()[CORE_STORAGE_SLOT] + self.slots()[CORE_STORAGE_SLOT] .as_ref() .vortex_expect("validated variant core_storage slot") } @@ -44,7 +44,7 @@ pub trait VariantArrayExt: TypedArrayRef { /// This functions returns `Some` only if the array was canonicalized and the shredded data /// was pulled out of the underlying variant storage. 
fn shredded(&self) -> Option<&ArrayRef> { - self.as_ref().slots()[SHREDDED_SLOT].as_ref() + self.slots()[SHREDDED_SLOT].as_ref() } } impl> VariantArrayExt for T {} diff --git a/vortex-array/src/iter.rs b/vortex-array/src/iter.rs index b4cd01d875d..0841810ee9d 100644 --- a/vortex-array/src/iter.rs +++ b/vortex-array/src/iter.rs @@ -27,7 +27,7 @@ pub trait ArrayIterator: Iterator> { impl ArrayIterator for Box { #[inline] fn dtype(&self) -> &DType { - self.as_ref().dtype() + self.dtype() } } diff --git a/vortex-array/src/serde.rs b/vortex-array/src/serde.rs index 637b57324c0..d6032b090ef 100644 --- a/vortex-array/src/serde.rs +++ b/vortex-array/src/serde.rs @@ -279,7 +279,7 @@ impl> ArrayChildren for T { } fn len(&self) -> usize { - self.as_ref().len() + self.len() } } From ca9c07999a7104fa86d0190d05f8b2d725f54cf1 Mon Sep 17 00:00:00 2001 From: Robert Kruszewski Date: Fri, 29 May 2026 01:05:50 +0100 Subject: [PATCH 12/17] fixes Signed-off-by: Robert Kruszewski --- encodings/alp/src/alp_rd/array.rs | 6 +-- encodings/parquet-variant/src/array.rs | 2 +- vortex-array/src/array/typed.rs | 45 ++++++++++++++----- vortex-array/src/array/view.rs | 5 +++ vortex-array/src/arrays/bool/array.rs | 5 +-- vortex-array/src/arrays/bool/test_harness.rs | 10 +---- vortex-array/src/arrays/chunked/array.rs | 5 +-- .../src/arrays/chunked/paired_chunks.rs | 2 +- vortex-array/src/arrays/extension/array.rs | 3 +- .../src/arrays/primitive/array/mod.rs | 8 +++- .../src/arrays/primitive/array/top_value.rs | 6 +-- vortex-array/src/arrays/varbinview/compact.rs | 8 ++-- vortex-array/src/iter.rs | 2 +- vortex-array/src/serde.rs | 2 +- 14 files changed, 63 insertions(+), 46 deletions(-) diff --git a/encodings/alp/src/alp_rd/array.rs b/encodings/alp/src/alp_rd/array.rs index d948921b6f4..b186d49a02a 100644 --- a/encodings/alp/src/alp_rd/array.rs +++ b/encodings/alp/src/alp_rd/array.rs @@ -591,11 +591,7 @@ pub trait ALPRDArrayExt: TypedArrayRef { } fn left_parts_patches(&self) -> Option { - 
patches_from_slots( - self.slots(), - self.patches_data.as_ref(), - self.len(), - ) + patches_from_slots(self.slots(), self.patches_data.as_ref(), self.len()) } fn left_parts_dictionary(&self) -> &Buffer { diff --git a/encodings/parquet-variant/src/array.rs b/encodings/parquet-variant/src/array.rs index aea9b48b7d9..0c34dd67bc3 100644 --- a/encodings/parquet-variant/src/array.rs +++ b/encodings/parquet-variant/src/array.rs @@ -342,7 +342,7 @@ pub trait ParquetVariantArrayExt: TypedArrayRef { fn to_arrow(&self, ctx: &mut ExecutionCtx) -> VortexResult { let metadata = self.metadata_array(); let len = metadata.len(); - let nulls = to_arrow_null_buffer(self.validity(), len, ctx)?; + let nulls = to_arrow_null_buffer(ParquetVariantArrayExt::validity(self), len, ctx)?; let mut fields = Vec::with_capacity(3); let mut arrays: Vec = Vec::with_capacity(3); diff --git a/vortex-array/src/array/typed.rs b/vortex-array/src/array/typed.rs index 9862f8008bb..584b33dd021 100644 --- a/vortex-array/src/array/typed.rs +++ b/vortex-array/src/array/typed.rs @@ -86,10 +86,12 @@ impl ArrayParts { /// /// Extension traits use this to share typed array logic while still exposing the backing /// [`ArrayRef`] and the encoding-specific [`VTable::TypedArrayData`]. -pub trait TypedArrayRef: Deref { +pub trait TypedArrayRef: AsRef + Deref { /// Returns an owned [`Array`] from the reference. 
fn to_owned(&self) -> Array; + fn is_empty(&self) -> bool; + fn slots(&self) -> &[Option]; fn len(&self) -> usize; @@ -104,42 +106,60 @@ impl TypedArrayRef for Array { self.clone() } + #[allow(clippy::same_name_method)] + fn is_empty(&self) -> bool { + self.inner.is_empty() + } + + #[allow(clippy::same_name_method)] fn slots(&self) -> &[Option] { - self.slots() + Array::::slots(self) } + #[allow(clippy::same_name_method)] fn len(&self) -> usize { - self.len() + Array::::len(self) } + #[allow(clippy::same_name_method)] fn dtype(&self) -> &DType { - self.dtype() + Array::::dtype(self) } + #[allow(clippy::same_name_method)] fn validity(&self) -> VortexResult { - self.validity() + Array::::validity(self) } } impl TypedArrayRef for ArrayView<'_, V> { fn to_owned(&self) -> Array { - self.array().clone().downcast() + self.into_owned() + } + + #[allow(clippy::same_name_method)] + fn is_empty(&self) -> bool { + ArrayView::is_empty(self) } + #[allow(clippy::same_name_method)] fn slots(&self) -> &[Option] { - self.slots() + ArrayView::slots(self) } + #[allow(clippy::same_name_method)] fn len(&self) -> usize { - self.len() + ArrayView::len(self) } + #[allow(clippy::same_name_method)] fn dtype(&self) -> &DType { - self.dtype() + ArrayView::dtype(self) } + #[allow(clippy::same_name_method)] fn validity(&self) -> VortexResult { - self.validity() + ArrayView::validity(self) } } // ============================================================================= @@ -300,16 +320,19 @@ impl Array { } /// Returns the dtype. + #[allow(clippy::same_name_method)] pub fn dtype(&self) -> &DType { self.inner.dtype() } /// Returns the length. + #[allow(clippy::same_name_method)] pub fn len(&self) -> usize { self.inner.len() } /// Returns whether this array is empty. + #[allow(clippy::same_name_method)] pub fn is_empty(&self) -> bool { self.inner.len() == 0 } @@ -368,6 +391,7 @@ impl Array { } /// Returns the array slots. 
+ #[allow(clippy::same_name_method)] pub fn slots(&self) -> &[Option] { self.inner.slots() } @@ -428,6 +452,7 @@ impl Array { self.inner.take(indices) } + #[allow(clippy::same_name_method)] pub fn validity(&self) -> VortexResult { self.inner.validity() } diff --git a/vortex-array/src/array/view.rs b/vortex-array/src/array/view.rs index ff869180d35..239bd53e32b 100644 --- a/vortex-array/src/array/view.rs +++ b/vortex-array/src/array/view.rs @@ -99,21 +99,25 @@ impl<'a, V: VTable> ArrayView<'a, V> { } #[inline] + #[allow(clippy::same_name_method)] pub fn slots(&self) -> &'a [Option] { self.slots } #[inline] + #[allow(clippy::same_name_method)] pub fn dtype(&self) -> &'a DType { self.dtype } #[inline] + #[allow(clippy::same_name_method)] pub fn len(&self) -> usize { self.len } #[inline] + #[allow(clippy::same_name_method)] pub fn is_empty(&self) -> bool { self.len == 0 } @@ -129,6 +133,7 @@ impl<'a, V: VTable> ArrayView<'a, V> { } /// Returns the array's validity. Forces stack-backed views to materialize. 
+ #[allow(clippy::same_name_method)] pub fn validity(&self) -> VortexResult { self.array().validity() } diff --git a/vortex-array/src/arrays/bool/array.rs b/vortex-array/src/arrays/bool/array.rs index 9e5bc22cba5..0e9ec94a9e7 100644 --- a/vortex-array/src/arrays/bool/array.rs +++ b/vortex-array/src/arrays/bool/array.rs @@ -102,7 +102,7 @@ pub trait BoolArrayExt: TypedArrayRef { } fn maybe_execute_mask(&self, ctx: &mut ExecutionCtx) -> VortexResult> { - let all_valid = match &self.validity() { + let all_valid = match &BoolArrayExt::validity(self) { Validity::NonNullable | Validity::AllValid => true, Validity::AllInvalid => false, Validity::Array(a) => a.statistics().compute_min::(ctx).unwrap_or(false), @@ -117,8 +117,7 @@ pub trait BoolArrayExt: TypedArrayRef { } fn to_mask_fill_null_false(&self, ctx: &mut ExecutionCtx) -> Mask { - let validity_mask = self - .validity() + let validity_mask = BoolArrayExt::validity(self) .execute_mask(self.len(), ctx) .vortex_expect("Failed to compute validity mask"); let buffer = match validity_mask { diff --git a/vortex-array/src/arrays/bool/test_harness.rs b/vortex-array/src/arrays/bool/test_harness.rs index 8c5209a04d4..efcd95c5b05 100644 --- a/vortex-array/src/arrays/bool/test_harness.rs +++ b/vortex-array/src/arrays/bool/test_harness.rs @@ -13,10 +13,7 @@ impl BoolArray { pub fn opt_bool_vec(&self) -> Vec> { self.validity() .vortex_expect("failed to get validity") - .execute_mask( - self.len(), - &mut LEGACY_SESSION.create_execution_ctx(), - ) + .execute_mask(self.len(), &mut LEGACY_SESSION.create_execution_ctx()) .vortex_expect("Failed to compute validity mask") .to_bit_buffer() .iter() @@ -28,10 +25,7 @@ impl BoolArray { pub fn bool_vec(&self) -> Vec { self.validity() .vortex_expect("failed to get validity") - .execute_mask( - self.len(), - &mut LEGACY_SESSION.create_execution_ctx(), - ) + .execute_mask(self.len(), &mut LEGACY_SESSION.create_execution_ctx()) .vortex_expect("Failed to compute validity mask") .to_bit_buffer() 
.iter() diff --git a/vortex-array/src/arrays/chunked/array.rs b/vortex-array/src/arrays/chunked/array.rs index 89e5acc84c4..47747607907 100644 --- a/vortex-array/src/arrays/chunked/array.rs +++ b/vortex-array/src/arrays/chunked/array.rs @@ -87,10 +87,7 @@ pub trait ChunkedArrayExt: TypedArrayRef { } fn find_chunk_idx(&self, index: usize) -> VortexResult<(usize, usize)> { - assert!( - index <= self.len(), - "Index out of bounds of the array" - ); + assert!(index <= self.len(), "Index out of bounds of the array"); let chunk_offsets = self.chunk_offsets(); let index_chunk = chunk_offsets .search_sorted(&index, SearchSortedSide::Right)? diff --git a/vortex-array/src/arrays/chunked/paired_chunks.rs b/vortex-array/src/arrays/chunked/paired_chunks.rs index 4c9f4fa7691..6821bf66195 100644 --- a/vortex-array/src/arrays/chunked/paired_chunks.rs +++ b/vortex-array/src/arrays/chunked/paired_chunks.rs @@ -71,7 +71,7 @@ pub(crate) trait PairedChunksExt: ChunkedArrayExt { fn paired_chunks(&self, other: &T) -> PairedChunks { assert_eq!( self.len(), - other.as_ref().len(), + other.len(), "paired_chunks requires arrays of equal length" ); PairedChunks { diff --git a/vortex-array/src/arrays/extension/array.rs b/vortex-array/src/arrays/extension/array.rs index 8bee9b344fa..8cb376a5311 100644 --- a/vortex-array/src/arrays/extension/array.rs +++ b/vortex-array/src/arrays/extension/array.rs @@ -24,8 +24,7 @@ pub(super) const SLOT_NAMES: [&str; NUM_SLOTS] = ["storage"]; pub trait ExtensionArrayExt: TypedArrayRef { fn ext_dtype(&self) -> &ExtDTypeRef { - self.as_ref() - .dtype() + self.dtype() .as_extension_opt() .vortex_expect("extension array somehow did not have an extension dtype") } diff --git a/vortex-array/src/arrays/primitive/array/mod.rs b/vortex-array/src/arrays/primitive/array/mod.rs index f6533fc6adc..06aea0c48fa 100644 --- a/vortex-array/src/arrays/primitive/array/mod.rs +++ b/vortex-array/src/arrays/primitive/array/mod.rs @@ -142,7 +142,11 @@ pub trait PrimitiveArrayExt: 
TypedArrayRef { "can't reinterpret cast between integers of two different widths" ); - PrimitiveArray::from_buffer_handle(self.buffer_handle().clone(), ptype, self.validity()) + PrimitiveArray::from_buffer_handle( + self.buffer_handle().clone(), + ptype, + PrimitiveArrayExt::validity(self), + ) } /// Narrow the array to the smallest possible integer type that can represent all values. @@ -154,7 +158,7 @@ pub trait PrimitiveArrayExt: TypedArrayRef { let Some(min_max) = min_max(self.as_ref(), ctx)? else { return Ok(PrimitiveArray::new( Buffer::::zeroed(self.len()), - self.validity(), + PrimitiveArrayExt::validity(self), )); }; diff --git a/vortex-array/src/arrays/primitive/array/top_value.rs b/vortex-array/src/arrays/primitive/array/top_value.rs index 30448b0d8ff..7932b28572f 100644 --- a/vortex-array/src/arrays/primitive/array/top_value.rs +++ b/vortex-array/src/arrays/primitive/array/top_value.rs @@ -33,10 +33,8 @@ impl PrimitiveArray { match_each_native_ptype!(self.ptype(), |P| { let (top, count) = typed_top_value( self.as_slice::
<P>
(), - self.validity()?.execute_mask( - self.len(), - &mut LEGACY_SESSION.create_execution_ctx(), - )?, + self.validity()? + .execute_mask(self.len(), &mut LEGACY_SESSION.create_execution_ctx())?, ); Ok(Some((top.into(), count))) }) diff --git a/vortex-array/src/arrays/varbinview/compact.rs b/vortex-array/src/arrays/varbinview/compact.rs index a6a1c1c5bd9..89bd96da19a 100644 --- a/vortex-array/src/arrays/varbinview/compact.rs +++ b/vortex-array/src/arrays/varbinview/compact.rs @@ -65,10 +65,10 @@ impl VarBinViewArray { where F: FnMut(&Ref), { - match self.validity()?.execute_mask( - self.len(), - &mut LEGACY_SESSION.create_execution_ctx(), - )? { + match self + .validity()? + .execute_mask(self.len(), &mut LEGACY_SESSION.create_execution_ctx())? + { Mask::AllTrue(_) => { for &view in self.views().iter() { if !view.is_inlined() { diff --git a/vortex-array/src/iter.rs b/vortex-array/src/iter.rs index 0841810ee9d..b4cd01d875d 100644 --- a/vortex-array/src/iter.rs +++ b/vortex-array/src/iter.rs @@ -27,7 +27,7 @@ pub trait ArrayIterator: Iterator> { impl ArrayIterator for Box { #[inline] fn dtype(&self) -> &DType { - self.dtype() + self.as_ref().dtype() } } diff --git a/vortex-array/src/serde.rs b/vortex-array/src/serde.rs index d6032b090ef..637b57324c0 100644 --- a/vortex-array/src/serde.rs +++ b/vortex-array/src/serde.rs @@ -279,7 +279,7 @@ impl> ArrayChildren for T { } fn len(&self) -> usize { - self.len() + self.as_ref().len() } } From e343bfbeb8292d56ca27664657a7885a9f2102da Mon Sep 17 00:00:00 2001 From: Robert Kruszewski Date: Fri, 29 May 2026 01:32:53 +0100 Subject: [PATCH 13/17] shorter Signed-off-by: Robert Kruszewski --- encodings/datetime-parts/src/compute/rules.rs | 9 ++-- vortex-array/src/array/parent.rs | 2 +- vortex-array/src/arrays/dict/array.rs | 32 +++++++++-- vortex-array/src/arrays/dict/compute/like.rs | 12 ++--- vortex-array/src/arrays/filter/array.rs | 2 +- vortex-array/src/arrays/scalar_fn/array.rs | 2 +- .../src/arrays/scalar_fn/vtable/mod.rs | 
45 ++++++++-------- vortex-array/src/arrays/slice/array.rs | 5 +- vortex-array/src/builtins.rs | 53 ++++++------------- vortex-array/src/validity.rs | 7 +-- vortex-layout/src/layouts/dict/reader.rs | 11 ++-- 11 files changed, 85 insertions(+), 95 deletions(-) diff --git a/encodings/datetime-parts/src/compute/rules.rs b/encodings/datetime-parts/src/compute/rules.rs index a8b1b2eca83..60c50cdab75 100644 --- a/encodings/datetime-parts/src/compute/rules.rs +++ b/encodings/datetime-parts/src/compute/rules.rs @@ -4,6 +4,7 @@ use vortex_array::ArrayRef; use vortex_array::ArrayView; use vortex_array::IntoArray; +use vortex_array::ParentRef; use vortex_array::arrays::Constant; use vortex_array::arrays::ConstantArray; use vortex_array::arrays::Filter; @@ -16,7 +17,6 @@ use vortex_array::arrays::slice::SliceReduceAdaptor; use vortex_array::builtins::ArrayBuiltins; use vortex_array::dtype::DType; use vortex_array::extension::datetime::Timestamp; -use vortex_array::optimizer::ArrayOptimizer; use vortex_array::optimizer::rules::ArrayParentReduceRule; use vortex_array::optimizer::rules::ParentRuleSet; use vortex_array::scalar_fn::fns::between::Between; @@ -133,10 +133,9 @@ impl ArrayParentReduceRule for DTPComparisonPushDownRule { } } - let result = - ScalarFnArray::try_new(parent.scalar_fn().clone(), new_children, parent.len())? - .into_array() - .optimize()?; + let parts = + ScalarFnArray::try_new_parts(parent.scalar_fn().clone(), new_children, parent.len())?; + let result = ParentRef::from_parts(&parts).optimize()?; Ok(Some(result)) } diff --git a/vortex-array/src/array/parent.rs b/vortex-array/src/array/parent.rs index 5ca1c03538e..dead35d5519 100644 --- a/vortex-array/src/array/parent.rs +++ b/vortex-array/src/array/parent.rs @@ -117,7 +117,7 @@ impl<'a> ParentRef<'a> { /// The returned [`ParentRef`] owns the cache slot for the lazily materialized /// [`ArrayRef`], so callers don't need to thread an external scratch through. 
#[inline] - pub(crate) fn from_parts(parts: &'a ArrayParts) -> Self { + pub fn from_parts(parts: &'a ArrayParts) -> Self { Self { encoding_id: parts.vtable.id(), dtype: &parts.dtype, diff --git a/vortex-array/src/arrays/dict/array.rs b/vortex-array/src/arrays/dict/array.rs index 148040b9251..2003f31619a 100644 --- a/vortex-array/src/arrays/dict/array.rs +++ b/vortex-array/src/arrays/dict/array.rs @@ -239,10 +239,7 @@ impl Array { /// Build the [`ArrayParts`]. The parts can then be optimized through /// [`ParentRef::optimize`](crate::array::ParentRef::optimize) or materialized /// directly with [`ArrayParts::into_array`]. - pub(crate) fn try_new_parts( - codes: ArrayRef, - values: ArrayRef, - ) -> VortexResult> { + pub fn try_new_parts(codes: ArrayRef, values: ArrayRef) -> VortexResult> { let dtype = values .dtype() .union_nullability(codes.dtype().nullability()); @@ -254,6 +251,33 @@ impl Array { ) } + /// Build the [`ArrayParts`] without validating codes or values, recording whether + /// all values are referenced by at least one code. + /// + /// The parts can then be optimized through + /// [`ParentRef::optimize`](crate::array::ParentRef::optimize) or materialized directly + /// with [`ArrayParts::into_array`]. Unlike + /// [`set_all_values_referenced`](Self::set_all_values_referenced), this does not run the + /// debug-only `all_values_referenced` validation, so it is intended for callers that + /// have externally guaranteed the flag (for example a layout validated at write time). + /// + /// # Safety + /// + /// See [`DictData::new_unchecked`] and [`DictData::set_all_values_referenced`]. 
+ pub unsafe fn new_unchecked_parts( + codes: ArrayRef, + values: ArrayRef, + all_values_referenced: bool, + ) -> ArrayParts { + let dtype = values + .dtype() + .union_nullability(codes.dtype().nullability()); + let len = codes.len(); + let data = + unsafe { DictData::new_unchecked().set_all_values_referenced(all_values_referenced) }; + ArrayParts::new(Dict, dtype, len, data).with_slots(smallvec![Some(codes), Some(values)]) + } + /// Build a new `DictArray` without validating the codes or values. /// /// # Safety diff --git a/vortex-array/src/arrays/dict/compute/like.rs b/vortex-array/src/arrays/dict/compute/like.rs index 183570394a7..d74af84db03 100644 --- a/vortex-array/src/arrays/dict/compute/like.rs +++ b/vortex-array/src/arrays/dict/compute/like.rs @@ -10,10 +10,9 @@ use crate::IntoArray; use crate::ParentRef; use crate::array::ArrayView; use crate::arrays::ConstantArray; -use crate::arrays::ScalarFnArray; use crate::arrays::dict::DictArrayExt; use crate::arrays::dict::DictArraySlotsExt; -use crate::scalar_fn::ScalarFnVTableExt; +use crate::arrays::scalar_fn::ScalarFnFactoryExt; use crate::scalar_fn::fns::like::Like; use crate::scalar_fn::fns::like::LikeOptions; use crate::scalar_fn::fns::like::LikeReduce; @@ -31,11 +30,10 @@ impl LikeReduce for Dict { if let Some(pattern) = pattern.as_constant() { let pattern = ConstantArray::new(pattern, array.values().len()).into_array(); - let len = pattern.len(); - let parts = ScalarFnArray::try_new_parts( - Like.bind(options), - vec![array.values().clone(), pattern], - len, + let parts = Like.try_new_array_parts( + pattern.len(), + options, + [array.values().clone(), pattern], )?; let values = ParentRef::from_parts(&parts).optimize()?; diff --git a/vortex-array/src/arrays/filter/array.rs b/vortex-array/src/arrays/filter/array.rs index 2c12abda035..6b4d494f36f 100644 --- a/vortex-array/src/arrays/filter/array.rs +++ b/vortex-array/src/arrays/filter/array.rs @@ -109,7 +109,7 @@ impl Array { /// Builds the [`ArrayParts`]. 
The parts can then be optimized through /// [`ParentRef::optimize`](crate::array::ParentRef::optimize) or materialized /// directly with [`ArrayParts::into_array`]. - pub(crate) fn try_new_parts(array: ArrayRef, mask: Mask) -> VortexResult> { + pub fn try_new_parts(array: ArrayRef, mask: Mask) -> VortexResult> { let dtype = array.dtype().clone(); let len = mask.true_count(); let data = FilterData::try_new(array.len(), mask)?; diff --git a/vortex-array/src/arrays/scalar_fn/array.rs b/vortex-array/src/arrays/scalar_fn/array.rs index 9b8267923c5..a37f89f3cf8 100644 --- a/vortex-array/src/arrays/scalar_fn/array.rs +++ b/vortex-array/src/arrays/scalar_fn/array.rs @@ -99,7 +99,7 @@ impl Array { /// `ArrayRef`, so callers can drive the parts through /// [`ParentRef::from_parts`](crate::ParentRef::from_parts)`.optimize()` and only pay /// the wrapper allocation when no reduction fires. - pub(crate) fn try_new_parts( + pub fn try_new_parts( scalar_fn: ScalarFnRef, children: Vec, len: usize, diff --git a/vortex-array/src/arrays/scalar_fn/vtable/mod.rs b/vortex-array/src/arrays/scalar_fn/vtable/mod.rs index c12e391199c..ef6f9f522ea 100644 --- a/vortex-array/src/arrays/scalar_fn/vtable/mod.rs +++ b/vortex-array/src/arrays/scalar_fn/vtable/mod.rs @@ -21,8 +21,6 @@ use vortex_session::registry::CachedId; use crate::ArrayEq; use crate::ArrayHash; use crate::ArrayRef; -use crate::ArraySlots; -use crate::IntoArray; use crate::Precision; use crate::array::Array; use crate::array::ArrayId; @@ -171,34 +169,33 @@ impl VTable for ScalarFn { /// Array factory functions for scalar functions. pub trait ScalarFnFactoryExt: scalar_fn::ScalarFnVTable { - fn try_new_array( + /// Build the [`ArrayParts`] for this scalar function applied to `children`. + /// + /// Stops short of allocating the backing `ArrayRef`, so callers can drive the parts + /// through [`ParentRef::from_parts`]`.optimize()` and only pay the wrapper allocation + /// when no reduction fires. 
+ fn try_new_array_parts( &self, len: usize, options: Self::Options, children: impl Into>, - ) -> VortexResult { + ) -> VortexResult> { let scalar_fn = scalar_fn::TypedScalarFnInstance::new(self.clone(), options).erased(); + Array::::try_new_parts(scalar_fn, children.into(), len) + } - let children = children.into(); - vortex_ensure!( - children.iter().all(|c| c.len() == len), - "All child arrays must have the same length as the scalar function array" - ); - - let child_dtypes = children.iter().map(|c| c.dtype().clone()).collect_vec(); - let dtype = scalar_fn.return_dtype(&child_dtypes)?; - - let data = ScalarFnData { - scalar_fn: scalar_fn.clone(), - }; - let vtable = ScalarFn { id: scalar_fn.id() }; - Ok(unsafe { - Array::from_parts_unchecked( - ArrayParts::new(vtable, dtype, len, data) - .with_slots(children.into_iter().map(Some).collect::()), - ) - } - .into_array()) + /// Build a materialized scalar-function array for this scalar function applied to + /// `children`. Equivalent to [`try_new_array_parts`](Self::try_new_array_parts) followed + /// by [`ArrayParts::into_array`]. + fn try_new_array( + &self, + len: usize, + options: Self::Options, + children: impl Into>, + ) -> VortexResult { + Ok(self + .try_new_array_parts(len, options, children)? + .into_array()) } } impl ScalarFnFactoryExt for V {} diff --git a/vortex-array/src/arrays/slice/array.rs b/vortex-array/src/arrays/slice/array.rs index bd761808c7b..70f608b202a 100644 --- a/vortex-array/src/arrays/slice/array.rs +++ b/vortex-array/src/arrays/slice/array.rs @@ -102,10 +102,7 @@ impl Array { /// Builds the [`ArrayParts`] for a slice. The parts can then be /// optimized through [`ParentRef::optimize`](crate::array::ParentRef::optimize) /// or materialized directly with [`ArrayParts::into_array`]. 
- pub(crate) fn try_new_parts( - child: ArrayRef, - range: Range, - ) -> VortexResult> { + pub fn try_new_parts(child: ArrayRef, range: Range) -> VortexResult> { let len = range.len(); let dtype = child.dtype().clone(); let data = SliceData::try_new(child.len(), range)?; diff --git a/vortex-array/src/builtins.rs b/vortex-array/src/builtins.rs index 6177edf965a..04a6c7b7b9b 100644 --- a/vortex-array/src/builtins.rs +++ b/vortex-array/src/builtins.rs @@ -15,7 +15,6 @@ use crate::ArrayRef; use crate::IntoArray; use crate::ParentRef; use crate::arrays::ConstantArray; -use crate::arrays::ScalarFnArray; use crate::arrays::scalar_fn::ScalarFnFactoryExt; use crate::dtype::DType; use crate::dtype::FieldName; @@ -161,7 +160,7 @@ impl ArrayBuiltins for ArrayRef { if self.dtype() == &dtype { return Ok(self.clone()); } - let parts = ScalarFnArray::try_new_parts(Cast.bind(dtype), vec![self.clone()], self.len())?; + let parts = Cast.try_new_array_parts(self.len(), dtype, [self.clone()])?; ParentRef::from_parts(&parts).optimize() } @@ -170,72 +169,56 @@ impl ArrayBuiltins for ArrayRef { if !self.dtype().is_nullable() { return self.cast(fill_value.dtype().clone()); } - let parts = ScalarFnArray::try_new_parts( - FillNull.bind(EmptyOptions), - vec![ + let parts = FillNull.try_new_array_parts( + self.len(), + EmptyOptions, + [ self.clone(), ConstantArray::new(fill_value, self.len()).into_array(), ], - self.len(), )?; ParentRef::from_parts(&parts).optimize() } fn get_item(&self, field_name: impl Into) -> VortexResult { - let parts = ScalarFnArray::try_new_parts( - GetItem.bind(field_name.into()), - vec![self.clone()], - self.len(), - )?; + let parts = GetItem.try_new_array_parts(self.len(), field_name.into(), [self.clone()])?; ParentRef::from_parts(&parts).optimize() } fn is_null(&self) -> VortexResult { - let parts = ScalarFnArray::try_new_parts( - IsNull.bind(EmptyOptions), - vec![self.clone()], - self.len(), - )?; + let parts = IsNull.try_new_array_parts(self.len(), EmptyOptions, 
[self.clone()])?; ParentRef::from_parts(&parts).optimize() } fn is_not_null(&self) -> VortexResult { - let parts = ScalarFnArray::try_new_parts( - IsNotNull.bind(EmptyOptions), - vec![self.clone()], - self.len(), - )?; + let parts = IsNotNull.try_new_array_parts(self.len(), EmptyOptions, [self.clone()])?; ParentRef::from_parts(&parts).optimize() } fn mask(self, mask: ArrayRef) -> VortexResult { - let len = self.len(); - let parts = ScalarFnArray::try_new_parts(Mask.bind(EmptyOptions), vec![self, mask], len)?; + let parts = Mask.try_new_array_parts(self.len(), EmptyOptions, [self, mask])?; ParentRef::from_parts(&parts).optimize() } fn not(&self) -> VortexResult { - let parts = - ScalarFnArray::try_new_parts(Not.bind(EmptyOptions), vec![self.clone()], self.len())?; + let parts = Not.try_new_array_parts(self.len(), EmptyOptions, [self.clone()])?; ParentRef::from_parts(&parts).optimize() } fn zip(&self, if_true: ArrayRef, if_false: ArrayRef) -> VortexResult { - Zip.try_new_array(self.len(), EmptyOptions, [if_true, if_false, self.clone()]) + let parts = + Zip.try_new_array_parts(self.len(), EmptyOptions, [if_true, if_false, self.clone()])?; + ParentRef::from_parts(&parts).optimize() } fn list_contains(&self, value: ArrayRef) -> VortexResult { - let parts = ScalarFnArray::try_new_parts( - ListContains.bind(EmptyOptions), - vec![self.clone(), value], - self.len(), - )?; + let parts = + ListContains.try_new_array_parts(self.len(), EmptyOptions, [self.clone(), value])?; ParentRef::from_parts(&parts).optimize() } fn binary(&self, rhs: ArrayRef, op: Operator) -> VortexResult { - let parts = - ScalarFnArray::try_new_parts(Binary.bind(op), vec![self.clone(), rhs], self.len())?; + let parts = Binary.try_new_array_parts(self.len(), op, [self.clone(), rhs])?; ParentRef::from_parts(&parts).optimize() } @@ -245,9 +228,7 @@ impl ArrayBuiltins for ArrayRef { upper: ArrayRef, options: BetweenOptions, ) -> VortexResult { - let len = self.len(); - let parts = - 
ScalarFnArray::try_new_parts(Between.bind(options), vec![self, lower, upper], len)?; + let parts = Between.try_new_array_parts(self.len(), options, [self, lower, upper])?; ParentRef::from_parts(&parts).optimize() } } diff --git a/vortex-array/src/validity.rs b/vortex-array/src/validity.rs index 6f20748cb33..c60a164a5d6 100644 --- a/vortex-array/src/validity.rs +++ b/vortex-array/src/validity.rs @@ -25,13 +25,12 @@ use crate::VortexSessionExecute; use crate::arrays::BoolArray; use crate::arrays::ChunkedArray; use crate::arrays::ConstantArray; -use crate::arrays::ScalarFnArray; +use crate::arrays::scalar_fn::ScalarFnFactoryExt; use crate::builtins::ArrayBuiltins; use crate::dtype::DType; use crate::dtype::Nullability; use crate::patches::Patches; use crate::scalar::Scalar; -use crate::scalar_fn::ScalarFnVTableExt; use crate::scalar_fn::fns::binary::Binary; use crate::scalar_fn::fns::operators::Operator; @@ -267,9 +266,7 @@ impl Validity { | (Validity::AllValid, Validity::AllValid) => Validity::AllValid, // Here we actually have to do some work (Validity::Array(lhs), Validity::Array(rhs)) => { - let len = lhs.len(); - let parts = - ScalarFnArray::try_new_parts(Binary.bind(Operator::And), vec![lhs, rhs], len)?; + let parts = Binary.try_new_array_parts(lhs.len(), Operator::And, [lhs, rhs])?; Validity::Array(ParentRef::from_parts(&parts).optimize()?) 
} }) diff --git a/vortex-layout/src/layouts/dict/reader.rs b/vortex-layout/src/layouts/dict/reader.rs index 96f12d53ece..1bb286a3b6c 100644 --- a/vortex-layout/src/layouts/dict/reader.rs +++ b/vortex-layout/src/layouts/dict/reader.rs @@ -14,6 +14,7 @@ use futures::try_join; use vortex_array::ArrayRef; use vortex_array::IntoArray; use vortex_array::MaskFuture; +use vortex_array::ParentRef; use vortex_array::VortexSessionExecute; use vortex_array::arrays::DictArray; use vortex_array::arrays::SharedArray; @@ -21,7 +22,6 @@ use vortex_array::dtype::DType; use vortex_array::dtype::FieldMask; use vortex_array::expr::Expression; use vortex_array::expr::root; -use vortex_array::optimizer::ArrayOptimizer; use vortex_error::VortexError; use vortex_error::VortexExpect; use vortex_error::VortexResult; @@ -241,12 +241,9 @@ impl LayoutReader for DictReader { // * The codes child reader ensures the correct dtype. // * The layout stores `all_values_referenced` and if this is malicious then it must // only affect correctness not memory safety. 
- let array = unsafe { - DictArray::new_unchecked(codes, values) - .set_all_values_referenced(all_values_referenced) - } - .into_array() - .optimize()?; + let parts = + unsafe { DictArray::new_unchecked_parts(codes, values, all_values_referenced) }; + let array = ParentRef::from_parts(&parts).optimize()?; array.apply(&expr) } From 94594c687d1e3e5d543fcb5dd15615a5b376e3a7 Mon Sep 17 00:00:00 2001 From: Robert Kruszewski Date: Sat, 30 May 2026 17:37:17 +0100 Subject: [PATCH 14/17] more Signed-off-by: Robert Kruszewski --- vortex-array/src/arrays/scalar_fn/rules.rs | 52 +++++++++++++++------- vortex-array/src/expr/optimize.rs | 14 +++--- vortex-array/src/scalar_fn/fns/merge.rs | 4 +- vortex-array/src/scalar_fn/vtable.rs | 27 ++++++++--- 4 files changed, 66 insertions(+), 31 deletions(-) diff --git a/vortex-array/src/arrays/scalar_fn/rules.rs b/vortex-array/src/arrays/scalar_fn/rules.rs index 1e9563cf9de..e8ea5e6f5dc 100644 --- a/vortex-array/src/arrays/scalar_fn/rules.rs +++ b/vortex-array/src/arrays/scalar_fn/rules.rs @@ -1,7 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use std::any::Any; use std::sync::Arc; use itertools::Itertools; @@ -44,7 +43,7 @@ pub(super) const PARENT_RULES: ParentRuleSet = ParentRuleSet::new(&[ struct ScalarFnPackToStructRule; impl ArrayReduceRule for ScalarFnPackToStructRule { fn reduce(&self, array: ArrayView<'_, ScalarFn>) -> VortexResult> { - let Some(pack_options) = array.scalar_fn().as_opt::() else { + let Some(pack_options) = ScalarFnArrayExt::scalar_fn(&array).as_opt::() else { return Ok(None); }; @@ -84,7 +83,12 @@ impl ArrayParentReduceRule for ScalarFnSliceReduceRule { .collect::>()?; Ok(Some( - ScalarFnArray::try_new(array.scalar_fn().clone(), children, range.len())?.into_array(), + ScalarFnArray::try_new( + ScalarFnArrayExt::scalar_fn(&array).clone(), + children, + range.len(), + )? 
+ .into_array(), )) } } @@ -93,9 +97,8 @@ impl ArrayParentReduceRule for ScalarFnSliceReduceRule { struct ScalarFnAbstractReduceRule; impl ArrayReduceRule for ScalarFnAbstractReduceRule { fn reduce(&self, array: ArrayView<'_, ScalarFn>) -> VortexResult> { - if let Some(reduced) = array - .scalar_fn() - .reduce(array.as_ref(), &ArrayReduceCtx { len: array.len() })? + if let Some(reduced) = ScalarFnArrayExt::scalar_fn(&array) + .reduce(&array, &ArrayReduceCtx { len: array.len() })? { return Ok(Some( reduced @@ -109,11 +112,25 @@ impl ArrayReduceRule for ScalarFnAbstractReduceRule { } } -impl ReduceNode for ArrayRef { - fn as_any(&self) -> &dyn Any { - self +impl ReduceNode for ArrayView<'_, ScalarFn> { + fn node_dtype(&self) -> VortexResult { + Ok(self.dtype().clone()) + } + + fn scalar_fn(&self) -> Option<&ScalarFnRef> { + Some(ScalarFnArrayExt::scalar_fn(self)) + } + + fn child(&self, idx: usize) -> ReduceNodeRef { + ReduceNodeRef::new(Arc::new(self.child_at(idx).clone())) + } + + fn child_count(&self) -> usize { + ScalarFnArrayExt::nchildren(self) } +} +impl ReduceNode for ArrayRef { fn node_dtype(&self) -> VortexResult { Ok(self.dtype().clone()) } @@ -123,7 +140,9 @@ impl ReduceNode for ArrayRef { } fn child(&self, idx: usize) -> ReduceNodeRef { - Arc::new(self.nth_child(idx).vortex_expect("child idx out of bounds")) + ReduceNodeRef::new(Arc::new( + self.nth_child(idx).vortex_expect("child idx out of bounds"), + )) } fn child_count(&self) -> usize { @@ -141,7 +160,7 @@ impl ReduceCtx for ArrayReduceCtx { scalar_fn: ScalarFnRef, children: &[ReduceNodeRef], ) -> VortexResult { - Ok(Arc::new( + Ok(ReduceNodeRef::new(Arc::new( ScalarFnArray::try_new( scalar_fn, children @@ -156,7 +175,7 @@ impl ReduceCtx for ArrayReduceCtx { self.len, )? 
.into_array(), - )) + ))) } } @@ -190,9 +209,12 @@ impl ArrayParentReduceRule for ScalarFnUnaryFilterPushDownRule { }) .try_collect()?; - let new_array = - ScalarFnArray::try_new(child.scalar_fn().clone(), new_children, parent.len())? - .into_array(); + let new_array = ScalarFnArray::try_new( + ScalarFnArrayExt::scalar_fn(&child).clone(), + new_children, + parent.len(), + )? + .into_array(); return Ok(Some(new_array)); } diff --git a/vortex-array/src/expr/optimize.rs b/vortex-array/src/expr/optimize.rs index 27959a96070..a47b58ffbfb 100644 --- a/vortex-array/src/expr/optimize.rs +++ b/vortex-array/src/expr/optimize.rs @@ -1,7 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use std::any::Any; use std::cell::RefCell; use std::ops::Deref; use std::sync::Arc; @@ -258,10 +257,6 @@ struct ExpressionReduceNode { } impl ReduceNode for ExpressionReduceNode { - fn as_any(&self) -> &dyn Any { - self - } - fn node_dtype(&self) -> VortexResult { self.expression.return_dtype(&self.scope) } @@ -271,10 +266,10 @@ impl ReduceNode for ExpressionReduceNode { } fn child(&self, idx: usize) -> ReduceNodeRef { - Arc::new(ExpressionReduceNode { + ReduceNodeRef::new(Arc::new(ExpressionReduceNode { expression: self.expression.child(idx).clone(), scope: self.scope.clone(), - }) + })) } fn child_count(&self) -> usize { @@ -285,6 +280,7 @@ impl ReduceNode for ExpressionReduceNode { struct ExpressionReduceCtx { scope: DType, } + impl ReduceCtx for ExpressionReduceCtx { fn new_node( &self, @@ -305,10 +301,10 @@ impl ReduceCtx for ExpressionReduceCtx { .collect::>(), )?; - Ok(Arc::new(ExpressionReduceNode { + Ok(ReduceNodeRef::new(Arc::new(ExpressionReduceNode { expression, scope: self.scope.clone(), - })) + }))) } } diff --git a/vortex-array/src/scalar_fn/fns/merge.rs b/vortex-array/src/scalar_fn/fns/merge.rs index 608ba69a255..c0b9d62ca6a 100644 --- a/vortex-array/src/scalar_fn/fns/merge.rs +++ 
b/vortex-array/src/scalar_fn/fns/merge.rs @@ -199,10 +199,10 @@ impl ScalarFnVTable for Merge { for name in child_dtype.names().iter() { if let Some(idx) = names.iter().position(|n| n == name) { duplicate_names.insert(name.clone()); - children[idx] = Arc::clone(&child); + children[idx] = child.clone(); } else { names.push(name.clone()); - children.push(Arc::clone(&child)); + children.push(child.clone()); } } diff --git a/vortex-array/src/scalar_fn/vtable.rs b/vortex-array/src/scalar_fn/vtable.rs index f4862f6876a..7ed7b1512ae 100644 --- a/vortex-array/src/scalar_fn/vtable.rs +++ b/vortex-array/src/scalar_fn/vtable.rs @@ -7,6 +7,7 @@ use std::fmt::Debug; use std::fmt::Display; use std::fmt::Formatter; use std::hash::Hash; +use std::ops::Deref; use std::sync::Arc; use arcref::ArcRef; @@ -116,7 +117,7 @@ pub trait ScalarFnVTable: 'static + Sized + Clone + Send + Sync { /// Implementations may assume correct arity and will panic or return nonsensical results if /// violated. /// - /// [`Expression::try_new`]: crate::expr::Expression::try_new + /// [`Expression::try_new`]: Expression::try_new fn return_dtype(&self, options: &Self::Options, args: &[DType]) -> VortexResult; /// Execute the expression over the input arguments. @@ -278,13 +279,29 @@ pub trait ReduceCtx { ) -> VortexResult; } -pub type ReduceNodeRef = Arc; +#[derive(Clone)] +pub struct ReduceNodeRef(Arc); + +impl ReduceNodeRef { + pub fn new(inner: Arc) -> Self { + Self(inner) + } + + pub fn as_any(&self) -> &dyn Any { + self + } +} + +impl Deref for ReduceNodeRef { + type Target = dyn ReduceNode; + + fn deref(&self) -> &Self::Target { + self.0.as_ref() + } +} /// A node used for implementing abstract reduction rules. pub trait ReduceNode { - /// Downcast to Any. - fn as_any(&self) -> &dyn Any; - /// Return the data type of this node. 
fn node_dtype(&self) -> VortexResult; From 471077321ba0a612562244728aae94608e89c9cb Mon Sep 17 00:00:00 2001 From: Robert Kruszewski Date: Sat, 30 May 2026 22:28:28 +0100 Subject: [PATCH 15/17] fixes Signed-off-by: Robert Kruszewski --- vortex-array/src/arrays/scalar_fn/rules.rs | 46 +++++++++++----------- vortex-array/src/expr/mod.rs | 1 + vortex-array/src/expr/optimize.rs | 37 +++++++++-------- vortex-array/src/scalar_fn/fns/merge.rs | 17 ++++---- vortex-array/src/scalar_fn/vtable.rs | 28 +++---------- 5 files changed, 56 insertions(+), 73 deletions(-) diff --git a/vortex-array/src/arrays/scalar_fn/rules.rs b/vortex-array/src/arrays/scalar_fn/rules.rs index e8ea5e6f5dc..cac44069384 100644 --- a/vortex-array/src/arrays/scalar_fn/rules.rs +++ b/vortex-array/src/arrays/scalar_fn/rules.rs @@ -6,6 +6,7 @@ use std::sync::Arc; use itertools::Itertools; use vortex_error::VortexExpect; use vortex_error::VortexResult; +use vortex_error::vortex_panic; use crate::ArrayRef; use crate::IntoArray; @@ -19,6 +20,7 @@ use crate::arrays::Slice; use crate::arrays::StructArray; use crate::arrays::scalar_fn::ScalarFnArrayExt; use crate::dtype::DType; +use crate::expr::ExpressionReduceNode; use crate::optimizer::rules::ArrayParentReduceRule; use crate::optimizer::rules::ArrayReduceRule; use crate::optimizer::rules::ParentRuleSet; @@ -100,19 +102,21 @@ impl ArrayReduceRule for ScalarFnAbstractReduceRule { if let Some(reduced) = ScalarFnArrayExt::scalar_fn(&array) .reduce(&array, &ArrayReduceCtx { len: array.len() })? 
{ - return Ok(Some( - reduced - .as_any() - .downcast_ref::() - .vortex_expect("ReduceNode is not an ArrayRef") - .clone(), - )); + return Ok(Some(reduced.as_array())); } Ok(None) } } impl ReduceNode for ArrayView<'_, ScalarFn> { + fn as_array(&self) -> ArrayRef { + self.array().clone() + } + + fn as_expression(&self) -> ExpressionReduceNode { + vortex_panic!("Cannot convert ArrayView to ExpressionReduceNode") + } + fn node_dtype(&self) -> VortexResult { Ok(self.dtype().clone()) } @@ -122,7 +126,7 @@ impl ReduceNode for ArrayView<'_, ScalarFn> { } fn child(&self, idx: usize) -> ReduceNodeRef { - ReduceNodeRef::new(Arc::new(self.child_at(idx).clone())) + Arc::new(self.child_at(idx).clone()) } fn child_count(&self) -> usize { @@ -131,6 +135,14 @@ impl ReduceNode for ArrayView<'_, ScalarFn> { } impl ReduceNode for ArrayRef { + fn as_array(&self) -> ArrayRef { + self.clone() + } + + fn as_expression(&self) -> ExpressionReduceNode { + vortex_panic!("Cannot convert ArrayRef to ExpressionReduceNode") + } + fn node_dtype(&self) -> VortexResult { Ok(self.dtype().clone()) } @@ -140,9 +152,7 @@ impl ReduceNode for ArrayRef { } fn child(&self, idx: usize) -> ReduceNodeRef { - ReduceNodeRef::new(Arc::new( - self.nth_child(idx).vortex_expect("child idx out of bounds"), - )) + Arc::new(self.nth_child(idx).vortex_expect("child idx out of bounds")) } fn child_count(&self) -> usize { @@ -160,22 +170,14 @@ impl ReduceCtx for ArrayReduceCtx { scalar_fn: ScalarFnRef, children: &[ReduceNodeRef], ) -> VortexResult { - Ok(ReduceNodeRef::new(Arc::new( + Ok(Arc::new( ScalarFnArray::try_new( scalar_fn, - children - .iter() - .map(|c| { - c.as_any() - .downcast_ref::() - .vortex_expect("ReduceNode is not an ArrayRef") - .clone() - }) - .collect(), + children.iter().map(|c| c.as_array()).collect(), self.len, )? 
.into_array(), - ))) + )) } } diff --git a/vortex-array/src/expr/mod.rs b/vortex-array/src/expr/mod.rs index a5d32510443..d759e9be88d 100644 --- a/vortex-array/src/expr/mod.rs +++ b/vortex-array/src/expr/mod.rs @@ -42,6 +42,7 @@ pub mod traversal; pub use analysis::*; pub use expression::*; pub use exprs::*; +pub use optimize::ExpressionReduceNode; pub use pruning::StatsCatalog; pub trait VortexExprExt { diff --git a/vortex-array/src/expr/optimize.rs b/vortex-array/src/expr/optimize.rs index a47b58ffbfb..cb15b40eb5a 100644 --- a/vortex-array/src/expr/optimize.rs +++ b/vortex-array/src/expr/optimize.rs @@ -6,10 +6,11 @@ use std::ops::Deref; use std::sync::Arc; use itertools::Itertools; -use vortex_error::VortexExpect; use vortex_error::VortexResult; +use vortex_error::vortex_panic; use vortex_utils::aliases::hash_map::HashMap; +use crate::ArrayRef; use crate::dtype::DType; use crate::expr::Expression; use crate::expr::transform::match_between::find_between; @@ -82,12 +83,7 @@ impl Expression { scope: scope.clone(), }; if let Some(reduced) = current.scalar_fn().reduce(&reduce_node, &reduce_ctx)? 
{ - let reduced_expr = reduced - .as_any() - .downcast_ref::() - .vortex_expect("ReduceNode not an ExpressionReduceNode") - .expression - .clone(); + let reduced_expr = reduced.as_expression().expression; current = reduced_expr; changed = true; any_optimizations = true; @@ -251,12 +247,21 @@ impl SimplifyCtx for SimplifyCache<'_> { } } -struct ExpressionReduceNode { +#[derive(Clone)] +pub struct ExpressionReduceNode { expression: Expression, scope: DType, } impl ReduceNode for ExpressionReduceNode { + fn as_array(&self) -> ArrayRef { + vortex_panic!("Cannot produce ArrayRef out of Expression node") + } + + fn as_expression(&self) -> ExpressionReduceNode { + self.clone() + } + fn node_dtype(&self) -> VortexResult { self.expression.return_dtype(&self.scope) } @@ -266,10 +271,10 @@ impl ReduceNode for ExpressionReduceNode { } fn child(&self, idx: usize) -> ReduceNodeRef { - ReduceNodeRef::new(Arc::new(ExpressionReduceNode { + Arc::new(ExpressionReduceNode { expression: self.expression.child(idx).clone(), scope: self.scope.clone(), - })) + }) } fn child_count(&self) -> usize { @@ -291,20 +296,14 @@ impl ReduceCtx for ExpressionReduceCtx { scalar_fn, children .iter() - .map(|c| { - c.as_any() - .downcast_ref::() - .vortex_expect("ReduceNode not an ExpressionReduceNode") - .expression - .clone() - }) + .map(|c| c.as_expression().expression) .collect::>(), )?; - Ok(ReduceNodeRef::new(Arc::new(ExpressionReduceNode { + Ok(Arc::new(ExpressionReduceNode { expression, scope: self.scope.clone(), - }))) + })) } } diff --git a/vortex-array/src/scalar_fn/fns/merge.rs b/vortex-array/src/scalar_fn/fns/merge.rs index c0b9d62ca6a..05ea9354eca 100644 --- a/vortex-array/src/scalar_fn/fns/merge.rs +++ b/vortex-array/src/scalar_fn/fns/merge.rs @@ -7,9 +7,9 @@ use std::hash::Hash; use std::sync::Arc; use itertools::Itertools as _; -use vortex_error::VortexExpect; use vortex_error::VortexResult; use vortex_error::vortex_bail; +use vortex_error::vortex_err; use vortex_session::VortexSession; 
use vortex_utils::aliases::hash_set::HashSet; @@ -185,24 +185,21 @@ impl ScalarFnVTable for Merge { for child in (0..node.child_count()).map(|i| node.child(i)) { let child_dtype = child.node_dtype()?; - if !child_dtype.is_struct() { - vortex_bail!( + + let child_dtype = child_dtype.as_struct_fields_opt().ok_or_else(|| { + vortex_err!( "Merge child must return a non-nullable struct dtype, got {}", child_dtype ) - } - - let child_dtype = child_dtype - .as_struct_fields_opt() - .vortex_expect("expected struct"); + })?; for name in child_dtype.names().iter() { if let Some(idx) = names.iter().position(|n| n == name) { duplicate_names.insert(name.clone()); - children[idx] = child.clone(); + children[idx] = Arc::clone(&child); } else { names.push(name.clone()); - children.push(child.clone()); + children.push(Arc::clone(&child)); } } diff --git a/vortex-array/src/scalar_fn/vtable.rs b/vortex-array/src/scalar_fn/vtable.rs index 7ed7b1512ae..0fa06e833d3 100644 --- a/vortex-array/src/scalar_fn/vtable.rs +++ b/vortex-array/src/scalar_fn/vtable.rs @@ -1,13 +1,11 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use std::any::Any; use std::fmt; use std::fmt::Debug; use std::fmt::Display; use std::fmt::Formatter; use std::hash::Hash; -use std::ops::Deref; use std::sync::Arc; use arcref::ArcRef; @@ -21,6 +19,7 @@ use crate::ArrayRef; use crate::ExecutionCtx; use crate::dtype::DType; use crate::expr::Expression; +use crate::expr::ExpressionReduceNode; use crate::expr::StatsCatalog; use crate::expr::stats::Stat; use crate::expr::traversal::Node; @@ -279,29 +278,14 @@ pub trait ReduceCtx { ) -> VortexResult; } -#[derive(Clone)] -pub struct ReduceNodeRef(Arc); - -impl ReduceNodeRef { - pub fn new(inner: Arc) -> Self { - Self(inner) - } - - pub fn as_any(&self) -> &dyn Any { - self - } -} - -impl Deref for ReduceNodeRef { - type Target = dyn ReduceNode; - - fn deref(&self) -> &Self::Target { - self.0.as_ref() - } -} +pub type 
ReduceNodeRef = Arc; /// A node used for implementing abstract reduction rules. pub trait ReduceNode { + fn as_array(&self) -> ArrayRef; + + fn as_expression(&self) -> ExpressionReduceNode; + /// Return the data type of this node. fn node_dtype(&self) -> VortexResult; From b67524569ad8103ad9c0f610bd1d58e62bd363af Mon Sep 17 00:00:00 2001 From: Robert Kruszewski Date: Sun, 31 May 2026 00:59:09 +0100 Subject: [PATCH 16/17] more Signed-off-by: Robert Kruszewski --- vortex-array/src/array/erased.rs | 2 +- vortex-array/src/array/mod.rs | 11 +++++++---- vortex-array/src/array/vtable/mod.rs | 4 ++-- vortex-array/src/arrays/patched/vtable/mod.rs | 14 +++++++------- vortex-array/src/arrays/scalar_fn/array.rs | 6 ++++-- vortex-array/src/arrays/scalar_fn/rules.rs | 6 +++++- vortex-array/src/arrays/scalar_fn/vtable/mod.rs | 1 + vortex-python/src/arrays/py/vtable.rs | 2 +- 8 files changed, 28 insertions(+), 18 deletions(-) diff --git a/vortex-array/src/array/erased.rs b/vortex-array/src/array/erased.rs index 0d48a955421..21a1a98807b 100644 --- a/vortex-array/src/array/erased.rs +++ b/vortex-array/src/array/erased.rs @@ -637,7 +637,7 @@ impl ArrayRef { } /// Returns the nth child of the array without allocating a Vec. - pub fn nth_child(&self, idx: usize) -> Option { + pub fn nth_child(&self, idx: usize) -> Option<&ArrayRef> { self.0.data.nth_child(self, idx) } diff --git a/vortex-array/src/array/mod.rs b/vortex-array/src/array/mod.rs index 6b6746ee326..5204d5a9cf7 100644 --- a/vortex-array/src/array/mod.rs +++ b/vortex-array/src/array/mod.rs @@ -96,7 +96,7 @@ pub(crate) trait DynArrayData: 'static + private::Sealed + Send + Sync + Debug { /// Returns the nth child of the array without allocating a Vec. /// /// Returns `None` if the index is out of bounds. - fn nth_child(&self, this: &ArrayRef, idx: usize) -> Option; + fn nth_child<'a>(&'a self, this: &'a ArrayRef, idx: usize) -> Option<&'a ArrayRef>; /// Returns the names of the children of the array. 
fn children_names(&self, this: &ArrayRef) -> Vec; @@ -281,7 +281,10 @@ impl DynArrayData for ArrayData { fn children(&self, this: &ArrayRef) -> Vec { let view = unsafe { ArrayView::new_unchecked(this, &self.data) }; - (0..V::nchildren(view)).map(|i| V::child(view, i)).collect() + (0..V::nchildren(view)) + .map(|i| V::child(view, i)) + .cloned() + .collect() } fn nchildren(&self, this: &ArrayRef) -> usize { @@ -289,7 +292,7 @@ impl DynArrayData for ArrayData { V::nchildren(view) } - fn nth_child(&self, this: &ArrayRef, idx: usize) -> Option { + fn nth_child<'a>(&'a self, this: &'a ArrayRef, idx: usize) -> Option<&'a ArrayRef> { let view = unsafe { ArrayView::new_unchecked(this, &self.data) }; (idx < V::nchildren(view)).then(|| V::child(view, idx)) } @@ -304,7 +307,7 @@ impl DynArrayData for ArrayData { fn named_children(&self, this: &ArrayRef) -> Vec<(String, ArrayRef)> { let view = unsafe { ArrayView::new_unchecked(this, &self.data) }; (0..V::nchildren(view)) - .map(|i| (V::child_name(view, i), V::child(view, i))) + .map(|i| (V::child_name(view, i), V::child(view, i).clone())) .collect() } diff --git a/vortex-array/src/array/vtable/mod.rs b/vortex-array/src/array/vtable/mod.rs index a62fd2bfe62..00aef6d9c0a 100644 --- a/vortex-array/src/array/vtable/mod.rs +++ b/vortex-array/src/array/vtable/mod.rs @@ -97,11 +97,11 @@ pub trait VTable: 'static + Clone + Sized + Send + Sync + Debug { /// /// # Panics /// Panics if `idx >= nchildren(array)`. 
- fn child(array: ArrayView<'_, Self>, idx: usize) -> ArrayRef { + fn child(array: ArrayView<'_, Self>, idx: usize) -> &ArrayRef { array .slots() .iter() - .filter_map(|s| s.clone()) + .filter_map(|s| s.as_ref()) .nth(idx) .vortex_expect("child index out of bounds") } diff --git a/vortex-array/src/arrays/patched/vtable/mod.rs b/vortex-array/src/arrays/patched/vtable/mod.rs index a89c1539985..c8ae1076fa5 100644 --- a/vortex-array/src/arrays/patched/vtable/mod.rs +++ b/vortex-array/src/arrays/patched/vtable/mod.rs @@ -127,14 +127,14 @@ impl VTable for Patched { vortex_panic!("invalid buffer index for PatchedArray: {idx}"); } - fn child(array: ArrayView<'_, Self>, idx: usize) -> ArrayRef { - match idx { - PatchedSlots::INNER => array.inner().clone(), - PatchedSlots::LANE_OFFSETS => array.lane_offsets().clone(), - PatchedSlots::PATCH_INDICES => array.patch_indices().clone(), - PatchedSlots::PATCH_VALUES => array.patch_values().clone(), - _ => vortex_panic!("invalid child index for PatchedArray: {idx}"), + fn child(array: ArrayView<'_, Self>, idx: usize) -> &ArrayRef { + if idx > PatchedSlots::PATCH_VALUES { + vortex_panic!("invalid child index for PatchedArray: {idx}") } + + array.slots()[idx] + .as_ref() + .vortex_expect("child slot is None") } fn serialize( diff --git a/vortex-array/src/arrays/scalar_fn/array.rs b/vortex-array/src/arrays/scalar_fn/array.rs index a37f89f3cf8..577eb88f590 100644 --- a/vortex-array/src/arrays/scalar_fn/array.rs +++ b/vortex-array/src/arrays/scalar_fn/array.rs @@ -99,6 +99,7 @@ impl Array { /// `ArrayRef`, so callers can drive the parts through /// [`ParentRef::from_parts`](crate::ParentRef::from_parts)`.optimize()` and only pay /// the wrapper allocation when no reduction fires. 
+ #[inline] pub fn try_new_parts( scalar_fn: ScalarFnRef, children: Vec, @@ -106,8 +107,9 @@ impl Array { ) -> VortexResult> { let arg_dtypes: Vec<_> = children.iter().map(|c| c.dtype().clone()).collect(); let dtype = scalar_fn.return_dtype(&arg_dtypes)?; - let data = ScalarFnData::build(scalar_fn.clone(), children.clone(), len)?; - let vtable = ScalarFn { id: scalar_fn.id() }; + let id = scalar_fn.id(); + let data = ScalarFnData::build(scalar_fn, children.clone(), len)?; + let vtable = ScalarFn { id }; Ok(ArrayParts::new(vtable, dtype, len, data) .with_slots(children.into_iter().map(Some).collect::())) } diff --git a/vortex-array/src/arrays/scalar_fn/rules.rs b/vortex-array/src/arrays/scalar_fn/rules.rs index cac44069384..c4b9fec64a8 100644 --- a/vortex-array/src/arrays/scalar_fn/rules.rs +++ b/vortex-array/src/arrays/scalar_fn/rules.rs @@ -152,7 +152,11 @@ impl ReduceNode for ArrayRef { } fn child(&self, idx: usize) -> ReduceNodeRef { - Arc::new(self.nth_child(idx).vortex_expect("child idx out of bounds")) + Arc::new( + self.nth_child(idx) + .vortex_expect("child idx out of bounds") + .clone(), + ) } fn child_count(&self) -> usize { diff --git a/vortex-array/src/arrays/scalar_fn/vtable/mod.rs b/vortex-array/src/arrays/scalar_fn/vtable/mod.rs index ef6f9f522ea..276d6b9220a 100644 --- a/vortex-array/src/arrays/scalar_fn/vtable/mod.rs +++ b/vortex-array/src/arrays/scalar_fn/vtable/mod.rs @@ -174,6 +174,7 @@ pub trait ScalarFnFactoryExt: scalar_fn::ScalarFnVTable { /// Stops short of allocating the backing `ArrayRef`, so callers can drive the parts /// through [`ParentRef::from_parts`]`.optimize()` and only pay the wrapper allocation /// when no reduction fires. 
+ #[inline] fn try_new_array_parts( &self, len: usize, diff --git a/vortex-python/src/arrays/py/vtable.rs b/vortex-python/src/arrays/py/vtable.rs index 9e1461f3c51..b138233fb6c 100644 --- a/vortex-python/src/arrays/py/vtable.rs +++ b/vortex-python/src/arrays/py/vtable.rs @@ -87,7 +87,7 @@ impl VTable for PythonVTable { 0 } - fn child(_array: ArrayView<'_, Self>, idx: usize) -> ArrayRef { + fn child(_array: ArrayView<'_, Self>, idx: usize) -> &ArrayRef { vortex_panic!("PythonArray child index {idx} out of bounds") } From da6d4561c6358f0add4bf5722df4c91b5e4d443d Mon Sep 17 00:00:00 2001 From: Robert Kruszewski Date: Sun, 31 May 2026 01:53:30 +0100 Subject: [PATCH 17/17] owned Signed-off-by: Robert Kruszewski --- encodings/datetime-parts/src/compute/rules.rs | 3 +- vortex-array/src/array/erased.rs | 6 +-- vortex-array/src/array/parent.rs | 38 +++++++++++++++++++ .../src/arrays/chunked/compute/rules.rs | 5 +-- vortex-array/src/arrays/dict/compute/like.rs | 3 +- vortex-array/src/arrays/dict/compute/rules.rs | 5 +-- vortex-array/src/arrays/dict/take.rs | 3 +- vortex-array/src/arrays/filter/kernel.rs | 3 +- vortex-array/src/arrays/scalar_fn/array.rs | 23 +++++------ .../src/arrays/scalar_fn/vtable/mod.rs | 4 +- vortex-array/src/builtins.rs | 23 ++++++----- vortex-array/src/validity.rs | 3 +- vortex-layout/src/layouts/dict/reader.rs | 3 +- 13 files changed, 73 insertions(+), 49 deletions(-) diff --git a/encodings/datetime-parts/src/compute/rules.rs b/encodings/datetime-parts/src/compute/rules.rs index 60c50cdab75..81efb3d5f6f 100644 --- a/encodings/datetime-parts/src/compute/rules.rs +++ b/encodings/datetime-parts/src/compute/rules.rs @@ -4,7 +4,6 @@ use vortex_array::ArrayRef; use vortex_array::ArrayView; use vortex_array::IntoArray; -use vortex_array::ParentRef; use vortex_array::arrays::Constant; use vortex_array::arrays::ConstantArray; use vortex_array::arrays::Filter; @@ -135,7 +134,7 @@ impl ArrayParentReduceRule for DTPComparisonPushDownRule { let parts = 
ScalarFnArray::try_new_parts(parent.scalar_fn().clone(), new_children, parent.len())?; - let result = ParentRef::from_parts(&parts).optimize()?; + let result = parts.optimize()?; Ok(Some(result)) } diff --git a/vortex-array/src/array/erased.rs b/vortex-array/src/array/erased.rs index 21a1a98807b..53311cb104f 100644 --- a/vortex-array/src/array/erased.rs +++ b/vortex-array/src/array/erased.rs @@ -229,7 +229,7 @@ impl ArrayRef { } let sliced = SliceArray::try_new_parts(self.clone(), range)?; - let sliced = ParentRef::from_parts(&sliced).optimize()?; + let sliced = sliced.optimize()?; // Propagate some stats from the original array to the sliced array. if !sliced.is::() { @@ -255,13 +255,13 @@ impl ArrayRef { /// Wraps the array in a [`FilterArray`] such that it is logically filtered by the given mask. pub fn filter(&self, mask: Mask) -> VortexResult { let parts = FilterArray::try_new_parts(self.clone(), mask)?; - ParentRef::from_parts(&parts).optimize() + parts.optimize() } /// Wraps the array in a [`DictArray`] such that it is logically taken by the given indices. pub fn take(&self, indices: ArrayRef) -> VortexResult { let parts = DictArray::try_new_parts(indices, self.clone())?; - ParentRef::from_parts(&parts).optimize() + parts.optimize() } /// Fetch the scalar at the given index. diff --git a/vortex-array/src/array/parent.rs b/vortex-array/src/array/parent.rs index dead35d5519..22ae43865cb 100644 --- a/vortex-array/src/array/parent.rs +++ b/vortex-array/src/array/parent.rs @@ -241,6 +241,14 @@ impl<'a> ParentRef<'a> { } } + /// Consume this `ParentRef` and return the cached materialization, if one exists. + /// + /// This is used by owned [`ArrayParts::optimize`] to avoid materializing twice when + /// a stack-backed parent was forced into an [`ArrayRef`] by a rule that did not fire. + fn into_cached_array_ref(self) -> Option { + self.cache.into_inner() + } + /// Returns `true` if this parent's encoding matches `V`. 
/// /// Cheap encoding-id check that works for both heap- and stack-backed parents @@ -306,6 +314,36 @@ impl<'a> ParentRef<'a> { } } +impl ArrayParts { + /// Optimize already-valid construction parts, consuming the original parts on a miss. + /// + /// This mirrors [`ParentRef::optimize`], but keeps ownership of the original + /// [`ArrayParts`] until it knows whether a reduction fired. If no rule applies and + /// the stack-backed parent was not materialized by a rule, the result is built with + /// [`ArrayParts::into_array`] directly rather than cloning the parts through + /// [`ParentRef::into_array_ref`]. + pub fn optimize(self) -> VortexResult { + let parent = ParentRef::from_parts(&self); + if let Some(reduced) = parent.reduce()? { + return reduced.optimize(); + } + + for (slot_idx, slot) in parent.slots.iter().enumerate() { + let Some(child) = slot else { continue }; + + if let Some(reduced) = child.reduce_parent(&parent, slot_idx)? { + return reduced.optimize(); + } + } + + if let Some(cached) = parent.into_cached_array_ref() { + return Ok(cached); + } + + Ok(self.into_array()) + } +} + impl Debug for ParentRef<'_> { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { let heap_backed = matches!(self.data, ParentData::Heap { .. 
}); diff --git a/vortex-array/src/arrays/chunked/compute/rules.rs b/vortex-array/src/arrays/chunked/compute/rules.rs index 973879c1590..ba2311cdcb9 100644 --- a/vortex-array/src/arrays/chunked/compute/rules.rs +++ b/vortex-array/src/arrays/chunked/compute/rules.rs @@ -6,7 +6,6 @@ use vortex_error::VortexResult; use crate::ArrayRef; use crate::IntoArray; -use crate::ParentRef; use crate::array::ArrayView; use crate::arrays::Chunked; use crate::arrays::ChunkedArray; @@ -53,7 +52,7 @@ impl ArrayParentReduceRule for ChunkedUnaryScalarFnPushDownRule { vec![chunk.clone()], chunk.len(), )?; - ParentRef::from_parts(&parts).optimize() + parts.optimize() }) .try_collect()?; @@ -108,7 +107,7 @@ impl ArrayParentReduceRule for ChunkedConstantScalarFnPushDownRule { new_children, chunk.len(), )?; - ParentRef::from_parts(&parts).optimize() + parts.optimize() }) .try_collect()?; diff --git a/vortex-array/src/arrays/dict/compute/like.rs b/vortex-array/src/arrays/dict/compute/like.rs index d74af84db03..76ca090362d 100644 --- a/vortex-array/src/arrays/dict/compute/like.rs +++ b/vortex-array/src/arrays/dict/compute/like.rs @@ -7,7 +7,6 @@ use super::Dict; use super::DictArray; use crate::ArrayRef; use crate::IntoArray; -use crate::ParentRef; use crate::array::ArrayView; use crate::arrays::ConstantArray; use crate::arrays::dict::DictArrayExt; @@ -35,7 +34,7 @@ impl LikeReduce for Dict { options, [array.values().clone(), pattern], )?; - let values = ParentRef::from_parts(&parts).optimize()?; + let values = parts.optimize()?; // SAFETY: LIKE preserves the len of the values, so codes are still pointing at // valid positions. 
diff --git a/vortex-array/src/arrays/dict/compute/rules.rs b/vortex-array/src/arrays/dict/compute/rules.rs index a18f7e17d37..aadbd09b002 100644 --- a/vortex-array/src/arrays/dict/compute/rules.rs +++ b/vortex-array/src/arrays/dict/compute/rules.rs @@ -6,7 +6,6 @@ use vortex_error::VortexResult; use crate::ArrayEq; use crate::ArrayRef; use crate::IntoArray; -use crate::ParentRef; use crate::Precision; use crate::array::ArrayView; use crate::array::VTable; @@ -128,7 +127,7 @@ impl ArrayParentReduceRule for DictionaryScalarFnValuesPushDownRule { let parts = ScalarFnArray::try_new_parts(parent.scalar_fn().clone(), new_children, values_len)?; - let new_values = ParentRef::from_parts(&parts).optimize()?; + let new_values = parts.optimize()?; // We can only push down null-sensitive functions when we have all-valid codes. // In these cases, we cannot have the codes influence the nullability of the output DType. @@ -197,7 +196,7 @@ impl ArrayParentReduceRule for DictionaryScalarFnCodesPullUpRule { new_children, array.values().len(), )?; - let new_values = ParentRef::from_parts(&parts).optimize()?; + let new_values = parts.optimize()?; let new_dict = unsafe { DictArray::new_unchecked(array.codes().clone(), new_values) }.into_array(); diff --git a/vortex-array/src/arrays/dict/take.rs b/vortex-array/src/arrays/dict/take.rs index 7ff1b659337..391dc88c6d6 100644 --- a/vortex-array/src/arrays/dict/take.rs +++ b/vortex-array/src/arrays/dict/take.rs @@ -176,7 +176,6 @@ mod tests { use vortex_error::VortexResult; use crate::IntoArray; - use crate::ParentRef; use crate::arrays::Constant; use crate::arrays::ConstantArray; use crate::arrays::DictArray; @@ -188,7 +187,7 @@ mod tests { let values = ConstantArray::new(7i32, 1).into_array(); let parts = DictArray::try_new_parts(indices, values)?; - let reduced = ParentRef::from_parts(&parts).optimize()?; + let reduced = parts.optimize()?; assert!(reduced.is::()); assert_eq!(reduced.len(), 3); diff --git 
a/vortex-array/src/arrays/filter/kernel.rs b/vortex-array/src/arrays/filter/kernel.rs index 0f75770999d..3cdbdcd6a7a 100644 --- a/vortex-array/src/arrays/filter/kernel.rs +++ b/vortex-array/src/arrays/filter/kernel.rs @@ -137,7 +137,6 @@ mod tests { use vortex_mask::Mask; use crate::IntoArray; - use crate::ParentRef; use crate::arrays::Constant; use crate::arrays::ConstantArray; use crate::arrays::FilterArray; @@ -147,7 +146,7 @@ mod tests { let child = ConstantArray::new(7i32, 4).into_array(); let parts = FilterArray::try_new_parts(child, Mask::from_iter([true, false, true, false]))?; - let reduced = ParentRef::from_parts(&parts).optimize()?; + let reduced = parts.optimize()?; assert!(reduced.is::()); assert_eq!(reduced.len(), 2); diff --git a/vortex-array/src/arrays/scalar_fn/array.rs b/vortex-array/src/arrays/scalar_fn/array.rs index 577eb88f590..627f1515811 100644 --- a/vortex-array/src/arrays/scalar_fn/array.rs +++ b/vortex-array/src/arrays/scalar_fn/array.rs @@ -31,16 +31,8 @@ impl Display for ScalarFnData { impl ScalarFnData { /// Create a new ScalarFnArray from a scalar function and its children. - pub fn build( - scalar_fn: ScalarFnRef, - children: Vec, - len: usize, - ) -> VortexResult { - vortex_ensure!( - children.iter().all(|c| c.len() == len), - "ScalarFnArray must have children equal to the array length" - ); - Ok(Self { scalar_fn }) + fn build(scalar_fn: ScalarFnRef) -> Self { + Self { scalar_fn } } /// Get the scalar function bound to this array. @@ -96,19 +88,22 @@ impl Array { /// Build the [`ArrayParts`] for a ScalarFnArray without materializing it. /// /// Mirrors [`try_new`](Self::try_new) but stops short of allocating the backing - /// `ArrayRef`, so callers can drive the parts through - /// [`ParentRef::from_parts`](crate::ParentRef::from_parts)`.optimize()` and only pay - /// the wrapper allocation when no reduction fires. 
+ /// `ArrayRef`, so callers can drive the parts through [`ArrayParts::optimize`] and + /// only pay the wrapper allocation when no reduction fires. #[inline] pub fn try_new_parts( scalar_fn: ScalarFnRef, children: Vec, len: usize, ) -> VortexResult> { + vortex_ensure!( + children.iter().all(|c| c.len() == len), + "ScalarFnArray must have children equal to the array length" + ); let arg_dtypes: Vec<_> = children.iter().map(|c| c.dtype().clone()).collect(); let dtype = scalar_fn.return_dtype(&arg_dtypes)?; let id = scalar_fn.id(); - let data = ScalarFnData::build(scalar_fn, children.clone(), len)?; + let data = ScalarFnData::build(scalar_fn); let vtable = ScalarFn { id }; Ok(ArrayParts::new(vtable, dtype, len, data) .with_slots(children.into_iter().map(Some).collect::())) diff --git a/vortex-array/src/arrays/scalar_fn/vtable/mod.rs b/vortex-array/src/arrays/scalar_fn/vtable/mod.rs index 276d6b9220a..a86a933b08e 100644 --- a/vortex-array/src/arrays/scalar_fn/vtable/mod.rs +++ b/vortex-array/src/arrays/scalar_fn/vtable/mod.rs @@ -172,8 +172,8 @@ pub trait ScalarFnFactoryExt: scalar_fn::ScalarFnVTable { /// Build the [`ArrayParts`] for this scalar function applied to `children`. /// /// Stops short of allocating the backing `ArrayRef`, so callers can drive the parts - /// through [`ParentRef::from_parts`]`.optimize()` and only pay the wrapper allocation - /// when no reduction fires. + /// through [`ArrayParts::optimize`] and only pay the wrapper allocation when no + /// reduction fires. 
#[inline] fn try_new_array_parts( &self, diff --git a/vortex-array/src/builtins.rs b/vortex-array/src/builtins.rs index 04a6c7b7b9b..37ceead9aa7 100644 --- a/vortex-array/src/builtins.rs +++ b/vortex-array/src/builtins.rs @@ -13,7 +13,6 @@ use vortex_error::VortexResult; use crate::ArrayRef; use crate::IntoArray; -use crate::ParentRef; use crate::arrays::ConstantArray; use crate::arrays::scalar_fn::ScalarFnFactoryExt; use crate::dtype::DType; @@ -161,7 +160,7 @@ impl ArrayBuiltins for ArrayRef { return Ok(self.clone()); } let parts = Cast.try_new_array_parts(self.len(), dtype, [self.clone()])?; - ParentRef::from_parts(&parts).optimize() + parts.optimize() } fn fill_null(&self, fill_value: impl Into) -> VortexResult { @@ -177,49 +176,49 @@ impl ArrayBuiltins for ArrayRef { ConstantArray::new(fill_value, self.len()).into_array(), ], )?; - ParentRef::from_parts(&parts).optimize() + parts.optimize() } fn get_item(&self, field_name: impl Into) -> VortexResult { let parts = GetItem.try_new_array_parts(self.len(), field_name.into(), [self.clone()])?; - ParentRef::from_parts(&parts).optimize() + parts.optimize() } fn is_null(&self) -> VortexResult { let parts = IsNull.try_new_array_parts(self.len(), EmptyOptions, [self.clone()])?; - ParentRef::from_parts(&parts).optimize() + parts.optimize() } fn is_not_null(&self) -> VortexResult { let parts = IsNotNull.try_new_array_parts(self.len(), EmptyOptions, [self.clone()])?; - ParentRef::from_parts(&parts).optimize() + parts.optimize() } fn mask(self, mask: ArrayRef) -> VortexResult { let parts = Mask.try_new_array_parts(self.len(), EmptyOptions, [self, mask])?; - ParentRef::from_parts(&parts).optimize() + parts.optimize() } fn not(&self) -> VortexResult { let parts = Not.try_new_array_parts(self.len(), EmptyOptions, [self.clone()])?; - ParentRef::from_parts(&parts).optimize() + parts.optimize() } fn zip(&self, if_true: ArrayRef, if_false: ArrayRef) -> VortexResult { let parts = Zip.try_new_array_parts(self.len(), EmptyOptions, 
[if_true, if_false, self.clone()])?; - ParentRef::from_parts(&parts).optimize() + parts.optimize() } fn list_contains(&self, value: ArrayRef) -> VortexResult { let parts = ListContains.try_new_array_parts(self.len(), EmptyOptions, [self.clone(), value])?; - ParentRef::from_parts(&parts).optimize() + parts.optimize() } fn binary(&self, rhs: ArrayRef, op: Operator) -> VortexResult { let parts = Binary.try_new_array_parts(self.len(), op, [self.clone(), rhs])?; - ParentRef::from_parts(&parts).optimize() + parts.optimize() } fn between( @@ -229,6 +228,6 @@ impl ArrayBuiltins for ArrayRef { options: BetweenOptions, ) -> VortexResult { let parts = Between.try_new_array_parts(self.len(), options, [self, lower, upper])?; - ParentRef::from_parts(&parts).optimize() + parts.optimize() } } diff --git a/vortex-array/src/validity.rs b/vortex-array/src/validity.rs index c60a164a5d6..314b9992fed 100644 --- a/vortex-array/src/validity.rs +++ b/vortex-array/src/validity.rs @@ -20,7 +20,6 @@ use crate::Canonical; use crate::ExecutionCtx; use crate::IntoArray; use crate::LEGACY_SESSION; -use crate::ParentRef; use crate::VortexSessionExecute; use crate::arrays::BoolArray; use crate::arrays::ChunkedArray; @@ -267,7 +266,7 @@ impl Validity { // Here we actually have to do some work (Validity::Array(lhs), Validity::Array(rhs)) => { let parts = Binary.try_new_array_parts(lhs.len(), Operator::And, [lhs, rhs])?; - Validity::Array(ParentRef::from_parts(&parts).optimize()?) + Validity::Array(parts.optimize()?) 
} }) } diff --git a/vortex-layout/src/layouts/dict/reader.rs b/vortex-layout/src/layouts/dict/reader.rs index 1bb286a3b6c..424eb635c41 100644 --- a/vortex-layout/src/layouts/dict/reader.rs +++ b/vortex-layout/src/layouts/dict/reader.rs @@ -14,7 +14,6 @@ use futures::try_join; use vortex_array::ArrayRef; use vortex_array::IntoArray; use vortex_array::MaskFuture; -use vortex_array::ParentRef; use vortex_array::VortexSessionExecute; use vortex_array::arrays::DictArray; use vortex_array::arrays::SharedArray; @@ -243,7 +242,7 @@ impl LayoutReader for DictReader { // only affect correctness not memory safety. let parts = unsafe { DictArray::new_unchecked_parts(codes, values, all_values_referenced) }; - let array = ParentRef::from_parts(&parts).optimize()?; + let array = parts.optimize()?; array.apply(&expr) }