diff --git a/encodings/alp/src/alp/array.rs b/encodings/alp/src/alp/array.rs index cac93d1d27d..f3ed1831d6a 100644 --- a/encodings/alp/src/alp/array.rs +++ b/encodings/alp/src/alp/array.rs @@ -19,6 +19,7 @@ use vortex_array::ArrayView; use vortex_array::ExecutionCtx; use vortex_array::ExecutionResult; use vortex_array::IntoArray; +use vortex_array::ParentRef; use vortex_array::Precision; use vortex_array::TypedArrayRef; use vortex_array::array_slots; @@ -183,7 +184,7 @@ impl VTable for ALP { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { RULES.evaluate(array, parent, child_idx) @@ -421,8 +422,8 @@ pub trait ALPArrayExt: ALPArraySlotsExt { fn patches(&self) -> Option { PatchesData::patches_from_slots( self.patches_data.as_ref(), - self.as_ref().len(), - self.as_ref().slots(), + self.len(), + self.slots(), PATCH_SLOTS, ) } diff --git a/encodings/alp/src/alp_rd/array.rs b/encodings/alp/src/alp_rd/array.rs index 099b9e1a3ea..b186d49a02a 100644 --- a/encodings/alp/src/alp_rd/array.rs +++ b/encodings/alp/src/alp_rd/array.rs @@ -22,6 +22,7 @@ use vortex_array::ExecutionCtx; use vortex_array::ExecutionResult; use vortex_array::IntoArray; use vortex_array::LEGACY_SESSION; +use vortex_array::ParentRef; use vortex_array::Precision; use vortex_array::TypedArrayRef; use vortex_array::VortexSessionExecute; @@ -302,7 +303,7 @@ impl VTable for ALPRD { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { RULES.evaluate(array, parent, child_idx) @@ -578,11 +579,11 @@ fn validate_parts( pub trait ALPRDArrayExt: TypedArrayRef { fn left_parts(&self) -> &ArrayRef { - left_parts_from_slots(self.as_ref().slots()) + left_parts_from_slots(self.slots()) } fn right_parts(&self) -> &ArrayRef { - right_parts_from_slots(self.as_ref().slots()) + right_parts_from_slots(self.slots()) } fn right_bit_width(&self) -> u8 { @@ -590,11 +591,7 
@@ pub trait ALPRDArrayExt: TypedArrayRef { } fn left_parts_patches(&self) -> Option { - patches_from_slots( - self.as_ref().slots(), - self.patches_data.as_ref(), - self.as_ref().len(), - ) + patches_from_slots(self.slots(), self.patches_data.as_ref(), self.len()) } fn left_parts_dictionary(&self) -> &Buffer { diff --git a/encodings/bytebool/src/array.rs b/encodings/bytebool/src/array.rs index 40da3809d01..a574bd24516 100644 --- a/encodings/bytebool/src/array.rs +++ b/encodings/bytebool/src/array.rs @@ -17,6 +17,7 @@ use vortex_array::ArrayView; use vortex_array::ExecutionCtx; use vortex_array::ExecutionResult; use vortex_array::IntoArray; +use vortex_array::ParentRef; use vortex_array::Precision; use vortex_array::TypedArrayRef; use vortex_array::arrays::BoolArray; @@ -143,7 +144,7 @@ impl VTable for ByteBool { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { crate::rules::RULES.evaluate(array, parent, child_idx) @@ -187,8 +188,8 @@ impl Display for ByteBoolData { pub trait ByteBoolArrayExt: TypedArrayRef { fn validity(&self) -> Validity { child_to_validity( - self.as_ref().slots()[VALIDITY_SLOT].as_ref(), - self.as_ref().dtype().nullability(), + self.slots()[VALIDITY_SLOT].as_ref(), + self.dtype().nullability(), ) } } diff --git a/encodings/datetime-parts/src/array.rs b/encodings/datetime-parts/src/array.rs index e603b1ddfe5..db773010bbf 100644 --- a/encodings/datetime-parts/src/array.rs +++ b/encodings/datetime-parts/src/array.rs @@ -18,6 +18,7 @@ use vortex_array::ArrayView; use vortex_array::ExecutionCtx; use vortex_array::ExecutionResult; use vortex_array::IntoArray; +use vortex_array::ParentRef; use vortex_array::Precision; use vortex_array::array_slots; use vortex_array::arrays::Primitive; @@ -195,7 +196,7 @@ impl VTable for DateTimeParts { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { 
PARENT_RULES.evaluate(array, parent, child_idx) diff --git a/encodings/datetime-parts/src/compute/rules.rs b/encodings/datetime-parts/src/compute/rules.rs index a8b1b2eca83..81efb3d5f6f 100644 --- a/encodings/datetime-parts/src/compute/rules.rs +++ b/encodings/datetime-parts/src/compute/rules.rs @@ -16,7 +16,6 @@ use vortex_array::arrays::slice::SliceReduceAdaptor; use vortex_array::builtins::ArrayBuiltins; use vortex_array::dtype::DType; use vortex_array::extension::datetime::Timestamp; -use vortex_array::optimizer::ArrayOptimizer; use vortex_array::optimizer::rules::ArrayParentReduceRule; use vortex_array::optimizer::rules::ParentRuleSet; use vortex_array::scalar_fn::fns::between::Between; @@ -133,10 +132,9 @@ impl ArrayParentReduceRule for DTPComparisonPushDownRule { } } - let result = - ScalarFnArray::try_new(parent.scalar_fn().clone(), new_children, parent.len())? - .into_array() - .optimize()?; + let parts = + ScalarFnArray::try_new_parts(parent.scalar_fn().clone(), new_children, parent.len())?; + let result = parts.optimize()?; Ok(Some(result)) } diff --git a/encodings/decimal-byte-parts/src/decimal_byte_parts/mod.rs b/encodings/decimal-byte-parts/src/decimal_byte_parts/mod.rs index 76349f40910..e56f5cbb02b 100644 --- a/encodings/decimal-byte-parts/src/decimal_byte_parts/mod.rs +++ b/encodings/decimal-byte-parts/src/decimal_byte_parts/mod.rs @@ -20,6 +20,7 @@ use vortex_array::ArrayRef; use vortex_array::ExecutionCtx; use vortex_array::ExecutionResult; use vortex_array::IntoArray; +use vortex_array::ParentRef; use vortex_array::Precision; use vortex_array::TypedArrayRef; use vortex_array::arrays::DecimalArray; @@ -156,7 +157,7 @@ impl VTable for DecimalByteParts { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { PARENT_RULES.evaluate(array, parent, child_idx) @@ -206,7 +207,7 @@ pub struct DecimalBytePartsDataParts { pub trait DecimalBytePartsArrayExt: TypedArrayRef { fn 
msp(&self) -> &ArrayRef { - self.as_ref().slots()[MSP_SLOT] + self.slots()[MSP_SLOT] .as_ref() .vortex_expect("DecimalBytePartsArray msp slot") } diff --git a/encodings/fastlanes/src/bitpacking/array/mod.rs b/encodings/fastlanes/src/bitpacking/array/mod.rs index e5c64252fbc..2c13ea38363 100644 --- a/encodings/fastlanes/src/bitpacking/array/mod.rs +++ b/encodings/fastlanes/src/bitpacking/array/mod.rs @@ -295,15 +295,15 @@ pub trait BitPackedArrayExt: BitPackedArraySlotsExt { fn patches(&self) -> Option { PatchesData::patches_from_slots( self.patches_data.as_ref(), - self.as_ref().len(), - self.as_ref().slots(), + self.len(), + self.slots(), PATCH_SLOTS, ) } #[inline] fn validity(&self) -> Validity { - child_to_validity(self.validity_child(), self.as_ref().dtype().nullability()) + child_to_validity(self.validity_child(), self.dtype().nullability()) } #[inline] @@ -313,7 +313,7 @@ pub trait BitPackedArrayExt: BitPackedArraySlotsExt { #[inline] fn unpacked_chunks(&self) -> VortexResult> { - BitPackedData::unpacked_chunks::(self, self.as_ref().dtype(), self.as_ref().len()) + BitPackedData::unpacked_chunks::(self, self.dtype(), self.len()) } } diff --git a/encodings/fastlanes/src/bitpacking/compute/slice.rs b/encodings/fastlanes/src/bitpacking/compute/slice.rs index c019cccd003..1426795f27f 100644 --- a/encodings/fastlanes/src/bitpacking/compute/slice.rs +++ b/encodings/fastlanes/src/bitpacking/compute/slice.rs @@ -73,6 +73,7 @@ fn slice_bitpacked( mod tests { use vortex_array::IntoArray; use vortex_array::LEGACY_SESSION; + use vortex_array::ParentRef; use vortex_array::VortexSessionExecute; use vortex_array::arrays::PrimitiveArray; use vortex_array::arrays::SliceArray; @@ -87,11 +88,11 @@ mod tests { let values = PrimitiveArray::from_iter(0u32..2048); let bitpacked = bitpack_encode(&values, 11, None, &mut ctx)?; - let slice_array = SliceArray::new(bitpacked.clone().into_array(), 500..1500); + let slice_array = SliceArray::new(bitpacked.clone().into_array(), 
500..1500).into_array(); let bitpacked_ref = bitpacked.into_array(); let reduced = bitpacked_ref - .reduce_parent(&slice_array.into_array(), 0)? + .reduce_parent(&ParentRef::from_array_ref(&slice_array), 0)? .expect("expected slice kernel to execute"); assert!(reduced.is::()); diff --git a/encodings/fastlanes/src/bitpacking/vtable/mod.rs b/encodings/fastlanes/src/bitpacking/vtable/mod.rs index 912dd4ff44b..4a850bdc343 100644 --- a/encodings/fastlanes/src/bitpacking/vtable/mod.rs +++ b/encodings/fastlanes/src/bitpacking/vtable/mod.rs @@ -16,6 +16,7 @@ use vortex_array::ArrayView; use vortex_array::ExecutionCtx; use vortex_array::ExecutionResult; use vortex_array::IntoArray; +use vortex_array::ParentRef; use vortex_array::Precision; use vortex_array::buffer::BufferHandle; use vortex_array::builders::ArrayBuilder; @@ -279,7 +280,7 @@ impl VTable for BitPacked { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { RULES.evaluate(array, parent, child_idx) diff --git a/encodings/fastlanes/src/bitpacking/vtable/operations.rs b/encodings/fastlanes/src/bitpacking/vtable/operations.rs index 4c277163719..01540cf210a 100644 --- a/encodings/fastlanes/src/bitpacking/vtable/operations.rs +++ b/encodings/fastlanes/src/bitpacking/vtable/operations.rs @@ -35,6 +35,7 @@ mod test { use vortex_array::ArrayRef; use vortex_array::IntoArray; use vortex_array::LEGACY_SESSION; + use vortex_array::ParentRef; use vortex_array::VortexSessionExecute; use vortex_array::arrays::PrimitiveArray; use vortex_array::arrays::SliceArray; @@ -63,9 +64,9 @@ mod test { fn slice_via_reduce(array: &BitPackedArray, range: Range) -> BitPackedArray { let array_ref = array.clone().into_array(); - let slice_array = SliceArray::new(array_ref.clone(), range); + let slice_array = SliceArray::new(array_ref.clone(), range).into_array(); let sliced = array_ref - .reduce_parent(&slice_array.into_array(), 0) + 
.reduce_parent(&ParentRef::from_array_ref(&slice_array), 0) .expect("execute_parent failed") .expect("expected slice kernel to execute"); sliced.as_::().into_owned() diff --git a/encodings/fastlanes/src/delta/array/mod.rs b/encodings/fastlanes/src/delta/array/mod.rs index 33ece0deddd..ffc8dfc49be 100644 --- a/encodings/fastlanes/src/delta/array/mod.rs +++ b/encodings/fastlanes/src/delta/array/mod.rs @@ -90,13 +90,13 @@ impl Display for DeltaData { pub trait DeltaArrayExt: TypedArrayRef { fn bases(&self) -> &ArrayRef { - self.as_ref().slots()[BASES_SLOT] + self.slots()[BASES_SLOT] .as_ref() .vortex_expect("DeltaArray bases slot") } fn deltas(&self) -> &ArrayRef { - self.as_ref().slots()[DELTAS_SLOT] + self.slots()[DELTAS_SLOT] .as_ref() .vortex_expect("DeltaArray deltas slot") } diff --git a/encodings/fastlanes/src/delta/vtable/mod.rs b/encodings/fastlanes/src/delta/vtable/mod.rs index b5e68791ceb..30b51dfcf58 100644 --- a/encodings/fastlanes/src/delta/vtable/mod.rs +++ b/encodings/fastlanes/src/delta/vtable/mod.rs @@ -15,6 +15,7 @@ use vortex_array::ArrayView; use vortex_array::ExecutionCtx; use vortex_array::ExecutionResult; use vortex_array::IntoArray; +use vortex_array::ParentRef; use vortex_array::Precision; use vortex_array::arrays::PrimitiveArray; use vortex_array::buffer::BufferHandle; @@ -110,7 +111,7 @@ impl VTable for Delta { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { rules::RULES.evaluate(array, parent, child_idx) diff --git a/encodings/fastlanes/src/for/array/mod.rs b/encodings/fastlanes/src/for/array/mod.rs index 691888ef5da..9554402abec 100644 --- a/encodings/fastlanes/src/for/array/mod.rs +++ b/encodings/fastlanes/src/for/array/mod.rs @@ -31,7 +31,7 @@ pub struct FoRData { pub trait FoRArrayExt: TypedArrayRef { fn encoded(&self) -> &ArrayRef { - self.as_ref().slots()[ENCODED_SLOT] + self.slots()[ENCODED_SLOT] .as_ref() .vortex_expect("FoRArray encoded slot") } 
diff --git a/encodings/fastlanes/src/for/vtable/mod.rs b/encodings/fastlanes/src/for/vtable/mod.rs index 899276341de..0a86b30c166 100644 --- a/encodings/fastlanes/src/for/vtable/mod.rs +++ b/encodings/fastlanes/src/for/vtable/mod.rs @@ -15,6 +15,7 @@ use vortex_array::ArrayView; use vortex_array::ExecutionCtx; use vortex_array::ExecutionResult; use vortex_array::IntoArray; +use vortex_array::ParentRef; use vortex_array::Precision; use vortex_array::arrays::PrimitiveArray; use vortex_array::buffer::BufferHandle; @@ -141,7 +142,7 @@ impl VTable for FoR { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { PARENT_RULES.evaluate(array, parent, child_idx) diff --git a/encodings/fastlanes/src/rle/array/mod.rs b/encodings/fastlanes/src/rle/array/mod.rs index 8793179c7d3..923399a61b8 100644 --- a/encodings/fastlanes/src/rle/array/mod.rs +++ b/encodings/fastlanes/src/rle/array/mod.rs @@ -82,21 +82,21 @@ impl RLEData { pub trait RLEArrayExt: TypedArrayRef { #[inline] fn values(&self) -> &ArrayRef { - self.as_ref().slots()[VALUES_SLOT] + self.slots()[VALUES_SLOT] .as_ref() .vortex_expect("RLEArray values slot must be populated") } #[inline] fn indices(&self) -> &ArrayRef { - self.as_ref().slots()[INDICES_SLOT] + self.slots()[INDICES_SLOT] .as_ref() .vortex_expect("RLEArray indices slot must be populated") } #[inline] fn values_idx_offsets(&self) -> &ArrayRef { - self.as_ref().slots()[VALUES_IDX_OFFSETS_SLOT] + self.slots()[VALUES_IDX_OFFSETS_SLOT] .as_ref() .vortex_expect("RLEArray values_idx_offsets slot must be populated") } diff --git a/encodings/fastlanes/src/rle/vtable/mod.rs b/encodings/fastlanes/src/rle/vtable/mod.rs index 989b462ab99..995c2ca5646 100644 --- a/encodings/fastlanes/src/rle/vtable/mod.rs +++ b/encodings/fastlanes/src/rle/vtable/mod.rs @@ -15,6 +15,7 @@ use vortex_array::ArrayView; use vortex_array::ExecutionCtx; use vortex_array::ExecutionResult; use vortex_array::IntoArray; 
+use vortex_array::ParentRef; use vortex_array::Precision; use vortex_array::arrays::Primitive; use vortex_array::buffer::BufferHandle; @@ -124,7 +125,7 @@ impl VTable for RLE { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { RULES.evaluate(array, parent, child_idx) diff --git a/encodings/fsst/src/array.rs b/encodings/fsst/src/array.rs index 617908e94dd..26349cec8f6 100644 --- a/encodings/fsst/src/array.rs +++ b/encodings/fsst/src/array.rs @@ -25,6 +25,7 @@ use vortex_array::ExecutionCtx; use vortex_array::ExecutionResult; use vortex_array::IntoArray; use vortex_array::LEGACY_SESSION; +use vortex_array::ParentRef; use vortex_array::Precision; use vortex_array::TypedArrayRef; use vortex_array::VortexSessionExecute; @@ -319,7 +320,7 @@ impl VTable for FSST { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { RULES.evaluate(array, parent, child_idx) @@ -706,7 +707,7 @@ fn uncompressed_lengths_from_slots(slots: &[Option]) -> &ArrayRef { pub trait FSSTArrayExt: TypedArrayRef { fn uncompressed_lengths(&self) -> &ArrayRef { - uncompressed_lengths_from_slots(self.as_ref().slots()) + uncompressed_lengths_from_slots(self.slots()) } fn uncompressed_lengths_dtype(&self) -> &DType { @@ -716,13 +717,13 @@ pub trait FSSTArrayExt: TypedArrayRef { /// Reconstruct a [`VarBinArray`] for the compressed codes by combining the bytes /// from [`FSSTData`] with the offsets and validity stored in the array's slots. 
fn codes(&self) -> VarBinArray { - let offsets = self.as_ref().slots()[CODES_OFFSETS_SLOT] + let offsets = self.slots()[CODES_OFFSETS_SLOT] .as_ref() .vortex_expect("FSSTArray codes_offsets slot") .clone(); let validity = child_to_validity( - self.as_ref().slots()[CODES_VALIDITY_SLOT].as_ref(), - self.as_ref().dtype().nullability(), + self.slots()[CODES_VALIDITY_SLOT].as_ref(), + self.dtype().nullability(), ); let codes_bytes = self.codes_bytes_handle().clone(); // SAFETY: components were validated at construction time. @@ -730,7 +731,7 @@ pub trait FSSTArrayExt: TypedArrayRef { VarBinArray::new_unchecked_from_handle( offsets, codes_bytes, - DType::Binary(self.as_ref().dtype().nullability()), + DType::Binary(self.dtype().nullability()), validity, ) } @@ -738,7 +739,7 @@ pub trait FSSTArrayExt: TypedArrayRef { /// Get the DType of the codes array. fn codes_dtype(&self) -> DType { - DType::Binary(self.as_ref().dtype().nullability()) + DType::Binary(self.dtype().nullability()) } } diff --git a/encodings/parquet-variant/src/array.rs b/encodings/parquet-variant/src/array.rs index d7004af4ef8..0c34dd67bc3 100644 --- a/encodings/parquet-variant/src/array.rs +++ b/encodings/parquet-variant/src/array.rs @@ -311,7 +311,7 @@ fn inferred_shredded_field_validity( pub trait ParquetVariantArrayExt: TypedArrayRef { /// Returns the non-nullable Parquet Variant metadata child. fn metadata_array(&self) -> &ArrayRef { - self.as_ref().slots()[METADATA_SLOT] + self.slots()[METADATA_SLOT] .as_ref() .vortex_expect("ParquetVariantArray metadata slot") } @@ -319,19 +319,19 @@ pub trait ParquetVariantArrayExt: TypedArrayRef { /// Returns the outer row validity for the Variant values. fn validity(&self) -> Validity { child_to_validity( - self.as_ref().slots()[VALIDITY_SLOT].as_ref(), - self.as_ref().dtype().nullability(), + self.slots()[VALIDITY_SLOT].as_ref(), + self.dtype().nullability(), ) } /// Returns the optional raw Parquet Variant `value` child. 
fn value_array(&self) -> Option<&ArrayRef> { - self.as_ref().slots()[VALUE_SLOT].as_ref() + self.slots()[VALUE_SLOT].as_ref() } /// Returns the optional shredded Parquet Variant `typed_value` child. fn typed_value_array(&self) -> Option<&ArrayRef> { - self.as_ref().slots()[TYPED_VALUE_SLOT].as_ref() + self.slots()[TYPED_VALUE_SLOT].as_ref() } /// Converts this storage array to Arrow's canonical Parquet Variant extension storage. @@ -342,7 +342,7 @@ pub trait ParquetVariantArrayExt: TypedArrayRef { fn to_arrow(&self, ctx: &mut ExecutionCtx) -> VortexResult { let metadata = self.metadata_array(); let len = metadata.len(); - let nulls = to_arrow_null_buffer(self.validity(), len, ctx)?; + let nulls = to_arrow_null_buffer(ParquetVariantArrayExt::validity(self), len, ctx)?; let mut fields = Vec::with_capacity(3); let mut arrays: Vec = Vec::with_capacity(3); diff --git a/encodings/pco/src/array.rs b/encodings/pco/src/array.rs index 94db1014c79..47780f7fe43 100644 --- a/encodings/pco/src/array.rs +++ b/encodings/pco/src/array.rs @@ -28,6 +28,7 @@ use vortex_array::ArrayView; use vortex_array::ExecutionCtx; use vortex_array::ExecutionResult; use vortex_array::IntoArray; +use vortex_array::ParentRef; use vortex_array::Precision; use vortex_array::arrays::Primitive; use vortex_array::arrays::PrimitiveArray; @@ -231,7 +232,7 @@ impl VTable for Pco { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { crate::rules::RULES.evaluate(array, parent, child_idx) diff --git a/encodings/runend/src/array.rs b/encodings/runend/src/array.rs index 943f4881806..68384559a1b 100644 --- a/encodings/runend/src/array.rs +++ b/encodings/runend/src/array.rs @@ -19,6 +19,7 @@ use vortex_array::ExecutionCtx; use vortex_array::ExecutionResult; use vortex_array::IntoArray; use vortex_array::LEGACY_SESSION; +use vortex_array::ParentRef; use vortex_array::Precision; use vortex_array::TypedArrayRef; use 
vortex_array::VortexSessionExecute; @@ -166,7 +167,7 @@ impl VTable for RunEnd { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { RULES.evaluate(array, parent, child_idx) @@ -216,13 +217,13 @@ pub trait RunEndArrayExt: TypedArrayRef { } fn ends(&self) -> &ArrayRef { - self.as_ref().slots()[ENDS_SLOT] + self.slots()[ENDS_SLOT] .as_ref() .vortex_expect("RunEndArray ends slot") } fn values(&self) -> &ArrayRef { - self.as_ref().slots()[VALUES_SLOT] + self.slots()[VALUES_SLOT] .as_ref() .vortex_expect("RunEndArray values slot") } diff --git a/encodings/sequence/src/array.rs b/encodings/sequence/src/array.rs index a572a5fc165..ddf1f8eb960 100644 --- a/encodings/sequence/src/array.rs +++ b/encodings/sequence/src/array.rs @@ -18,6 +18,7 @@ use vortex_array::ArrayRef; use vortex_array::ArrayView; use vortex_array::ExecutionCtx; use vortex_array::ExecutionResult; +use vortex_array::ParentRef; use vortex_array::Precision; use vortex_array::buffer::BufferHandle; use vortex_array::dtype::DType; @@ -342,7 +343,7 @@ impl VTable for Sequence { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { RULES.evaluate(array, parent, child_idx) diff --git a/encodings/sparse/src/lib.rs b/encodings/sparse/src/lib.rs index 42b5cd46724..ccb8e9daadd 100644 --- a/encodings/sparse/src/lib.rs +++ b/encodings/sparse/src/lib.rs @@ -22,6 +22,7 @@ use vortex_array::Canonical; use vortex_array::ExecutionCtx; use vortex_array::ExecutionResult; use vortex_array::IntoArray; +use vortex_array::ParentRef; use vortex_array::Precision; use vortex_array::arrays::BoolArray; use vortex_array::arrays::ConstantArray; @@ -140,13 +141,13 @@ impl SparseOwnedExt for Array { let patches = Patches::new( self.len(), self.patches().offset(), - self.as_ref().slots()[SparseSlots::PATCH_INDICES] + self.slots()[SparseSlots::PATCH_INDICES] .clone() 
.vortex_expect("indices"), - self.as_ref().slots()[SparseSlots::PATCH_VALUES] + self.slots()[SparseSlots::PATCH_VALUES] .clone() .vortex_expect("values"), - self.as_ref().slots()[SparseSlots::PATCH_CHUNK_OFFSETS].clone(), + self.slots()[SparseSlots::PATCH_CHUNK_OFFSETS].clone(), )?; Ok(SparseParts { patches, @@ -288,7 +289,7 @@ impl VTable for Sparse { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { RULES.evaluate(array, parent, child_idx) diff --git a/encodings/zigzag/src/array.rs b/encodings/zigzag/src/array.rs index 26d3bf984e2..90bc9640e49 100644 --- a/encodings/zigzag/src/array.rs +++ b/encodings/zigzag/src/array.rs @@ -15,6 +15,7 @@ use vortex_array::ArrayView; use vortex_array::ExecutionCtx; use vortex_array::ExecutionResult; use vortex_array::IntoArray; +use vortex_array::ParentRef; use vortex_array::Precision; use vortex_array::TypedArrayRef; use vortex_array::buffer::BufferHandle; @@ -138,7 +139,7 @@ impl VTable for ZigZag { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { RULES.evaluate(array, parent, child_idx) @@ -180,7 +181,7 @@ impl Display for ZigZagData { pub trait ZigZagArrayExt: TypedArrayRef { fn encoded(&self) -> &ArrayRef { - self.as_ref().slots()[ENCODED_SLOT] + self.slots()[ENCODED_SLOT] .as_ref() .vortex_expect("ZigZagArray encoded slot") } diff --git a/encodings/zstd/src/array.rs b/encodings/zstd/src/array.rs index b327a6a2a95..6a7823ccef0 100644 --- a/encodings/zstd/src/array.rs +++ b/encodings/zstd/src/array.rs @@ -21,6 +21,7 @@ use vortex_array::Canonical; use vortex_array::ExecutionCtx; use vortex_array::ExecutionResult; use vortex_array::IntoArray; +use vortex_array::ParentRef; use vortex_array::Precision; use vortex_array::accessor::ArrayAccessor; use vortex_array::arrays::ConstantArray; @@ -244,7 +245,7 @@ impl VTable for Zstd { fn reduce_parent( array: ArrayView<'_, 
Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { crate::rules::RULES.evaluate(array, parent, child_idx) diff --git a/vortex-array-macros/src/lib.rs b/vortex-array-macros/src/lib.rs index 3105dac4add..00ef2ff2678 100644 --- a/vortex-array-macros/src/lib.rs +++ b/vortex-array-macros/src/lib.rs @@ -206,7 +206,7 @@ fn expand_array_slots( #[doc = "Returns a borrowed view of all slots."] fn slots_view(&self) -> #view_ident<'_> { - #view_ident::from_slots(self.as_ref().slots()) + #view_ident::from_slots(self.slots()) } } @@ -347,7 +347,7 @@ impl SlotField { #[inline] fn #field_ident(&self) -> &::vortex_array::ArrayRef { ::vortex_error::VortexExpect::vortex_expect( - self.as_ref().slots()[#struct_ident::#const_ident].as_ref(), + self.slots()[#struct_ident::#const_ident].as_ref(), #expect_message, ) } @@ -355,7 +355,7 @@ impl SlotField { SlotFieldType::Optional => quote! { #[inline] fn #field_ident(&self) -> Option<&::vortex_array::ArrayRef> { - self.as_ref().slots()[#struct_ident::#const_ident].as_ref() + self.slots()[#struct_ident::#const_ident].as_ref() } }, } diff --git a/vortex-array/src/array/erased.rs b/vortex-array/src/array/erased.rs index 77800377f1c..53311cb104f 100644 --- a/vortex-array/src/array/erased.rs +++ b/vortex-array/src/array/erased.rs @@ -21,6 +21,7 @@ use crate::AnyCanonical; use crate::Array; use crate::ArrayEq; use crate::ArrayHash; +use crate::ArraySlots; use crate::ArrayView; use crate::Canonical; use crate::ExecutionCtx; @@ -33,17 +34,12 @@ use crate::aggregate_fn::fns::sum::sum; use crate::array::ArrayData; use crate::array::ArrayId; use crate::array::ArrayInner; -use crate::array::ArraySlots; use crate::array::DynArrayData; -use crate::arrays::Bool; +use crate::array::ParentRef; use crate::arrays::Constant; use crate::arrays::DictArray; use crate::arrays::FilterArray; -use crate::arrays::Null; -use crate::arrays::Primitive; use crate::arrays::SliceArray; -use crate::arrays::VarBin; -use 
crate::arrays::VarBinView; use crate::buffer::BufferHandle; use crate::builders::ArrayBuilder; use crate::dtype::DType; @@ -52,7 +48,6 @@ use crate::expr::stats::Precision; use crate::expr::stats::Stat; use crate::expr::stats::StatsProviderExt; use crate::matcher::Matcher; -use crate::optimizer::ArrayOptimizer; use crate::scalar::Scalar; use crate::stats::StatsSetRef; use crate::validity::Validity; @@ -94,6 +89,11 @@ impl ArrayRef { &self.0.data } + #[inline(always)] + pub(crate) fn inner(&self) -> &ArrayInner { + &self.0 + } + /// Returns a mutable reference to the inner if this is the sole owner. #[inline(always)] pub(crate) fn inner_mut(&mut self) -> Option<&mut ArrayInner> { @@ -228,9 +228,8 @@ impl ArrayRef { return Ok(Canonical::empty(self.dtype()).into_array()); } - let sliced = SliceArray::try_new(self.clone(), range)? - .into_array() - .optimize()?; + let sliced = SliceArray::try_new_parts(self.clone(), range)?; + let sliced = sliced.optimize()?; // Propagate some stats from the original array to the sliced array. if !sliced.is::() { @@ -255,16 +254,14 @@ impl ArrayRef { /// Wraps the array in a [`FilterArray`] such that it is logically filtered by the given mask. pub fn filter(&self, mask: Mask) -> VortexResult { - FilterArray::try_new(self.clone(), mask)? - .into_array() - .optimize() + let parts = FilterArray::try_new_parts(self.clone(), mask)?; + parts.optimize() } /// Wraps the array in a [`DictArray`] such that it is logically taken by the given indices. pub fn take(&self, indices: ArrayRef) -> VortexResult { - DictArray::try_new(indices, self.clone())? - .into_array() - .optimize() + let parts = DictArray::try_new_parts(indices, self.clone())?; + parts.optimize() } /// Fetch the scalar at the given index. @@ -391,7 +388,7 @@ impl ArrayRef { /// Does the array match the given matcher. pub fn is(&self) -> bool { - M::matches(self) + M::matches_ref(self) } /// Returns the array downcast by the given matcher. 
@@ -400,8 +397,12 @@ impl ArrayRef { } /// Returns the array downcast by the given matcher. + /// + /// Routes through the heap-array entry points (`Matcher::matches_ref` / + /// `Matcher::try_match_ref`) so matchers with a cheap, direct downcast — like + /// the blanket `VTable` matcher — don't pay for a [`ParentRef`] construction here. pub fn as_opt(&self) -> Option> { - M::try_match(self) + M::try_match_ref(self) } /// Returns the array downcast to the given `Array` as an owned typed handle. @@ -441,15 +442,6 @@ impl ArrayRef { nbytes } - /// Returns whether this array is an arrow encoding. - pub fn is_arrow(&self) -> bool { - self.is::() - || self.is::() - || self.is::() - || self.is::() - || self.is::() - } - /// Whether the array is of a canonical encoding. pub fn is_canonical(&self) -> bool { self.is::() @@ -595,7 +587,7 @@ impl ArrayRef { pub fn reduce_parent( &self, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { self.0.data.reduce_parent(self, parent, child_idx) @@ -645,7 +637,7 @@ impl ArrayRef { } /// Returns the nth child of the array without allocating a Vec. - pub fn nth_child(&self, idx: usize) -> Option { + pub fn nth_child(&self, idx: usize) -> Option<&ArrayRef> { self.0.data.nth_child(self, idx) } @@ -738,13 +730,34 @@ impl IntoArray for ArrayRef { impl Matcher for V { type Match<'a> = ArrayView<'a, V>; - fn matches(array: &ArrayRef) -> bool { + /// Match by encoding id (no materialization). Equivalent to + /// [`Matcher::try_match`].is_some() but avoids constructing an + /// [`ArrayView`] for parents that do not need one. + fn matches(parent: &ParentRef<'_>) -> bool { + parent.is_encoding::() + } + + /// Returns an [`ArrayView`] for the parent if its encoding is `V`. + /// + /// The returned [`ArrayView`] is stack-backed when the parent is stack-backed, + /// so no `Arc>` is allocated until a downstream consumer reaches + /// for [`ArrayView::array`]. 
+ fn try_match<'a>(parent: &'a ParentRef<'_>) -> Option> { + parent.as_view::() + } + + /// Fast encoding-id check that skips [`ParentRef`] construction. The hot + /// `ArrayRef::is::()` path goes through here, so any extra work shows up in + /// downstream micro-benchmarks (`patches_lookup`, `chunk_array_builder`, ...). + #[inline] + fn matches_ref(array: &ArrayRef) -> bool { array.0.data.as_any().is::>() } - fn try_match(array: &'_ ArrayRef) -> Option> { - let inner = array.0.data.as_any().downcast_ref::>()?; - // # Safety checked by `downcast_ref`. - Some(unsafe { ArrayView::new_unchecked(array, &inner.data) }) + /// Direct downcast — same fast path as [`Matcher::matches_ref`] but also produces + /// the [`ArrayView`] when it matches. + #[inline] + fn try_match_ref(array: &ArrayRef) -> Option> { + array.as_typed::() } } diff --git a/vortex-array/src/array/mod.rs b/vortex-array/src/array/mod.rs index 23ad9499766..5204d5a9cf7 100644 --- a/vortex-array/src/array/mod.rs +++ b/vortex-array/src/array/mod.rs @@ -34,6 +34,9 @@ pub use plugin::*; mod foreign; pub(crate) use foreign::*; +mod parent; +pub use parent::*; + mod typed; pub use typed::*; @@ -60,7 +63,11 @@ pub type ArraySlots = SmallVec<[Option; 4]>; #[doc(hidden)] pub(crate) trait DynArrayData: 'static + private::Sealed + Send + Sync + Debug { /// Returns the array as a reference to a generic [`Any`] trait object. - fn as_any(&self) -> &dyn Any; + /// + /// The `+ Send + Sync` bound is preserved so [`ParentRef`] — which carries + /// this reference as `&dyn Any` to stay type-erased over `V` — stays + /// `Send + Sync` for use across `.await` boundaries. + fn as_any(&self) -> &(dyn Any + Send + Sync); /// Returns the array as a mutable reference to a generic [`Any`] trait object. fn as_any_mut(&mut self) -> &mut dyn Any; @@ -89,7 +96,7 @@ pub(crate) trait DynArrayData: 'static + private::Sealed + Send + Sync + Debug { /// Returns the nth child of the array without allocating a Vec. 
/// /// Returns `None` if the index is out of bounds. - fn nth_child(&self, this: &ArrayRef, idx: usize) -> Option; + fn nth_child<'a>(&'a self, this: &'a ArrayRef, idx: usize) -> Option<&'a ArrayRef>; /// Returns the names of the children of the array. fn children_names(&self, this: &ArrayRef) -> Vec; @@ -148,7 +155,7 @@ pub(crate) trait DynArrayData: 'static + private::Sealed + Send + Sync + Debug { fn reduce_parent( &self, this: &ArrayRef, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult>; @@ -219,7 +226,7 @@ mod private { /// This is self-contained: identity methods use `ArrayData`'s own fields (dtype, len, stats), /// while data-access methods delegate to VTable methods on the inner `V::TypedArrayData`. impl DynArrayData for ArrayData { - fn as_any(&self) -> &dyn Any { + fn as_any(&self) -> &(dyn Any + Send + Sync) { self } @@ -274,7 +281,10 @@ impl DynArrayData for ArrayData { fn children(&self, this: &ArrayRef) -> Vec { let view = unsafe { ArrayView::new_unchecked(this, &self.data) }; - (0..V::nchildren(view)).map(|i| V::child(view, i)).collect() + (0..V::nchildren(view)) + .map(|i| V::child(view, i)) + .cloned() + .collect() } fn nchildren(&self, this: &ArrayRef) -> usize { @@ -282,7 +292,7 @@ impl DynArrayData for ArrayData { V::nchildren(view) } - fn nth_child(&self, this: &ArrayRef, idx: usize) -> Option { + fn nth_child<'a>(&'a self, this: &'a ArrayRef, idx: usize) -> Option<&'a ArrayRef> { let view = unsafe { ArrayView::new_unchecked(this, &self.data) }; (idx < V::nchildren(view)).then(|| V::child(view, idx)) } @@ -297,7 +307,7 @@ impl DynArrayData for ArrayData { fn named_children(&self, this: &ArrayRef) -> Vec<(String, ArrayRef)> { let view = unsafe { ArrayView::new_unchecked(this, &self.data) }; (0..V::nchildren(view)) - .map(|i| (V::child_name(view, i), V::child(view, i))) + .map(|i| (V::child_name(view, i), V::child(view, i).clone())) .collect() } @@ -410,7 +420,7 @@ impl DynArrayData for ArrayData { fn 
reduce_parent( &self, this: &ArrayRef, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { let view = unsafe { ArrayView::new_unchecked(this, &self.data) }; diff --git a/vortex-array/src/array/parent.rs b/vortex-array/src/array/parent.rs new file mode 100644 index 00000000000..22ae43865cb --- /dev/null +++ b/vortex-array/src/array/parent.rs @@ -0,0 +1,538 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Stack-allocatable parent representation used by the `reduce_parent` dispatch chain. +//! +//! [`ParentRef`] either borrows an existing heap-allocated [`ArrayRef`], or borrows +//! stack-allocated construction state. The construction-side optimizer can borrow +//! `ArrayParts` before materializing an `ArrayInner`, so matchers and parent-reduce +//! rules can attempt reduction without first allocating an `Arc>`. +//! +//! Stack-backed parents lazily materialize an `ArrayRef` into an internal [`OnceLock`] +//! when a downstream consumer asks for one through [`ArrayBacking::array_ref`], so +//! [`ParentRef`] can stand in anywhere an [`ArrayView`] is needed. + +use std::any::Any; +use std::fmt::Debug; +use std::fmt::Formatter; +use std::sync::OnceLock; + +use vortex_error::VortexExpect; +use vortex_error::VortexResult; +use vortex_error::vortex_ensure; + +use crate::ArrayRef; +use crate::array::ArrayData; +use crate::array::ArrayId; +use crate::array::ArrayParts; +use crate::array::ArraySlots; +use crate::array::ArrayView; +use crate::array::VTable; +use crate::dtype::DType; +use crate::matcher::Matcher; +use crate::optimizer::ArrayOptimizer; + +/// A parent array, possibly stack-allocated, used by the `reduce_parent` dispatch chain. +/// +/// Carries the metadata needed to dispatch parent-reduce rules (encoding id, dtype, +/// length, encoding-specific data, slots) regardless of whether the parent is backed +/// by an existing [`ArrayRef`] or by borrowed [`ArrayParts`]. 
Stack-backed parents +/// materialize an [`ArrayRef`] into an internal cache on first call to +/// [`ArrayBacking::array_ref`]. +pub struct ParentRef<'a> { + encoding_id: ArrayId, + dtype: &'a DType, + len: usize, + slots: &'a [Option], + data: ParentData<'a>, + /// Lazily-populated materialization slot used by stack-backed parents. + /// Heap-backed parents return their borrowed [`ArrayRef`] directly and never + /// touch this cache. + cache: OnceLock, +} + +/// Type-erased payload for [`ParentRef`]. +/// +/// Carries `&dyn Any` rather than `&V`/`&V::TypedArrayData` so [`ParentRef`] is not +/// itself generic over `V`. The `+ Send + Sync` bound mirrors the bounds on +/// [`VTable`](array::VTable) and `V::TypedArrayData`, keeping [`ParentRef`] +/// and the [`ArrayView`] built on top of it `Send + Sync`. +type AnyRef<'a> = &'a (dyn Any + Send + Sync); + +enum ParentData<'a> { + Heap { + array: &'a ArrayRef, + data: AnyRef<'a>, + }, + Parts { + vtable: AnyRef<'a>, + data: AnyRef<'a>, + materialize: MaterializeFn, + reduce: ReduceFn, + }, +} + +/// Function pointer that materializes stack-borrowed parts into an owned [`ArrayRef`]. +/// +/// The `vtable` and `data` arguments are the borrowed `&V` and `&V::TypedArrayData` +/// previously stashed as `&dyn Any` in [`ParentData::Parts`]. The implementation +/// downcasts them, clones into owned values, and produces an `ArrayRef`. +type MaterializeFn = fn( + vtable: &(dyn Any + Send + Sync), + data: &(dyn Any + Send + Sync), + dtype: &DType, + len: usize, + slots: &[Option], +) -> ArrayRef; + +/// Function pointer that runs encoding `V`'s self-reduce rules against a (possibly +/// stack-borrowed) parent. +/// +/// Stored alongside [`MaterializeFn`] in [`ParentData::Parts`] so [`ParentRef::optimize`] +/// can dispatch `V::reduce` without being generic over `V`. The implementation builds a +/// stack-backed [`ArrayView`] over the borrowed parts, so a rule that only inspects +/// metadata never forces a materialization. 
+type ReduceFn = fn(parent: &ParentRef<'_>) -> VortexResult>; + +impl<'a> ParentRef<'a> { + /// Build a [`ParentRef`] borrowing a heap-allocated [`ArrayRef`]. + #[inline] + pub fn from_array_ref(array: &'a ArrayRef) -> Self { + let inner = array.inner(); + Self { + encoding_id: inner.encoding_id, + dtype: &inner.dtype, + len: inner.len, + slots: &inner.slots, + data: ParentData::Heap { + array, + data: inner.data.as_any(), + }, + cache: OnceLock::new(), + } + } + + /// Build a [`ParentRef`] borrowing construction parts before materialization. + /// + /// The returned [`ParentRef`] owns the cache slot for the lazily materialized + /// [`ArrayRef`], so callers don't need to thread an external scratch through. + #[inline] + pub fn from_parts(parts: &'a ArrayParts) -> Self { + Self { + encoding_id: parts.vtable.id(), + dtype: &parts.dtype, + len: parts.len, + slots: &parts.slots, + data: ParentData::Parts { + vtable: &parts.vtable, + data: &parts.data, + materialize: materialize_parts::, + reduce: reduce_parts::, + }, + cache: OnceLock::new(), + } + } + + /// Optimize this parent, materializing the parts if no stack reduction fires. + /// + /// Mirrors one iteration of [`ArrayRef::optimize`](crate::optimizer::ArrayOptimizer): + /// the parent's own `reduce` rules are tried first, then `reduce_parent` on each child + /// slot. Both run against the (possibly stack-borrowed) parent, so a reduction that + /// only inspects metadata never allocates an `Arc>`. When a rule fires + /// the result is re-driven through the full [`ArrayRef::optimize`] fixpoint. + /// + /// Running `reduce` first is what makes this equivalent to materializing the parts and + /// calling `ArrayRef::optimize`: the two paths differ only in whether the wrapper is + /// heap-allocated when no reduction applies. + pub fn optimize(self) -> VortexResult { + if let Some(reduced) = self.reduce()? 
{ + return reduced.optimize(); + } + + for (slot_idx, slot) in self.slots.iter().enumerate() { + let Some(child) = slot else { continue }; + + if let Some(reduced) = child.reduce_parent(&self, slot_idx)? { + return reduced.optimize(); + } + } + + Ok(self.into_array_ref()) + } + + /// Run the parent encoding's self-reduce rules against the parent. + /// + /// Mirrors [`ArrayRef::reduce`](crate::ArrayRef::reduce) for the `ParentRef` dispatch + /// chain. Heap-backed parents delegate to the existing array; stack-backed parents + /// dispatch through the stored [`ReduceFn`] so the borrowed parts only materialize if a + /// rule reaches for an [`ArrayRef`]. The reduced array is validated to preserve the + /// parent's len and dtype, matching the heap path. + fn reduce(&self) -> VortexResult> { + let reduced = match self.data { + ParentData::Heap { array, .. } => return array.reduce(), + ParentData::Parts { reduce, .. } => reduce(self)?, + }; + let Some(reduced) = reduced else { + return Ok(None); + }; + vortex_ensure!( + reduced.len() == self.len, + "Reduced array length mismatch from {} to {}", + self.encoding_id, + reduced.encoding_id() + ); + vortex_ensure!( + reduced.dtype() == self.dtype, + "Reduced array dtype mismatch from {} to {}", + self.encoding_id, + reduced.encoding_id() + ); + Ok(Some(reduced)) + } + + /// Returns the encoding id of the parent. + #[inline] + pub fn encoding_id(&self) -> ArrayId { + self.encoding_id + } + + /// Returns the dtype of the parent. + #[inline] + pub fn dtype(&self) -> &'a DType { + self.dtype + } + + /// Returns the length of the parent. + #[inline] + pub fn len(&self) -> usize { + self.len + } + + /// Returns whether the parent is empty. + #[inline] + pub fn is_empty(&self) -> bool { + self.len == 0 + } + + /// Returns the slots of the parent. + #[inline] + pub fn slots(&self) -> &'a [Option] { + self.slots + } + + /// Consume this `ParentRef` and return an owned [`ArrayRef`]. 
+ /// + /// Cheap for heap-backed parents (clones the existing `Arc`); for stack-backed + /// parents this materializes the borrowed parts into a fresh + /// `Arc>`, reusing the cached materialization if one was already + /// produced by [`ArrayBacking::array_ref`]. + pub fn into_array_ref(self) -> ArrayRef { + if let Some(cached) = self.cache.into_inner() { + return cached; + } + match self.data { + ParentData::Heap { array, .. } => array.clone(), + ParentData::Parts { + vtable, + data, + materialize, + .. + } => materialize(vtable, data, self.dtype, self.len, self.slots), + } + } + + /// Consume this `ParentRef` and return the cached materialization, if one exists. + /// + /// This is used by owned [`ArrayParts::optimize`] to avoid materializing twice when + /// a stack-backed parent was forced into an [`ArrayRef`] by a rule that did not fire. + fn into_cached_array_ref(self) -> Option { + self.cache.into_inner() + } + + /// Returns `true` if this parent's encoding matches `V`. + /// + /// Cheap encoding-id check that works for both heap- and stack-backed parents + /// without forcing materialization. + #[inline] + pub(crate) fn is_encoding(&self) -> bool { + match self.data { + ParentData::Heap { data, .. } => data.is::>(), + ParentData::Parts { vtable, .. } => vtable.is::(), + } + } + + #[inline] + pub(crate) fn typed_data(&self) -> Option<&'a V::TypedArrayData> { + match self.data { + ParentData::Heap { data, .. } => data + .downcast_ref::>() + .map(|array_data| &array_data.data), + ParentData::Parts { data, .. } => data.downcast_ref::(), + } + } + + /// Try to extract an [`ArrayView`] for the parent's encoding `V`. + /// + /// Returns `None` if the parent's encoding is not `V`. The returned view is + /// stack-backed when the parent is stack-backed — no materialization happens + /// up front. Materialization is deferred to [`ArrayView::array`], which goes + /// through [`ArrayBacking::array_ref`] on the parent's internal cache. 
+ /// + /// This is the low-level entry point used by the blanket `VTable` matcher + /// implementation. Prefer [`Self::as_opt`] for matcher-based downcasts. + pub fn as_view(&self) -> Option> { + let data = self.typed_data::()?; + // SAFETY: `typed_data::()` returned Some, so the parent's encoding is + // `V` and `data` is the `V::TypedArrayData` reachable through `self`. + Some(unsafe { ArrayView::new_from_parent(self, data) }) + } + + /// Does the parent match the given matcher. + /// + /// Mirrors [`ArrayRef::is`](ArrayRef::is) for the parent-side dispatch + /// chain. Routes through [`Matcher::matches`] so matchers that can answer with + /// a cheap encoding-id check don't force a downcast. + pub fn is(&self) -> bool { + M::matches(self) + } + + /// Returns the parent downcast by the given matcher, or `None` if it doesn't match. + /// + /// Mirrors [`ArrayRef::as_opt`](ArrayRef::as_opt) for the parent-side + /// dispatch chain. The returned `Match` borrows from `self`, so stack-backed + /// parents stay on the stack until a consumer reaches for + /// [`ArrayView::array`]. + pub fn as_opt(&self) -> Option> { + M::try_match(self) + } + + /// Returns the parent downcast by the given matcher, panicking if it doesn't match. + /// + /// Mirrors [`ArrayRef::as_`](ArrayRef::as_). + pub fn as_(&self) -> M::Match<'_> { + self.as_opt::().vortex_expect("Failed to downcast") + } +} + +impl ArrayParts { + /// Optimize already-valid construction parts, consuming the original parts on a miss. + /// + /// This mirrors [`ParentRef::optimize`], but keeps ownership of the original + /// [`ArrayParts`] until it knows whether a reduction fired. If no rule applies and + /// the stack-backed parent was not materialized by a rule, the result is built with + /// [`ArrayParts::into_array`] directly rather than cloning the parts through + /// [`ParentRef::into_array_ref`]. 
+ pub fn optimize(self) -> VortexResult { + let parent = ParentRef::from_parts(&self); + if let Some(reduced) = parent.reduce()? { + return reduced.optimize(); + } + + for (slot_idx, slot) in parent.slots.iter().enumerate() { + let Some(child) = slot else { continue }; + + if let Some(reduced) = child.reduce_parent(&parent, slot_idx)? { + return reduced.optimize(); + } + } + + if let Some(cached) = parent.into_cached_array_ref() { + return Ok(cached); + } + + Ok(self.into_array()) + } +} + +impl Debug for ParentRef<'_> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + let heap_backed = matches!(self.data, ParentData::Heap { .. }); + f.debug_struct("ParentRef") + .field("encoding", &self.encoding_id()) + .field("dtype", self.dtype()) + .field("len", &self.len()) + .field("heap_backed", &heap_backed) + .finish() + } +} + +impl<'a> From<&'a ArrayRef> for ParentRef<'a> { + fn from(array: &'a ArrayRef) -> Self { + Self::from_array_ref(array) + } +} + +/// Trait providing an [`ArrayRef`] view of an array-like backing, materializing on +/// demand for stack-allocated parents. +/// +/// Implemented for [`ArrayRef`] (returns `self`) and [`ParentRef`] (returns the +/// borrowed `ArrayRef` for heap parents or the lazily-materialized cache for stack +/// parents). [`ArrayView`] stores `&dyn ArrayBacking` so its hot accessors stay +/// branch-free while the cold [`ArrayView::array`] path can still produce an +/// `ArrayRef` whichever way the view was constructed. +pub trait ArrayBacking: Send + Sync { + /// Returns an [`ArrayRef`] borrowing the array-like data. + /// + /// For heap-backed views this is a cheap reference return. For stack-backed + /// views this triggers materialization on first call, caching the result in + /// the parent so subsequent calls reuse it. 
+ fn array_ref(&self) -> &ArrayRef; +} + +impl ArrayBacking for ArrayRef { + #[inline] + fn array_ref(&self) -> &ArrayRef { + self + } +} + +impl ArrayBacking for ParentRef<'_> { + #[inline] + fn array_ref(&self) -> &ArrayRef { + match self.data { + ParentData::Heap { array, .. } => array, + ParentData::Parts { + vtable, + data, + materialize, + .. + } => self + .cache + .get_or_init(|| materialize(vtable, data, self.dtype, self.len, self.slots)), + } + } +} + +/// Materializes stack-borrowed parts of encoding `V` into an owned [`ArrayRef`]. +/// +/// Used as the function pointer stored inside [`ParentData::Parts`]. The +/// `vtable`/`data` arguments are `&V` and `&V::TypedArrayData` erased to `&dyn Any`; +/// they are downcast and cloned into a fresh `ArrayParts` which is then turned +/// into an `ArrayRef`. Validation is skipped: stack-borrowed parts were validated +/// when the originating `ArrayParts` was constructed. +fn materialize_parts( + vtable: &(dyn Any + Send + Sync), + data: &(dyn Any + Send + Sync), + dtype: &DType, + len: usize, + slots: &[Option], +) -> ArrayRef { + let vtable = vtable + .downcast_ref::() + .vortex_expect("ParentRef materialize: vtable type mismatch"); + let data = data + .downcast_ref::() + .vortex_expect("ParentRef materialize: data type mismatch"); + let slots: ArraySlots = slots.iter().cloned().collect(); + ArrayParts::new(vtable.clone(), dtype.clone(), len, data.clone()) + .with_slots(slots) + .into_array() +} + +/// Runs encoding `V`'s self-reduce rules against a (possibly stack-borrowed) parent. +/// +/// Used as the [`ReduceFn`] stored inside [`ParentData::Parts`]. Builds a stack-backed +/// [`ArrayView`] over the borrowed parts and dispatches to [`VTable::reduce`]; the view +/// only materializes if a rule reaches for an [`ArrayRef`]. 
+fn reduce_parts(parent: &ParentRef<'_>) -> VortexResult> { + let view = parent + .as_view::() + .vortex_expect("ParentRef reduce: encoding mismatch"); + V::reduce(view) +} + +#[cfg(test)] +mod tests { + use vortex_error::VortexResult; + + use super::ParentRef; + use crate::IntoArray; + use crate::arrays::BoolArray; + use crate::arrays::PrimitiveArray; + use crate::arrays::ScalarFnArray; + use crate::arrays::Slice; + use crate::arrays::SliceArray; + use crate::arrays::Struct; + use crate::assert_arrays_eq; + use crate::dtype::Nullability; + use crate::optimizer::ArrayOptimizer; + use crate::scalar_fn::ScalarFnVTableExt; + use crate::scalar_fn::fns::pack::Pack; + use crate::scalar_fn::fns::pack::PackOptions; + + #[test] + fn parts_parent_ref_exposes_array_view() -> VortexResult<()> { + let child = BoolArray::from_iter([true, false, true]).into_array(); + let parts = SliceArray::try_new_parts(child, 1..3)?; + let parent = ParentRef::from_parts(&parts); + + let view = parent + .as_opt::() + .expect("Slice parts should match a Slice array view"); + + assert_eq!(view.slice_range(), &(1..3)); + assert_eq!(view.len(), 2); + + Ok(()) + } + + #[test] + fn parts_parent_ref_array_method_materializes() -> VortexResult<()> { + let child = BoolArray::from_iter([true, false, true]).into_array(); + let parts = SliceArray::try_new_parts(child, 1..3)?; + let parent = ParentRef::from_parts(&parts); + + let view = parent + .as_opt::() + .expect("Slice parts should match a Slice array view"); + + // Reading metadata through the view does NOT force materialization. + assert_eq!(view.slice_range(), &(1..3)); + assert_eq!(view.len(), 2); + + // But calling array() DOES materialize. 
+ let array_ref = view.array(); + assert_eq!(array_ref.len(), 2); + + Ok(()) + } + + /// Optimizing borrowed parts must produce the same array as materializing them and + /// calling [`ArrayRef::optimize`](crate::optimizer::ArrayOptimizer) — the two paths + /// differ only in whether the wrapper is heap-allocated. + /// + /// Regression test for [`ParentRef::optimize`] skipping the parent's own `reduce` + /// rules. A `Pack` scalar function collapses to a `StructArray` via the `ScalarFn` + /// encoding's self-`reduce`. No `reduce_parent` rule mirrors this, so the reduction is + /// only reachable through self-`reduce`: before `optimize` ran `reduce` first the stack + /// path returned the `ScalarFn` wrapper while materialize-then-optimize returned the + /// struct. + #[test] + fn optimize_matches_heap_path() -> VortexResult<()> { + let a = PrimitiveArray::from_iter([1i32, 2, 3]).into_array(); + let b = PrimitiveArray::from_iter([4i32, 5, 6]).into_array(); + let len = a.len(); + let pack = Pack.bind(PackOptions { + names: ["a", "b"].into(), + nullability: Nullability::NonNullable, + }); + + let heap = ScalarFnArray::try_new(pack.clone(), vec![a.clone(), b.clone()], len)? + .into_array() + .optimize()?; + let parts = ScalarFnArray::try_new_parts(pack, vec![a, b], len)?; + let stack = ParentRef::from_parts(&parts).optimize()?; + + assert!( + heap.is::(), + "heap path should collapse Pack to a struct" + ); + assert!( + stack.is::(), + "stack path should collapse Pack to a struct" + ); + assert_arrays_eq!(stack, heap); + + Ok(()) + } +} diff --git a/vortex-array/src/array/typed.rs b/vortex-array/src/array/typed.rs index 180f771bc9c..584b33dd021 100644 --- a/vortex-array/src/array/typed.rs +++ b/vortex-array/src/array/typed.rs @@ -70,6 +70,15 @@ impl ArrayParts { self.slots = slots; self } + + /// Materialize already-valid parts into an [`ArrayRef`] without attempting reduction. + /// + /// This intentionally skips vtable validation. 
Use + /// `Array::::try_from_parts(parts)?.into_array()` when constructing parts from unchecked + /// inputs. + pub fn into_array(self) -> ArrayRef { + unsafe { Array::::from_parts_unchecked(self).into_array() } + } } /// Shared bound for helpers that should work over both owned [`Array`] and borrowed @@ -79,14 +88,80 @@ impl ArrayParts { /// [`ArrayRef`] and the encoding-specific [`VTable::TypedArrayData`]. pub trait TypedArrayRef: AsRef + Deref { /// Returns an owned [`Array`] from the reference. + fn to_owned(&self) -> Array; + + fn is_empty(&self) -> bool; + + fn slots(&self) -> &[Option]; + + fn len(&self) -> usize; + + fn dtype(&self) -> &DType; + + fn validity(&self) -> VortexResult; +} + +impl TypedArrayRef for Array { fn to_owned(&self) -> Array { - self.as_ref().clone().downcast() + self.clone() + } + + #[allow(clippy::same_name_method)] + fn is_empty(&self) -> bool { + self.inner.is_empty() + } + + #[allow(clippy::same_name_method)] + fn slots(&self) -> &[Option] { + Array::::slots(self) + } + + #[allow(clippy::same_name_method)] + fn len(&self) -> usize { + Array::::len(self) + } + + #[allow(clippy::same_name_method)] + fn dtype(&self) -> &DType { + Array::::dtype(self) + } + + #[allow(clippy::same_name_method)] + fn validity(&self) -> VortexResult { + Array::::validity(self) } } -impl TypedArrayRef for Array {} +impl TypedArrayRef for ArrayView<'_, V> { + fn to_owned(&self) -> Array { + self.into_owned() + } + + #[allow(clippy::same_name_method)] + fn is_empty(&self) -> bool { + ArrayView::is_empty(self) + } + + #[allow(clippy::same_name_method)] + fn slots(&self) -> &[Option] { + ArrayView::slots(self) + } + + #[allow(clippy::same_name_method)] + fn len(&self) -> usize { + ArrayView::len(self) + } + + #[allow(clippy::same_name_method)] + fn dtype(&self) -> &DType { + ArrayView::dtype(self) + } -impl TypedArrayRef for ArrayView<'_, V> {} + #[allow(clippy::same_name_method)] + fn validity(&self) -> VortexResult { + ArrayView::validity(self) + } +} // 
============================================================================= // ArrayData — the concrete type stored inside Arc // ============================================================================= @@ -245,16 +320,19 @@ impl Array { } /// Returns the dtype. + #[allow(clippy::same_name_method)] pub fn dtype(&self) -> &DType { self.inner.dtype() } /// Returns the length. + #[allow(clippy::same_name_method)] pub fn len(&self) -> usize { self.inner.len() } /// Returns whether this array is empty. + #[allow(clippy::same_name_method)] pub fn is_empty(&self) -> bool { self.inner.len() == 0 } @@ -313,6 +391,7 @@ impl Array { } /// Returns the array slots. + #[allow(clippy::same_name_method)] pub fn slots(&self) -> &[Option] { self.inner.slots() } @@ -373,6 +452,7 @@ impl Array { self.inner.take(indices) } + #[allow(clippy::same_name_method)] pub fn validity(&self) -> VortexResult { self.inner.validity() } diff --git a/vortex-array/src/array/view.rs b/vortex-array/src/array/view.rs index 969fd0484a3..239bd53e32b 100644 --- a/vortex-array/src/array/view.rs +++ b/vortex-array/src/array/view.rs @@ -9,16 +9,33 @@ use vortex_error::VortexResult; use crate::ArrayRef; use crate::array::Array; +use crate::array::ArrayBacking; use crate::array::ArrayId; +use crate::array::ParentRef; use crate::array::VTable; use crate::dtype::DType; use crate::stats::StatsSetRef; use crate::validity::Validity; -/// A lightweight, `Copy`-able typed view into an [`ArrayRef`]. +/// A lightweight, `Copy`-able typed view of an array. +/// +/// The view can be either *heap-backed* (sourced from an existing [`ArrayRef`]) or +/// *stack-backed* (sourced from borrowed [`ArrayParts`](crate::array::ArrayParts) +/// reachable through a [`ParentRef`]). Either way the hot accessors (`dtype`, `len`, +/// `slots`, `encoding_id`, `data`) are direct field reads — the cached metadata is +/// flattened into the view at construction. 
+/// +/// The cold [`Self::array`] path is the only place the heap/stack split surfaces; +/// it goes through an [`ArrayBacking`] trait object so stack-backed views can defer +/// materializing an `Arc>` until a downstream consumer actually needs +/// an [`ArrayRef`]. pub struct ArrayView<'a, V: VTable> { - array: &'a ArrayRef, data: &'a V::TypedArrayData, + dtype: &'a DType, + len: usize, + slots: &'a [Option], + encoding_id: ArrayId, + backing: &'a dyn ArrayBacking, } impl Copy for ArrayView<'_, V> {} @@ -30,58 +47,110 @@ impl Clone for ArrayView<'_, V> { } impl<'a, V: VTable> ArrayView<'a, V> { + /// Construct a heap-backed view. + /// /// # Safety /// Caller must ensure `data` is the `V::TypedArrayData` stored inside `array`. pub(crate) unsafe fn new_unchecked(array: &'a ArrayRef, data: &'a V::TypedArrayData) -> Self { debug_assert!(array.is::()); - Self { array, data } + let inner = array.inner(); + Self { + data, + dtype: &inner.dtype, + len: inner.len, + slots: &inner.slots, + encoding_id: inner.encoding_id, + backing: array, + } } + /// Construct a stack-backed view borrowing parts through `parent`. + /// + /// # Safety + /// Caller must ensure `parent.is_encoding::()` and that `data` is the + /// `V::TypedArrayData` borrowed inside `parent`. + pub(crate) unsafe fn new_from_parent( + parent: &'a ParentRef<'a>, + data: &'a V::TypedArrayData, + ) -> Self { + debug_assert!(parent.is_encoding::()); + Self { + data, + dtype: parent.dtype(), + len: parent.len(), + slots: parent.slots(), + encoding_id: parent.encoding_id(), + backing: parent, + } + } + + /// Returns the underlying [`ArrayRef`], materializing stack parts on first call. + /// + /// For heap-backed views this is a cheap reference return. Stack-backed views + /// build an `Arc>` and cache it on the parent. 
+ #[inline] pub fn array(&self) -> &'a ArrayRef { - self.array + self.backing.array_ref() } + #[inline] pub fn data(&self) -> &'a V::TypedArrayData { self.data } + #[inline] + #[allow(clippy::same_name_method)] pub fn slots(&self) -> &'a [Option] { - self.array.slots() + self.slots } - pub fn dtype(&self) -> &DType { - self.array.dtype() + #[inline] + #[allow(clippy::same_name_method)] + pub fn dtype(&self) -> &'a DType { + self.dtype } + #[inline] + #[allow(clippy::same_name_method)] pub fn len(&self) -> usize { - self.array.len() + self.len } + #[inline] + #[allow(clippy::same_name_method)] pub fn is_empty(&self) -> bool { - self.array.len() == 0 + self.len == 0 } + #[inline] pub fn encoding_id(&self) -> ArrayId { - self.array.encoding_id() + self.encoding_id } + /// Returns the array's statistics. Forces stack-backed views to materialize. pub fn statistics(&self) -> StatsSetRef<'_> { - self.array.statistics() + self.array().statistics() } + /// Returns the array's validity. Forces stack-backed views to materialize. + #[allow(clippy::same_name_method)] pub fn validity(&self) -> VortexResult { - self.array.validity() + self.array().validity() } + /// Returns an owned typed handle. Forces stack-backed views to materialize. pub fn into_owned(self) -> Array { // SAFETY: we are ourselves type checked as 'V' - unsafe { Array::::from_array_ref_unchecked(self.array.clone()) } + unsafe { Array::::from_array_ref_unchecked(self.array().clone()) } } } impl AsRef for ArrayView<'_, V> { fn as_ref(&self) -> &ArrayRef { - self.array + // For heap-backed views this returns the borrowed `ArrayRef` directly. For + // stack-backed views, materialization runs once and the cached `ArrayRef` + // lives as long as the parent. 
+ self.array() } } @@ -96,9 +165,9 @@ impl Deref for ArrayView<'_, V> { impl Debug for ArrayView<'_, V> { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { f.debug_struct("ArrayView") - .field("encoding", &self.array.encoding_id()) - .field("dtype", self.array.dtype()) - .field("len", &self.array.len()) + .field("encoding", &self.encoding_id()) + .field("dtype", self.dtype()) + .field("len", &self.len()) .finish() } } diff --git a/vortex-array/src/array/vtable/mod.rs b/vortex-array/src/array/vtable/mod.rs index 6c85bb79d05..00aef6d9c0a 100644 --- a/vortex-array/src/array/vtable/mod.rs +++ b/vortex-array/src/array/vtable/mod.rs @@ -25,6 +25,7 @@ use crate::Canonical; use crate::ExecutionResult; use crate::IntoArray; use crate::Precision; +use crate::array::ParentRef; pub use crate::array::plugin::*; use crate::arrays::ConstantArray; use crate::arrays::constant::Constant; @@ -96,11 +97,11 @@ pub trait VTable: 'static + Clone + Sized + Send + Sync + Debug { /// /// # Panics /// Panics if `idx >= nchildren(array)`. - fn child(array: ArrayView<'_, Self>, idx: usize) -> ArrayRef { + fn child(array: ArrayView<'_, Self>, idx: usize) -> &ArrayRef { array .slots() .iter() - .filter_map(|s| s.clone()) + .filter_map(|s| s.as_ref()) .nth(idx) .vortex_expect("child index out of bounds") } @@ -205,7 +206,7 @@ pub trait VTable: 'static + Clone + Sized + Send + Sync + Debug { /// Attempt to perform a reduction of the parent of this array. 
fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { _ = (array, parent, child_idx); diff --git a/vortex-array/src/arrays/bool/array.rs b/vortex-array/src/arrays/bool/array.rs index 6585e705899..0e9ec94a9e7 100644 --- a/vortex-array/src/arrays/bool/array.rs +++ b/vortex-array/src/arrays/bool/array.rs @@ -86,26 +86,23 @@ pub struct BoolDataParts { pub trait BoolArrayExt: TypedArrayRef { fn nullability(&self) -> crate::dtype::Nullability { - match self.as_ref().dtype() { + match self.dtype() { DType::Bool(nullability) => *nullability, _ => unreachable!("BoolArrayExt requires a bool dtype"), } } fn validity(&self) -> Validity { - child_to_validity( - self.as_ref().slots()[VALIDITY_SLOT].as_ref(), - self.nullability(), - ) + child_to_validity(self.slots()[VALIDITY_SLOT].as_ref(), self.nullability()) } fn to_bit_buffer(&self) -> BitBuffer { let buffer = self.bits.as_host().clone(); - BitBuffer::new_with_offset(buffer, self.as_ref().len(), self.offset) + BitBuffer::new_with_offset(buffer, self.len(), self.offset) } fn maybe_execute_mask(&self, ctx: &mut ExecutionCtx) -> VortexResult> { - let all_valid = match &self.validity() { + let all_valid = match &BoolArrayExt::validity(self) { Validity::NonNullable | Validity::AllValid => true, Validity::AllInvalid => false, Validity::Array(a) => a.statistics().compute_min::(ctx).unwrap_or(false), @@ -120,13 +117,12 @@ pub trait BoolArrayExt: TypedArrayRef { } fn to_mask_fill_null_false(&self, ctx: &mut ExecutionCtx) -> Mask { - let validity_mask = self - .validity() - .execute_mask(self.as_ref().len(), ctx) + let validity_mask = BoolArrayExt::validity(self) + .execute_mask(self.len(), ctx) .vortex_expect("Failed to compute validity mask"); let buffer = match validity_mask { Mask::AllTrue(_) => self.to_bit_buffer(), - Mask::AllFalse(_) => return Mask::new_false(self.as_ref().len()), + Mask::AllFalse(_) => return Mask::new_false(self.len()), 
Mask::Values(validity) => validity.bit_buffer() & self.to_bit_buffer(), }; Mask::from_buffer(buffer) diff --git a/vortex-array/src/arrays/bool/test_harness.rs b/vortex-array/src/arrays/bool/test_harness.rs index 4e13d62e3ef..efcd95c5b05 100644 --- a/vortex-array/src/arrays/bool/test_harness.rs +++ b/vortex-array/src/arrays/bool/test_harness.rs @@ -13,10 +13,7 @@ impl BoolArray { pub fn opt_bool_vec(&self) -> Vec> { self.validity() .vortex_expect("failed to get validity") - .execute_mask( - self.as_ref().len(), - &mut LEGACY_SESSION.create_execution_ctx(), - ) + .execute_mask(self.len(), &mut LEGACY_SESSION.create_execution_ctx()) .vortex_expect("Failed to compute validity mask") .to_bit_buffer() .iter() @@ -28,10 +25,7 @@ impl BoolArray { pub fn bool_vec(&self) -> Vec { self.validity() .vortex_expect("failed to get validity") - .execute_mask( - self.as_ref().len(), - &mut LEGACY_SESSION.create_execution_ctx(), - ) + .execute_mask(self.len(), &mut LEGACY_SESSION.create_execution_ctx()) .vortex_expect("Failed to compute validity mask") .to_bit_buffer() .iter() diff --git a/vortex-array/src/arrays/bool/vtable/mod.rs b/vortex-array/src/arrays/bool/vtable/mod.rs index 35261fa8d89..9cfa80ad4ca 100644 --- a/vortex-array/src/arrays/bool/vtable/mod.rs +++ b/vortex-array/src/arrays/bool/vtable/mod.rs @@ -19,6 +19,7 @@ use crate::ExecutionResult; use crate::array::Array; use crate::array::ArrayParts; use crate::array::ArrayView; +use crate::array::ParentRef; use crate::array::VTable; use crate::array::child_to_validity; use crate::arrays::bool::BoolData; @@ -184,7 +185,7 @@ impl VTable for Bool { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { RULES.evaluate(array, parent, child_idx) diff --git a/vortex-array/src/arrays/chunked/array.rs b/vortex-array/src/arrays/chunked/array.rs index a0ed3f98d67..47747607907 100644 --- a/vortex-array/src/arrays/chunked/array.rs +++ 
b/vortex-array/src/arrays/chunked/array.rs @@ -51,24 +51,24 @@ impl Display for ChunkedData { pub trait ChunkedArrayExt: TypedArrayRef { fn chunk_offsets_array(&self) -> &ArrayRef { - self.as_ref().slots()[CHUNK_OFFSETS_SLOT] + self.slots()[CHUNK_OFFSETS_SLOT] .as_ref() .vortex_expect("validated chunk offsets slot") } fn nchunks(&self) -> usize { - self.as_ref().slots().len().saturating_sub(CHUNKS_OFFSET) + self.slots().len().saturating_sub(CHUNKS_OFFSET) } fn chunk(&self, idx: usize) -> &ArrayRef { - self.as_ref().slots()[CHUNKS_OFFSET + idx] + self.slots()[CHUNKS_OFFSET + idx] .as_ref() .vortex_expect("validated chunk slot") } fn iter_chunks<'a>(&'a self) -> Box + 'a> { Box::new( - self.as_ref().slots()[CHUNKS_OFFSET..] + self.slots()[CHUNKS_OFFSET..] .iter() .map(|slot| slot.as_ref().vortex_expect("validated chunk slot")), ) @@ -87,10 +87,7 @@ pub trait ChunkedArrayExt: TypedArrayRef { } fn find_chunk_idx(&self, index: usize) -> VortexResult<(usize, usize)> { - assert!( - index <= self.as_ref().len(), - "Index out of bounds of the array" - ); + assert!(index <= self.len(), "Index out of bounds of the array"); let chunk_offsets = self.chunk_offsets(); let index_chunk = chunk_offsets .search_sorted(&index, SearchSortedSide::Right)? 
@@ -103,14 +100,14 @@ pub trait ChunkedArrayExt: TypedArrayRef { fn array_iterator(&self) -> impl ArrayIterator + '_ { ArrayIteratorAdapter::new( - self.as_ref().dtype().clone(), + self.dtype().clone(), self.iter_chunks().map(|chunk| Ok(chunk.clone())), ) } fn array_stream(&self) -> impl ArrayStream + '_ { ArrayStreamAdapter::new( - self.as_ref().dtype().clone(), + self.dtype().clone(), stream::iter(self.iter_chunks().map(|chunk| Ok(chunk.clone()))), ) } diff --git a/vortex-array/src/arrays/chunked/compute/rules.rs b/vortex-array/src/arrays/chunked/compute/rules.rs index d8d324a8e86..ba2311cdcb9 100644 --- a/vortex-array/src/arrays/chunked/compute/rules.rs +++ b/vortex-array/src/arrays/chunked/compute/rules.rs @@ -16,7 +16,6 @@ use crate::arrays::ScalarFnArray; use crate::arrays::chunked::ChunkedArrayExt; use crate::arrays::scalar_fn::AnyScalarFn; use crate::arrays::scalar_fn::ScalarFnArrayExt; -use crate::optimizer::ArrayOptimizer; use crate::optimizer::rules::ArrayParentReduceRule; use crate::optimizer::rules::ParentRuleSet; use crate::scalar_fn::fns::cast::CastReduceAdaptor; @@ -48,13 +47,12 @@ impl ArrayParentReduceRule for ChunkedUnaryScalarFnPushDownRule { let new_chunks: Vec<_> = array .iter_chunks() .map(|chunk| { - ScalarFnArray::try_new( + let parts = ScalarFnArray::try_new_parts( parent.scalar_fn().clone(), vec![chunk.clone()], chunk.len(), - )? - .into_array() - .optimize() + )?; + parts.optimize() }) .try_collect()?; @@ -104,9 +102,12 @@ impl ArrayParentReduceRule for ChunkedConstantScalarFnPushDownRule { }) .collect(); - ScalarFnArray::try_new(parent.scalar_fn().clone(), new_children, chunk.len())? 
- .into_array() - .optimize() + let parts = ScalarFnArray::try_new_parts( + parent.scalar_fn().clone(), + new_children, + chunk.len(), + )?; + parts.optimize() }) .try_collect()?; diff --git a/vortex-array/src/arrays/chunked/paired_chunks.rs b/vortex-array/src/arrays/chunked/paired_chunks.rs index 2145c88dbbd..6821bf66195 100644 --- a/vortex-array/src/arrays/chunked/paired_chunks.rs +++ b/vortex-array/src/arrays/chunked/paired_chunks.rs @@ -70,15 +70,15 @@ pub(crate) struct PairedChunks { pub(crate) trait PairedChunksExt: ChunkedArrayExt { fn paired_chunks(&self, other: &T) -> PairedChunks { assert_eq!( - self.as_ref().len(), - other.as_ref().len(), + self.len(), + other.len(), "paired_chunks requires arrays of equal length" ); PairedChunks { left: ChunkCursor::new(self.chunks()), right: ChunkCursor::new(other.chunks()), pos: 0, - total_len: self.as_ref().len(), + total_len: self.len(), } } } diff --git a/vortex-array/src/arrays/chunked/vtable/mod.rs b/vortex-array/src/arrays/chunked/vtable/mod.rs index 68679a4c3b5..e8068385ffa 100644 --- a/vortex-array/src/arrays/chunked/vtable/mod.rs +++ b/vortex-array/src/arrays/chunked/vtable/mod.rs @@ -28,6 +28,7 @@ use crate::array::Array; use crate::array::ArrayId; use crate::array::ArrayParts; use crate::array::ArrayView; +use crate::array::ParentRef; use crate::array::VTable; use crate::arrays::chunked::ChunkedArrayExt; use crate::arrays::chunked::ChunkedData; @@ -72,6 +73,7 @@ impl VTable for Chunked { type OperationsVTable = Self; type ValidityVTable = Self; + fn id(&self) -> ArrayId { static ID: CachedId = CachedId::new("vortex.chunked"); *ID @@ -282,7 +284,7 @@ impl VTable for Chunked { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { PARENT_RULES.evaluate(array, parent, child_idx) diff --git a/vortex-array/src/arrays/constant/vtable/mod.rs b/vortex-array/src/arrays/constant/vtable/mod.rs index c130c28a95a..5ec1e3d9ef4 100644 --- 
a/vortex-array/src/arrays/constant/vtable/mod.rs +++ b/vortex-array/src/arrays/constant/vtable/mod.rs @@ -23,6 +23,7 @@ use crate::Precision; use crate::array::Array; use crate::array::ArrayId; use crate::array::ArrayView; +use crate::array::ParentRef; use crate::array::VTable; use crate::arrays::constant::ConstantData; use crate::arrays::constant::compute::rules::PARENT_RULES; @@ -154,7 +155,7 @@ impl VTable for Constant { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { PARENT_RULES.evaluate(array, parent, child_idx) diff --git a/vortex-array/src/arrays/decimal/array.rs b/vortex-array/src/arrays/decimal/array.rs index 1f4d6c0c277..88e594ed04f 100644 --- a/vortex-array/src/arrays/decimal/array.rs +++ b/vortex-array/src/arrays/decimal/array.rs @@ -130,28 +130,25 @@ pub struct DecimalDataParts { pub trait DecimalArrayExt: TypedArrayRef { fn decimal_dtype(&self) -> DecimalDType { - match self.as_ref().dtype() { + match self.dtype() { DType::Decimal(decimal_dtype, _) => *decimal_dtype, _ => unreachable!("DecimalArrayExt requires a decimal dtype"), } } fn nullability(&self) -> Nullability { - match self.as_ref().dtype() { + match self.dtype() { DType::Decimal(_, nullability) => *nullability, _ => unreachable!("DecimalArrayExt requires a decimal dtype"), } } fn validity_child(&self) -> Option<&ArrayRef> { - self.as_ref().slots()[VALIDITY_SLOT].as_ref() + self.slots()[VALIDITY_SLOT].as_ref() } fn validity(&self) -> Validity { - child_to_validity( - self.as_ref().slots()[VALIDITY_SLOT].as_ref(), - self.nullability(), - ) + child_to_validity(self.slots()[VALIDITY_SLOT].as_ref(), self.nullability()) } fn values_type(&self) -> DecimalType { diff --git a/vortex-array/src/arrays/decimal/vtable/mod.rs b/vortex-array/src/arrays/decimal/vtable/mod.rs index dac24ecd95f..2351137b63a 100644 --- a/vortex-array/src/arrays/decimal/vtable/mod.rs +++ b/vortex-array/src/arrays/decimal/vtable/mod.rs @@ -17,6 
+17,7 @@ use crate::ExecutionCtx; use crate::ExecutionResult; use crate::array::Array; use crate::array::ArrayView; +use crate::array::ParentRef; use crate::array::VTable; use crate::arrays::decimal::DecimalData; use crate::buffer::BufferHandle; @@ -187,7 +188,7 @@ impl VTable for Decimal { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { RULES.evaluate(array, parent, child_idx) diff --git a/vortex-array/src/arrays/dict/array.rs b/vortex-array/src/arrays/dict/array.rs index 10108191744..2003f31619a 100644 --- a/vortex-array/src/arrays/dict/array.rs +++ b/vortex-array/src/arrays/dict/array.rs @@ -233,17 +233,51 @@ impl Array { /// Build a new `DictArray` from its components, `codes` and `values`. pub fn try_new(codes: ArrayRef, values: ArrayRef) -> VortexResult { + Array::try_from_parts(Self::try_new_parts(codes, values)?) + } + + /// Build the [`ArrayParts`]. The parts can then be optimized through + /// [`ParentRef::optimize`](crate::array::ParentRef::optimize) or materialized + /// directly with [`ArrayParts::into_array`]. + pub fn try_new_parts(codes: ArrayRef, values: ArrayRef) -> VortexResult> { let dtype = values .dtype() .union_nullability(codes.dtype().nullability()); let len = codes.len(); let data = DictData::try_new(codes.dtype())?; - Array::try_from_parts( + Ok( ArrayParts::new(Dict, dtype, len, data) .with_slots(smallvec![Some(codes), Some(values)]), ) } + /// Build the [`ArrayParts`] without validating codes or values, recording whether + /// all values are referenced by at least one code. + /// + /// The parts can then be optimized through + /// [`ParentRef::optimize`](crate::array::ParentRef::optimize) or materialized directly + /// with [`ArrayParts::into_array`]. 
Unlike + /// [`set_all_values_referenced`](Self::set_all_values_referenced), this does not run the + /// debug-only `all_values_referenced` validation, so it is intended for callers that + /// have externally guaranteed the flag (for example a layout validated at write time). + /// + /// # Safety + /// + /// See [`DictData::new_unchecked`] and [`DictData::set_all_values_referenced`]. + pub unsafe fn new_unchecked_parts( + codes: ArrayRef, + values: ArrayRef, + all_values_referenced: bool, + ) -> ArrayParts { + let dtype = values + .dtype() + .union_nullability(codes.dtype().nullability()); + let len = codes.len(); + let data = + unsafe { DictData::new_unchecked().set_all_values_referenced(all_values_referenced) }; + ArrayParts::new(Dict, dtype, len, data).with_slots(smallvec![Some(codes), Some(values)]) + } + /// Build a new `DictArray` without validating the codes or values. /// /// # Safety @@ -293,6 +327,8 @@ impl Array { #[cfg(test)] mod test { + use std::sync::LazyLock; + use rand::RngExt; use rand::SeedableRng; use rand::distr::Distribution; @@ -304,12 +340,10 @@ mod test { use vortex_error::VortexResult; use vortex_error::vortex_panic; use vortex_mask::AllOr; + use vortex_session::VortexSession; use crate::ArrayRef; use crate::IntoArray; - use crate::LEGACY_SESSION; - #[expect(deprecated)] - use crate::ToCanonical as _; use crate::VortexSessionExecute; use crate::arrays::ChunkedArray; use crate::arrays::DictArray; @@ -321,8 +355,12 @@ mod test { use crate::dtype::Nullability::NonNullable; use crate::dtype::PType; use crate::dtype::UnsignedPType; + use crate::session::ArraySession; use crate::validity::Validity; + static SESSION: LazyLock = + LazyLock::new(|| VortexSession::empty().with::()); + #[test] fn nullable_codes_validity() { let dict = DictArray::try_new( @@ -338,10 +376,7 @@ mod test { .as_ref() .validity() .unwrap() - .execute_mask( - dict.as_ref().len(), - &mut LEGACY_SESSION.create_execution_ctx(), - ) + .execute_mask(dict.as_ref().len(), &mut 
SESSION.create_execution_ctx()) .unwrap(); let AllOr::Some(indices) = mask.indices() else { vortex_panic!("Expected indices from mask") @@ -364,10 +399,7 @@ mod test { .as_ref() .validity() .unwrap() - .execute_mask( - dict.as_ref().len(), - &mut LEGACY_SESSION.create_execution_ctx(), - ) + .execute_mask(dict.as_ref().len(), &mut SESSION.create_execution_ctx()) .unwrap(); let AllOr::Some(indices) = mask.indices() else { vortex_panic!("Expected indices from mask") @@ -394,10 +426,7 @@ mod test { .as_ref() .validity() .unwrap() - .execute_mask( - dict.as_ref().len(), - &mut LEGACY_SESSION.create_execution_ctx(), - ) + .execute_mask(dict.as_ref().len(), &mut SESSION.create_execution_ctx()) .unwrap(); let AllOr::Some(indices) = mask.indices() else { vortex_panic!("Expected indices from mask") @@ -420,10 +449,7 @@ mod test { .as_ref() .validity() .unwrap() - .execute_mask( - dict.as_ref().len(), - &mut LEGACY_SESSION.create_execution_ctx(), - ) + .execute_mask(dict.as_ref().len(), &mut SESSION.create_execution_ctx()) .unwrap(); let AllOr::Some(indices) = mask.indices() else { vortex_panic!("Expected indices from mask") @@ -470,10 +496,9 @@ mod test { &DType::Primitive(PType::U64, NonNullable), len * chunk_count, ); - array.append_to_builder(builder.as_mut(), &mut LEGACY_SESSION.create_execution_ctx())?; + array.append_to_builder(builder.as_mut(), &mut SESSION.create_execution_ctx())?; - #[expect(deprecated)] - let into_prim = array.to_primitive(); + let into_prim = array.execute::(&mut SESSION.create_execution_ctx())?; let prim_into = builder.finish_into_canonical().into_primitive(); assert_arrays_eq!(into_prim, prim_into); diff --git a/vortex-array/src/arrays/dict/compute/like.rs b/vortex-array/src/arrays/dict/compute/like.rs index 40bcea8fcf1..76ca090362d 100644 --- a/vortex-array/src/arrays/dict/compute/like.rs +++ b/vortex-array/src/arrays/dict/compute/like.rs @@ -12,7 +12,6 @@ use crate::arrays::ConstantArray; use crate::arrays::dict::DictArrayExt; use 
crate::arrays::dict::DictArraySlotsExt; use crate::arrays::scalar_fn::ScalarFnFactoryExt; -use crate::optimizer::ArrayOptimizer; use crate::scalar_fn::fns::like::Like; use crate::scalar_fn::fns::like::LikeOptions; use crate::scalar_fn::fns::like::LikeReduce; @@ -30,9 +29,12 @@ impl LikeReduce for Dict { if let Some(pattern) = pattern.as_constant() { let pattern = ConstantArray::new(pattern, array.values().len()).into_array(); - let values = Like - .try_new_array(pattern.len(), options, [array.values().clone(), pattern])? - .optimize()?; + let parts = Like.try_new_array_parts( + pattern.len(), + options, + [array.values().clone(), pattern], + )?; + let values = parts.optimize()?; // SAFETY: LIKE preserves the len of the values, so codes are still pointing at // valid positions. diff --git a/vortex-array/src/arrays/dict/compute/rules.rs b/vortex-array/src/arrays/dict/compute/rules.rs index f6fe816a6cc..aadbd09b002 100644 --- a/vortex-array/src/arrays/dict/compute/rules.rs +++ b/vortex-array/src/arrays/dict/compute/rules.rs @@ -21,7 +21,6 @@ use crate::arrays::scalar_fn::AnyScalarFn; use crate::arrays::scalar_fn::ScalarFnArrayExt; use crate::arrays::slice::SliceReduceAdaptor; use crate::builtins::ArrayBuiltins; -use crate::optimizer::ArrayOptimizer; use crate::optimizer::rules::ArrayParentReduceRule; use crate::optimizer::rules::ParentRuleSet; use crate::scalar_fn::fns::cast::Cast; @@ -126,10 +125,9 @@ impl ArrayParentReduceRule for DictionaryScalarFnValuesPushDownRule { } } - let new_values = - ScalarFnArray::try_new(parent.scalar_fn().clone(), new_children, values_len)? - .into_array() - .optimize()?; + let parts = + ScalarFnArray::try_new_parts(parent.scalar_fn().clone(), new_children, values_len)?; + let new_values = parts.optimize()?; // We can only push down null-sensitive functions when we have all-valid codes. // In these cases, we cannot have the codes influence the nullability of the output DType. 
@@ -193,13 +191,12 @@ impl ArrayParentReduceRule for DictionaryScalarFnCodesPullUpRule { } } - let new_values = ScalarFnArray::try_new( + let parts = ScalarFnArray::try_new_parts( parent.scalar_fn().clone(), new_children, array.values().len(), - )? - .into_array() - .optimize()?; + )?; + let new_values = parts.optimize()?; let new_dict = unsafe { DictArray::new_unchecked(array.codes().clone(), new_values) }.into_array(); diff --git a/vortex-array/src/arrays/dict/take.rs b/vortex-array/src/arrays/dict/take.rs index b77bef19b39..391dc88c6d6 100644 --- a/vortex-array/src/arrays/dict/take.rs +++ b/vortex-array/src/arrays/dict/take.rs @@ -170,3 +170,27 @@ pub(crate) fn propagate_take_stats( ) }) } + +#[cfg(test)] +mod tests { + use vortex_error::VortexResult; + + use crate::IntoArray; + use crate::arrays::Constant; + use crate::arrays::ConstantArray; + use crate::arrays::DictArray; + use crate::arrays::PrimitiveArray; + + #[test] + fn reduce_adaptor_handles_stack_backed_dict_parent() -> VortexResult<()> { + let indices = PrimitiveArray::from_iter([0u32, 0, 0]).into_array(); + let values = ConstantArray::new(7i32, 1).into_array(); + let parts = DictArray::try_new_parts(indices, values)?; + + let reduced = parts.optimize()?; + + assert!(reduced.is::()); + assert_eq!(reduced.len(), 3); + Ok(()) + } +} diff --git a/vortex-array/src/arrays/dict/vtable/mod.rs b/vortex-array/src/arrays/dict/vtable/mod.rs index 33db223de72..8ed45c501c3 100644 --- a/vortex-array/src/arrays/dict/vtable/mod.rs +++ b/vortex-array/src/arrays/dict/vtable/mod.rs @@ -30,6 +30,7 @@ use crate::array::Array; use crate::array::ArrayId; use crate::array::ArrayParts; use crate::array::ArrayView; +use crate::array::ParentRef; use crate::array::VTable; use crate::arrays::ConstantArray; use crate::arrays::Primitive; @@ -199,7 +200,7 @@ impl VTable for Dict { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { 
PARENT_RULES.evaluate(array, parent, child_idx) diff --git a/vortex-array/src/arrays/extension/array.rs b/vortex-array/src/arrays/extension/array.rs index 0fd57030cdd..8cb376a5311 100644 --- a/vortex-array/src/arrays/extension/array.rs +++ b/vortex-array/src/arrays/extension/array.rs @@ -24,14 +24,13 @@ pub(super) const SLOT_NAMES: [&str; NUM_SLOTS] = ["storage"]; pub trait ExtensionArrayExt: TypedArrayRef { fn ext_dtype(&self) -> &ExtDTypeRef { - self.as_ref() - .dtype() + self.dtype() .as_extension_opt() .vortex_expect("extension array somehow did not have an extension dtype") } fn storage_array(&self) -> &ArrayRef { - self.as_ref().slots()[STORAGE_SLOT] + self.slots()[STORAGE_SLOT] .as_ref() .vortex_expect("ExtensionArray storage slot") } diff --git a/vortex-array/src/arrays/extension/vtable/mod.rs b/vortex-array/src/arrays/extension/vtable/mod.rs index 852593abddb..f8fbf547687 100644 --- a/vortex-array/src/arrays/extension/vtable/mod.rs +++ b/vortex-array/src/arrays/extension/vtable/mod.rs @@ -20,6 +20,7 @@ use crate::array::Array; use crate::array::ArrayId; use crate::array::ArrayParts; use crate::array::ArrayView; +use crate::array::ParentRef; use crate::array::VTable; use crate::array::ValidityVTableFromChild; use crate::arrays::extension::array::SLOT_NAMES; @@ -191,7 +192,7 @@ impl VTable for Extension { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { PARENT_RULES.evaluate(array, parent, child_idx) diff --git a/vortex-array/src/arrays/filter/array.rs b/vortex-array/src/arrays/filter/array.rs index dd811ad24a2..6b4d494f36f 100644 --- a/vortex-array/src/arrays/filter/array.rs +++ b/vortex-array/src/arrays/filter/array.rs @@ -44,7 +44,7 @@ pub struct FilterDataParts { pub trait FilterArrayExt: TypedArrayRef { fn child(&self) -> &ArrayRef { - self.as_ref().slots()[CHILD_SLOT] + self.slots()[CHILD_SLOT] .as_ref() .vortex_expect("validated filter child slot") } @@ -52,7 +52,7 @@ 
pub trait FilterArrayExt: TypedArrayRef { impl> FilterArrayExt for T {} impl FilterData { - pub fn new(mask: Mask) -> Self { + fn new(mask: Mask) -> Self { Self { mask } } @@ -103,13 +103,16 @@ impl Array { /// Constructs a new `FilterArray`. pub fn try_new(array: ArrayRef, mask: Mask) -> VortexResult { + Ok(unsafe { Array::from_parts_unchecked(Self::try_new_parts(array, mask)?) }) + } + + /// Builds the [`ArrayParts`]. The parts can then be optimized through + /// [`ParentRef::optimize`](crate::array::ParentRef::optimize) or materialized + /// directly with [`ArrayParts::into_array`]. + pub fn try_new_parts(array: ArrayRef, mask: Mask) -> VortexResult> { let dtype = array.dtype().clone(); let len = mask.true_count(); let data = FilterData::try_new(array.len(), mask)?; - Ok(unsafe { - Array::from_parts_unchecked( - ArrayParts::new(Filter, dtype, len, data).with_slots(smallvec![Some(array)]), - ) - }) + Ok(ArrayParts::new(Filter, dtype, len, data).with_slots(smallvec![Some(array)])) } } diff --git a/vortex-array/src/arrays/filter/kernel.rs b/vortex-array/src/arrays/filter/kernel.rs index 8c65fa99724..3cdbdcd6a7a 100644 --- a/vortex-array/src/arrays/filter/kernel.rs +++ b/vortex-array/src/arrays/filter/kernel.rs @@ -130,3 +130,26 @@ where ::filter(array, parent.filter_mask(), ctx) } } + +#[cfg(test)] +mod tests { + use vortex_error::VortexResult; + use vortex_mask::Mask; + + use crate::IntoArray; + use crate::arrays::Constant; + use crate::arrays::ConstantArray; + use crate::arrays::FilterArray; + + #[test] + fn reduce_adaptor_handles_stack_backed_filter_parent() -> VortexResult<()> { + let child = ConstantArray::new(7i32, 4).into_array(); + let parts = FilterArray::try_new_parts(child, Mask::from_iter([true, false, true, false]))?; + + let reduced = parts.optimize()?; + + assert!(reduced.is::()); + assert_eq!(reduced.len(), 2); + Ok(()) + } +} diff --git a/vortex-array/src/arrays/filter/vtable.rs b/vortex-array/src/arrays/filter/vtable.rs index 
2361c541997..e9b1a69907c 100644 --- a/vortex-array/src/arrays/filter/vtable.rs +++ b/vortex-array/src/arrays/filter/vtable.rs @@ -24,6 +24,7 @@ use crate::array::Array; use crate::array::ArrayId; use crate::array::ArrayView; use crate::array::OperationsVTable; +use crate::array::ParentRef; use crate::array::VTable; use crate::array::ValidityVTable; use crate::arrays::filter::FilterArrayExt; @@ -66,6 +67,7 @@ impl VTable for Filter { type TypedArrayData = FilterData; type OperationsVTable = Self; type ValidityVTable = Self; + fn id(&self) -> ArrayId { static ID: CachedId = CachedId::new("vortex.filter"); *ID @@ -165,7 +167,7 @@ impl VTable for Filter { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { PARENT_RULES.evaluate(array, parent, child_idx) diff --git a/vortex-array/src/arrays/fixed_size_list/array.rs b/vortex-array/src/arrays/fixed_size_list/array.rs index cc82551ab82..8460b2db11c 100644 --- a/vortex-array/src/arrays/fixed_size_list/array.rs +++ b/vortex-array/src/arrays/fixed_size_list/array.rs @@ -205,7 +205,7 @@ impl FixedSizeListData { pub trait FixedSizeListArrayExt: TypedArrayRef { fn dtype_parts(&self) -> (&DType, u32, crate::dtype::Nullability) { - match self.as_ref().dtype() { + match self.dtype() { DType::FixedSizeList(element_dtype, list_size, nullability) => { (element_dtype.as_ref(), *list_size, *nullability) } @@ -214,7 +214,7 @@ pub trait FixedSizeListArrayExt: TypedArrayRef { } fn elements(&self) -> &ArrayRef { - self.as_ref().slots()[ELEMENTS_SLOT] + self.slots()[ELEMENTS_SLOT] .as_ref() .vortex_expect("FixedSizeListArray elements slot") } @@ -226,15 +226,15 @@ pub trait FixedSizeListArrayExt: TypedArrayRef { fn fixed_size_list_validity(&self) -> Validity { let (_, _, nullability) = self.dtype_parts(); - child_to_validity(self.as_ref().slots()[VALIDITY_SLOT].as_ref(), nullability) + child_to_validity(self.slots()[VALIDITY_SLOT].as_ref(), nullability) } fn 
fixed_size_list_elements_at(&self, index: usize) -> VortexResult { debug_assert!( - index < self.as_ref().len(), + index < self.len(), "index {} out of bounds: the len is {}", index, - self.as_ref().len(), + self.len(), ); debug_assert!( self.fixed_size_list_validity() diff --git a/vortex-array/src/arrays/fixed_size_list/vtable/mod.rs b/vortex-array/src/arrays/fixed_size_list/vtable/mod.rs index ca38f3b777b..cc0e4215634 100644 --- a/vortex-array/src/arrays/fixed_size_list/vtable/mod.rs +++ b/vortex-array/src/arrays/fixed_size_list/vtable/mod.rs @@ -22,6 +22,7 @@ use crate::Precision; use crate::array::Array; use crate::array::ArrayId; use crate::array::ArrayView; +use crate::array::ParentRef; use crate::array::VTable; use crate::arrays::fixed_size_list::FixedSizeListData; use crate::arrays::fixed_size_list::array::ELEMENTS_SLOT; @@ -61,6 +62,7 @@ impl VTable for FixedSizeList { type OperationsVTable = Self; type ValidityVTable = Self; + fn id(&self) -> ArrayId { static ID: CachedId = CachedId::new("vortex.fixed_size_list"); *ID @@ -80,7 +82,7 @@ impl VTable for FixedSizeList { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { PARENT_RULES.evaluate(array, parent, child_idx) diff --git a/vortex-array/src/arrays/list/array.rs b/vortex-array/src/arrays/list/array.rs index 3e1c43ea3e9..3ca0c7ec1c9 100644 --- a/vortex-array/src/arrays/list/array.rs +++ b/vortex-array/src/arrays/list/array.rs @@ -268,36 +268,33 @@ impl ListData { pub trait ListArrayExt: TypedArrayRef { fn nullability(&self) -> crate::dtype::Nullability { - match self.as_ref().dtype() { + match self.dtype() { DType::List(_, nullability) => *nullability, _ => unreachable!("ListArrayExt requires a list dtype"), } } fn elements(&self) -> &ArrayRef { - self.as_ref().slots()[ELEMENTS_SLOT] + self.slots()[ELEMENTS_SLOT] .as_ref() .vortex_expect("ListArray elements slot") } fn offsets(&self) -> &ArrayRef { - 
self.as_ref().slots()[OFFSETS_SLOT] + self.slots()[OFFSETS_SLOT] .as_ref() .vortex_expect("ListArray offsets slot") } fn list_validity(&self) -> Validity { - child_to_validity( - self.as_ref().slots()[VALIDITY_SLOT].as_ref(), - self.nullability(), - ) + child_to_validity(self.slots()[VALIDITY_SLOT].as_ref(), self.nullability()) } fn offset_at(&self, index: usize) -> VortexResult { vortex_ensure!( - index <= self.as_ref().len(), + index <= self.len(), "Index {index} out of bounds 0..={}", - self.as_ref().len() + self.len() ); if let Some(p) = self.offsets().as_opt::() { @@ -321,7 +318,7 @@ pub trait ListArrayExt: TypedArrayRef { fn sliced_elements(&self) -> VortexResult { let start = self.offset_at(0)?; - let end = self.offset_at(self.as_ref().len())?; + let end = self.offset_at(self.len())?; self.elements().slice(start..end) } diff --git a/vortex-array/src/arrays/list/vtable/mod.rs b/vortex-array/src/arrays/list/vtable/mod.rs index 1b404e9edbc..02be610e173 100644 --- a/vortex-array/src/arrays/list/vtable/mod.rs +++ b/vortex-array/src/arrays/list/vtable/mod.rs @@ -24,6 +24,7 @@ use crate::array::Array; use crate::array::ArrayId; use crate::array::ArrayParts; use crate::array::ArrayView; +use crate::array::ParentRef; use crate::array::VTable; use crate::arrays::list::ListArrayExt; use crate::arrays::list::ListData; @@ -68,6 +69,7 @@ impl VTable for List { type OperationsVTable = Self; type ValidityVTable = Self; + fn id(&self) -> ArrayId { static ID: CachedId = CachedId::new("vortex.list"); *ID @@ -87,7 +89,7 @@ impl VTable for List { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { PARENT_RULES.evaluate(array, parent, child_idx) diff --git a/vortex-array/src/arrays/listview/array.rs b/vortex-array/src/arrays/listview/array.rs index 64e59d1687f..9655fbcd4ae 100644 --- a/vortex-array/src/arrays/listview/array.rs +++ b/vortex-array/src/arrays/listview/array.rs @@ -346,42 +346,39 @@ fn 
fill_referenced_mask( pub trait ListViewArrayExt: TypedArrayRef { fn nullability(&self) -> crate::dtype::Nullability { - match self.as_ref().dtype() { + match self.dtype() { DType::List(_, nullability) => *nullability, _ => unreachable!("ListViewArrayExt requires a list dtype"), } } fn elements(&self) -> &ArrayRef { - self.as_ref().slots()[ELEMENTS_SLOT] + self.slots()[ELEMENTS_SLOT] .as_ref() .vortex_expect("ListViewArray elements slot") } fn offsets(&self) -> &ArrayRef { - self.as_ref().slots()[OFFSETS_SLOT] + self.slots()[OFFSETS_SLOT] .as_ref() .vortex_expect("ListViewArray offsets slot") } fn sizes(&self) -> &ArrayRef { - self.as_ref().slots()[SIZES_SLOT] + self.slots()[SIZES_SLOT] .as_ref() .vortex_expect("ListViewArray sizes slot") } fn listview_validity(&self) -> Validity { - child_to_validity( - self.as_ref().slots()[VALIDITY_SLOT].as_ref(), - self.nullability(), - ) + child_to_validity(self.slots()[VALIDITY_SLOT].as_ref(), self.nullability()) } fn offset_at(&self, index: usize) -> usize { assert!( - index < self.as_ref().len(), + index < self.len(), "Index {index} out of bounds 0..{}", - self.as_ref().len() + self.len() ); self.offsets() .as_opt::() @@ -398,10 +395,10 @@ pub trait ListViewArrayExt: TypedArrayRef { fn size_at(&self, index: usize) -> usize { assert!( - index < self.as_ref().len(), + index < self.len(), "Index {} out of bounds 0..{}", index, - self.as_ref().len() + self.len() ); self.sizes() .as_opt::() diff --git a/vortex-array/src/arrays/listview/vtable/mod.rs b/vortex-array/src/arrays/listview/vtable/mod.rs index ddfa4aa0e6b..88d6fd26707 100644 --- a/vortex-array/src/arrays/listview/vtable/mod.rs +++ b/vortex-array/src/arrays/listview/vtable/mod.rs @@ -23,6 +23,7 @@ use crate::Precision; use crate::array::Array; use crate::array::ArrayId; use crate::array::ArrayView; +use crate::array::ParentRef; use crate::array::VTable; use crate::arrays::listview::ListViewArrayExt; use crate::arrays::listview::ListViewData; @@ -75,6 +76,7 @@ impl 
VTable for ListView { type OperationsVTable = Self; type ValidityVTable = Self; + fn id(&self) -> ArrayId { static ID: CachedId = CachedId::new("vortex.listview"); *ID @@ -214,7 +216,7 @@ impl VTable for ListView { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { PARENT_RULES.evaluate(array, parent, child_idx) diff --git a/vortex-array/src/arrays/masked/array.rs b/vortex-array/src/arrays/masked/array.rs index 5ba830cc0e5..af3c19eca78 100644 --- a/vortex-array/src/arrays/masked/array.rs +++ b/vortex-array/src/arrays/masked/array.rs @@ -40,8 +40,8 @@ impl Display for MaskedData { pub trait MaskedArrayExt: TypedArrayRef + MaskedArraySlotsExt { fn masked_validity(&self) -> Validity { child_to_validity( - self.as_ref().slots()[MaskedSlots::VALIDITY].as_ref(), - self.as_ref().dtype().nullability(), + self.slots()[MaskedSlots::VALIDITY].as_ref(), + self.dtype().nullability(), ) } } diff --git a/vortex-array/src/arrays/masked/vtable/mod.rs b/vortex-array/src/arrays/masked/vtable/mod.rs index 257448c6eec..c95bc4b61d3 100644 --- a/vortex-array/src/arrays/masked/vtable/mod.rs +++ b/vortex-array/src/arrays/masked/vtable/mod.rs @@ -27,6 +27,7 @@ use crate::VortexSessionExecute; use crate::array::Array; use crate::array::ArrayId; use crate::array::ArrayView; +use crate::array::ParentRef; use crate::array::VTable; use crate::array::validity_to_child; use crate::arrays::ConstantArray; @@ -186,7 +187,7 @@ impl VTable for Masked { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { PARENT_RULES.evaluate(array, parent, child_idx) diff --git a/vortex-array/src/arrays/null/mod.rs b/vortex-array/src/arrays/null/mod.rs index dd31ef16853..dfd1a220f38 100644 --- a/vortex-array/src/arrays/null/mod.rs +++ b/vortex-array/src/arrays/null/mod.rs @@ -16,6 +16,7 @@ use crate::array::ArrayParts; use crate::array::ArrayView; use 
crate::array::EmptyArrayData; use crate::array::OperationsVTable; +use crate::array::ParentRef; use crate::array::VTable; use crate::array::ValidityVTable; use crate::arrays::null::compute::rules::PARENT_RULES; @@ -100,7 +101,7 @@ impl VTable for Null { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { PARENT_RULES.evaluate(array, parent, child_idx) diff --git a/vortex-array/src/arrays/patched/array.rs b/vortex-array/src/arrays/patched/array.rs index b1e5367607b..127869bbf39 100644 --- a/vortex-array/src/arrays/patched/array.rs +++ b/vortex-array/src/arrays/patched/array.rs @@ -111,7 +111,7 @@ pub trait PatchedArrayExt: PatchedArraySlotsExt { #[inline] fn lane_range(&self, chunk: usize, lane: usize) -> VortexResult> { - assert!(chunk * 1024 <= self.as_ref().len() + self.offset()); + assert!(chunk * 1024 <= self.len() + self.offset()); assert!(lane < self.n_lanes()); let start = self.lane_offsets().execute_scalar( @@ -149,12 +149,12 @@ pub trait PatchedArrayExt: PatchedArraySlotsExt { let begin = (chunks.start * 1024).saturating_sub(self.offset()); let end = (chunks.end * 1024) .saturating_sub(self.offset()) - .min(self.as_ref().len()); + .min(self.len()); let offset = if chunks.start == 0 { self.offset() } else { 0 }; let inner = self.inner().slice(begin..end)?; let len = inner.len(); - let dtype = self.as_ref().dtype().clone(); + let dtype = self.dtype().clone(); let slots = PatchedSlots { inner, lane_offsets: sliced_lane_offsets, diff --git a/vortex-array/src/arrays/patched/vtable/mod.rs b/vortex-array/src/arrays/patched/vtable/mod.rs index f35a13600c5..c8ae1076fa5 100644 --- a/vortex-array/src/arrays/patched/vtable/mod.rs +++ b/vortex-array/src/arrays/patched/vtable/mod.rs @@ -29,6 +29,7 @@ use crate::array::Array; use crate::array::ArrayId; use crate::array::ArrayParts; use crate::array::ArrayView; +use crate::array::ParentRef; use crate::array::VTable; use 
crate::array::ValidityChild; use crate::array::ValidityVTableFromChild; @@ -126,14 +127,14 @@ impl VTable for Patched { vortex_panic!("invalid buffer index for PatchedArray: {idx}"); } - fn child(array: ArrayView<'_, Self>, idx: usize) -> ArrayRef { - match idx { - PatchedSlots::INNER => array.inner().clone(), - PatchedSlots::LANE_OFFSETS => array.lane_offsets().clone(), - PatchedSlots::PATCH_INDICES => array.patch_indices().clone(), - PatchedSlots::PATCH_VALUES => array.patch_values().clone(), - _ => vortex_panic!("invalid child index for PatchedArray: {idx}"), + fn child(array: ArrayView<'_, Self>, idx: usize) -> &ArrayRef { + if idx > PatchedSlots::PATCH_VALUES { + vortex_panic!("invalid child index for PatchedArray: {idx}") } + + array.slots()[idx] + .as_ref() + .vortex_expect("child slot is None") } fn serialize( @@ -312,7 +313,7 @@ impl VTable for Patched { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { PARENT_RULES.evaluate(array, parent, child_idx) diff --git a/vortex-array/src/arrays/primitive/array/mod.rs b/vortex-array/src/arrays/primitive/array/mod.rs index 7e3dd9b4ce7..06aea0c48fa 100644 --- a/vortex-array/src/arrays/primitive/array/mod.rs +++ b/vortex-array/src/arrays/primitive/array/mod.rs @@ -106,28 +106,25 @@ pub struct PrimitiveDataParts { pub trait PrimitiveArrayExt: TypedArrayRef { fn ptype(&self) -> PType { - match self.as_ref().dtype() { + match self.dtype() { DType::Primitive(ptype, _) => *ptype, _ => unreachable!("PrimitiveArrayExt requires a primitive dtype"), } } fn nullability(&self) -> Nullability { - match self.as_ref().dtype() { + match self.dtype() { DType::Primitive(_, nullability) => *nullability, _ => unreachable!("PrimitiveArrayExt requires a primitive dtype"), } } fn validity_child(&self) -> Option<&ArrayRef> { - self.as_ref().slots()[VALIDITY_SLOT].as_ref() + self.slots()[VALIDITY_SLOT].as_ref() } fn validity(&self) -> Validity { - 
child_to_validity( - self.as_ref().slots()[VALIDITY_SLOT].as_ref(), - self.nullability(), - ) + child_to_validity(self.slots()[VALIDITY_SLOT].as_ref(), self.nullability()) } fn buffer_handle(&self) -> &BufferHandle { @@ -145,7 +142,11 @@ pub trait PrimitiveArrayExt: TypedArrayRef { "can't reinterpret cast between integers of two different widths" ); - PrimitiveArray::from_buffer_handle(self.buffer_handle().clone(), ptype, self.validity()) + PrimitiveArray::from_buffer_handle( + self.buffer_handle().clone(), + ptype, + PrimitiveArrayExt::validity(self), + ) } /// Narrow the array to the smallest possible integer type that can represent all values. @@ -157,7 +158,7 @@ pub trait PrimitiveArrayExt: TypedArrayRef { let Some(min_max) = min_max(self.as_ref(), ctx)? else { return Ok(PrimitiveArray::new( Buffer::::zeroed(self.len()), - self.validity(), + PrimitiveArrayExt::validity(self), )); }; @@ -178,7 +179,7 @@ pub trait PrimitiveArrayExt: TypedArrayRef { return Ok(self.to_owned()); }; - let nullability = self.as_ref().dtype().nullability(); + let nullability = self.dtype().nullability(); if min < 0 || max < 0 { // Signed diff --git a/vortex-array/src/arrays/primitive/array/top_value.rs b/vortex-array/src/arrays/primitive/array/top_value.rs index d3ee5eb5a65..7932b28572f 100644 --- a/vortex-array/src/arrays/primitive/array/top_value.rs +++ b/vortex-array/src/arrays/primitive/array/top_value.rs @@ -33,10 +33,8 @@ impl PrimitiveArray { match_each_native_ptype!(self.ptype(), |P| { let (top, count) = typed_top_value( self.as_slice::

(), - self.as_ref().validity()?.execute_mask( - self.as_ref().len(), - &mut LEGACY_SESSION.create_execution_ctx(), - )?, + self.validity()? + .execute_mask(self.len(), &mut LEGACY_SESSION.create_execution_ctx())?, ); Ok(Some((top.into(), count))) }) diff --git a/vortex-array/src/arrays/primitive/compute/slice.rs b/vortex-array/src/arrays/primitive/compute/slice.rs index 1830ef3f8c6..32218f9a1f2 100644 --- a/vortex-array/src/arrays/primitive/compute/slice.rs +++ b/vortex-array/src/arrays/primitive/compute/slice.rs @@ -17,11 +17,13 @@ use crate::match_each_native_ptype; impl SliceReduce for Primitive { fn slice(array: ArrayView<'_, Self>, range: Range) -> VortexResult> { let result = match_each_native_ptype!(array.ptype(), |T| { - PrimitiveArray::from_buffer_handle( - array.buffer_handle().slice_typed::(range.clone()), - T::PTYPE, - array.validity()?.slice(range)?, - ) + unsafe { + PrimitiveArray::new_unchecked_from_handle( + array.buffer_handle().slice_typed::(range.clone()), + T::PTYPE, + array.validity()?.slice(range)?, + ) + } .into_array() }); Ok(Some(result)) diff --git a/vortex-array/src/arrays/primitive/vtable/mod.rs b/vortex-array/src/arrays/primitive/vtable/mod.rs index 5130665cd30..aaaab6378c5 100644 --- a/vortex-array/src/arrays/primitive/vtable/mod.rs +++ b/vortex-array/src/arrays/primitive/vtable/mod.rs @@ -12,6 +12,7 @@ use crate::ExecutionCtx; use crate::ExecutionResult; use crate::array::Array; use crate::array::ArrayView; +use crate::array::ParentRef; use crate::array::VTable; use crate::arrays::primitive::PrimitiveData; use crate::buffer::BufferHandle; @@ -186,7 +187,7 @@ impl VTable for Primitive { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { RULES.evaluate(array, parent, child_idx) diff --git a/vortex-array/src/arrays/scalar_fn/array.rs b/vortex-array/src/arrays/scalar_fn/array.rs index 2c77be3c329..627f1515811 100644 --- 
a/vortex-array/src/arrays/scalar_fn/array.rs +++ b/vortex-array/src/arrays/scalar_fn/array.rs @@ -31,16 +31,8 @@ impl Display for ScalarFnData { impl ScalarFnData { /// Create a new ScalarFnArray from a scalar function and its children. - pub fn build( - scalar_fn: ScalarFnRef, - children: Vec, - len: usize, - ) -> VortexResult { - vortex_ensure!( - children.iter().all(|c| c.len() == len), - "ScalarFnArray must have children equal to the array length" - ); - Ok(Self { scalar_fn }) + fn build(scalar_fn: ScalarFnRef) -> Self { + Self { scalar_fn } } /// Get the scalar function bound to this array. @@ -56,13 +48,13 @@ pub trait ScalarFnArrayExt: TypedArrayRef { } fn child_at(&self, idx: usize) -> &ArrayRef { - self.as_ref().slots()[idx] + self.slots()[idx] .as_ref() .vortex_expect("ScalarFnArray child slot") } fn child_count(&self) -> usize { - self.as_ref().slots().len() + self.slots().len() } fn nchildren(&self) -> usize { @@ -90,15 +82,30 @@ impl Array { children: Vec, len: usize, ) -> VortexResult { + Ok(unsafe { Array::from_parts_unchecked(Self::try_new_parts(scalar_fn, children, len)?) }) + } + + /// Build the [`ArrayParts`] for a ScalarFnArray without materializing it. + /// + /// Mirrors [`try_new`](Self::try_new) but stops short of allocating the backing + /// `ArrayRef`, so callers can drive the parts through [`ArrayParts::optimize`] and + /// only pay the wrapper allocation when no reduction fires. 
+ #[inline] + pub fn try_new_parts( + scalar_fn: ScalarFnRef, + children: Vec, + len: usize, + ) -> VortexResult> { + vortex_ensure!( + children.iter().all(|c| c.len() == len), + "ScalarFnArray must have children equal to the array length" + ); let arg_dtypes: Vec<_> = children.iter().map(|c| c.dtype().clone()).collect(); let dtype = scalar_fn.return_dtype(&arg_dtypes)?; - let data = ScalarFnData::build(scalar_fn.clone(), children.clone(), len)?; - let vtable = ScalarFn { id: scalar_fn.id() }; - Ok(unsafe { - Array::from_parts_unchecked( - ArrayParts::new(vtable, dtype, len, data) - .with_slots(children.into_iter().map(Some).collect::()), - ) - }) + let id = scalar_fn.id(); + let data = ScalarFnData::build(scalar_fn); + let vtable = ScalarFn { id }; + Ok(ArrayParts::new(vtable, dtype, len, data) + .with_slots(children.into_iter().map(Some).collect::())) } } diff --git a/vortex-array/src/arrays/scalar_fn/rules.rs b/vortex-array/src/arrays/scalar_fn/rules.rs index 1e9563cf9de..c4b9fec64a8 100644 --- a/vortex-array/src/arrays/scalar_fn/rules.rs +++ b/vortex-array/src/arrays/scalar_fn/rules.rs @@ -1,12 +1,12 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use std::any::Any; use std::sync::Arc; use itertools::Itertools; use vortex_error::VortexExpect; use vortex_error::VortexResult; +use vortex_error::vortex_panic; use crate::ArrayRef; use crate::IntoArray; @@ -20,6 +20,7 @@ use crate::arrays::Slice; use crate::arrays::StructArray; use crate::arrays::scalar_fn::ScalarFnArrayExt; use crate::dtype::DType; +use crate::expr::ExpressionReduceNode; use crate::optimizer::rules::ArrayParentReduceRule; use crate::optimizer::rules::ArrayReduceRule; use crate::optimizer::rules::ParentRuleSet; @@ -44,7 +45,7 @@ pub(super) const PARENT_RULES: ParentRuleSet = ParentRuleSet::new(&[ struct ScalarFnPackToStructRule; impl ArrayReduceRule for ScalarFnPackToStructRule { fn reduce(&self, array: ArrayView<'_, ScalarFn>) -> VortexResult> 
{ - let Some(pack_options) = array.scalar_fn().as_opt::() else { + let Some(pack_options) = ScalarFnArrayExt::scalar_fn(&array).as_opt::() else { return Ok(None); }; @@ -84,7 +85,12 @@ impl ArrayParentReduceRule for ScalarFnSliceReduceRule { .collect::>()?; Ok(Some( - ScalarFnArray::try_new(array.scalar_fn().clone(), children, range.len())?.into_array(), + ScalarFnArray::try_new( + ScalarFnArrayExt::scalar_fn(&array).clone(), + children, + range.len(), + )? + .into_array(), )) } } @@ -93,25 +99,48 @@ impl ArrayParentReduceRule for ScalarFnSliceReduceRule { struct ScalarFnAbstractReduceRule; impl ArrayReduceRule for ScalarFnAbstractReduceRule { fn reduce(&self, array: ArrayView<'_, ScalarFn>) -> VortexResult> { - if let Some(reduced) = array - .scalar_fn() - .reduce(array.as_ref(), &ArrayReduceCtx { len: array.len() })? + if let Some(reduced) = ScalarFnArrayExt::scalar_fn(&array) + .reduce(&array, &ArrayReduceCtx { len: array.len() })? { - return Ok(Some( - reduced - .as_any() - .downcast_ref::() - .vortex_expect("ReduceNode is not an ArrayRef") - .clone(), - )); + return Ok(Some(reduced.as_array())); } Ok(None) } } +impl ReduceNode for ArrayView<'_, ScalarFn> { + fn as_array(&self) -> ArrayRef { + self.array().clone() + } + + fn as_expression(&self) -> ExpressionReduceNode { + vortex_panic!("Cannot convert ArrayView to ExpressionReduceNode") + } + + fn node_dtype(&self) -> VortexResult { + Ok(self.dtype().clone()) + } + + fn scalar_fn(&self) -> Option<&ScalarFnRef> { + Some(ScalarFnArrayExt::scalar_fn(self)) + } + + fn child(&self, idx: usize) -> ReduceNodeRef { + Arc::new(self.child_at(idx).clone()) + } + + fn child_count(&self) -> usize { + ScalarFnArrayExt::nchildren(self) + } +} + impl ReduceNode for ArrayRef { - fn as_any(&self) -> &dyn Any { - self + fn as_array(&self) -> ArrayRef { + self.clone() + } + + fn as_expression(&self) -> ExpressionReduceNode { + vortex_panic!("Cannot convert ArrayRef to ExpressionReduceNode") } fn node_dtype(&self) -> VortexResult 
{ @@ -123,7 +152,11 @@ impl ReduceNode for ArrayRef { } fn child(&self, idx: usize) -> ReduceNodeRef { - Arc::new(self.nth_child(idx).vortex_expect("child idx out of bounds")) + Arc::new( + self.nth_child(idx) + .vortex_expect("child idx out of bounds") + .clone(), + ) } fn child_count(&self) -> usize { @@ -144,15 +177,7 @@ impl ReduceCtx for ArrayReduceCtx { Ok(Arc::new( ScalarFnArray::try_new( scalar_fn, - children - .iter() - .map(|c| { - c.as_any() - .downcast_ref::() - .vortex_expect("ReduceNode is not an ArrayRef") - .clone() - }) - .collect(), + children.iter().map(|c| c.as_array()).collect(), self.len, )? .into_array(), @@ -190,9 +215,12 @@ impl ArrayParentReduceRule for ScalarFnUnaryFilterPushDownRule { }) .try_collect()?; - let new_array = - ScalarFnArray::try_new(child.scalar_fn().clone(), new_children, parent.len())? - .into_array(); + let new_array = ScalarFnArray::try_new( + ScalarFnArrayExt::scalar_fn(&child).clone(), + new_children, + parent.len(), + )? + .into_array(); return Ok(Some(new_array)); } diff --git a/vortex-array/src/arrays/scalar_fn/vtable/mod.rs b/vortex-array/src/arrays/scalar_fn/vtable/mod.rs index 5906b3372f4..a86a933b08e 100644 --- a/vortex-array/src/arrays/scalar_fn/vtable/mod.rs +++ b/vortex-array/src/arrays/scalar_fn/vtable/mod.rs @@ -10,6 +10,7 @@ use std::marker::PhantomData; use std::ops::Deref; use itertools::Itertools; +use vortex_error::VortexExpect; use vortex_error::VortexResult; use vortex_error::vortex_bail; use vortex_error::vortex_ensure; @@ -20,13 +21,12 @@ use vortex_session::registry::CachedId; use crate::ArrayEq; use crate::ArrayHash; use crate::ArrayRef; -use crate::ArraySlots; -use crate::IntoArray; use crate::Precision; use crate::array::Array; use crate::array::ArrayId; use crate::array::ArrayParts; use crate::array::ArrayView; +use crate::array::ParentRef; use crate::array::VTable; use crate::arrays::scalar_fn::array::ScalarFnArrayExt; use crate::arrays::scalar_fn::array::ScalarFnData; @@ -160,7 +160,7 @@ 
impl VTable for ScalarFn { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { PARENT_RULES.evaluate(array, parent, child_idx) @@ -169,34 +169,34 @@ impl VTable for ScalarFn { /// Array factory functions for scalar functions. pub trait ScalarFnFactoryExt: scalar_fn::ScalarFnVTable { - fn try_new_array( + /// Build the [`ArrayParts`] for this scalar function applied to `children`. + /// + /// Stops short of allocating the backing `ArrayRef`, so callers can drive the parts + /// through [`ArrayParts::optimize`] and only pay the wrapper allocation when no + /// reduction fires. + #[inline] + fn try_new_array_parts( &self, len: usize, options: Self::Options, children: impl Into>, - ) -> VortexResult { + ) -> VortexResult> { let scalar_fn = scalar_fn::TypedScalarFnInstance::new(self.clone(), options).erased(); + Array::::try_new_parts(scalar_fn, children.into(), len) + } - let children = children.into(); - vortex_ensure!( - children.iter().all(|c| c.len() == len), - "All child arrays must have the same length as the scalar function array" - ); - - let child_dtypes = children.iter().map(|c| c.dtype().clone()).collect_vec(); - let dtype = scalar_fn.return_dtype(&child_dtypes)?; - - let data = ScalarFnData { - scalar_fn: scalar_fn.clone(), - }; - let vtable = ScalarFn { id: scalar_fn.id() }; - Ok(unsafe { - Array::from_parts_unchecked( - ArrayParts::new(vtable, dtype, len, data) - .with_slots(children.into_iter().map(Some).collect::()), - ) - } - .into_array()) + /// Build a materialized scalar-function array for this scalar function applied to + /// `children`. Equivalent to [`try_new_array_parts`](Self::try_new_array_parts) followed + /// by [`ArrayParts::into_array`]. + fn try_new_array( + &self, + len: usize, + options: Self::Options, + children: impl Into>, + ) -> VortexResult { + Ok(self + .try_new_array_parts(len, options, children)? 
+ .into_array()) } } impl ScalarFnFactoryExt for V {} @@ -207,12 +207,12 @@ pub struct AnyScalarFn; impl Matcher for AnyScalarFn { type Match<'a> = ArrayView<'a, ScalarFn>; - fn matches(array: &ArrayRef) -> bool { - array.is::() + fn try_match<'a>(parent: &'a ParentRef<'_>) -> Option> { + parent.as_opt::() } - fn try_match(array: &ArrayRef) -> Option> { - array.as_opt::() + fn try_match_ref(array: &ArrayRef) -> Option> { + array.as_typed::() } } @@ -220,40 +220,119 @@ impl Matcher for AnyScalarFn { #[derive(Debug, Default)] pub struct ExactScalarFn(PhantomData); +impl ExactScalarFn { + /// Promote an [`ArrayView<'_, ScalarFn>`] to a [`ScalarFnArrayView<'_, F>`] if the + /// inner scalar function is `F`. + /// + /// Shared by [`Matcher::try_match`] and [`Matcher::try_match_ref`]: both paths + /// produce an `ArrayView<'_, ScalarFn>` first and then go through this helper. + #[inline] + fn from_view(view: ArrayView<'_, ScalarFn>) -> Option> { + let scalar_fn = view.data().scalar_fn().downcast_ref::()?; + Some(ScalarFnArrayView { + view, + vtable: scalar_fn.vtable(), + options: scalar_fn.options(), + }) + } +} + impl Matcher for ExactScalarFn { type Match<'a> = ScalarFnArrayView<'a, F>; - fn matches(array: &ArrayRef) -> bool { - if let Some(scalar_fn_array) = array.as_opt::() { - scalar_fn_array.data().scalar_fn().is::() - } else { - false - } + /// Skip the `ArrayView` + `ScalarFnArrayView` construction that the default + /// `try_match(...).is_some()` would do. Two cheap downcasts suffice: encoding + /// id, then scalar function id. 
+ fn matches(parent: &ParentRef<'_>) -> bool { + parent + .typed_data::() + .is_some_and(|data| data.scalar_fn().is::()) } - fn try_match(array: &ArrayRef) -> Option> { - let scalar_fn_array = array.as_opt::()?; - let scalar_fn_data = scalar_fn_array.data(); - let scalar_fn = scalar_fn_data.scalar_fn().downcast_ref::()?; - Some(ScalarFnArrayView { - array, - vtable: scalar_fn.vtable(), - options: scalar_fn.options(), - }) + fn try_match<'a>(parent: &'a ParentRef<'_>) -> Option> { + Self::from_view(parent.as_opt::()?) + } + + /// Heap-side mirror of [`Self::matches`], same reasoning. + fn matches_ref(array: &ArrayRef) -> bool { + array + .as_typed::() + .is_some_and(|view| view.data().scalar_fn().is::()) + } + + fn try_match_ref(array: &ArrayRef) -> Option> { + Self::from_view(array.as_typed::()?) } } +/// A typed view over a [`ScalarFn`] array exposing the concrete `F`-typed `vtable` +/// and `options`. +/// +/// Wraps an [`ArrayView<'_, ScalarFn>`] so all metadata accessors (`dtype`, `len`, +/// `slots`, `encoding_id`) and the cold `array()` materialization go through the +/// same flat-field / [`ArrayBacking`](crate::array::ArrayBacking) path as +/// [`ArrayView`] — stack-backed parents stay on the stack until a consumer reaches +/// for the underlying [`ArrayRef`]. pub struct ScalarFnArrayView<'a, F: scalar_fn::ScalarFnVTable> { - array: &'a ArrayRef, + view: ArrayView<'a, ScalarFn>, pub vtable: &'a F, pub options: &'a F::Options, } -impl Deref for ScalarFnArrayView<'_, F> { - type Target = ArrayRef; +impl<'a, F: scalar_fn::ScalarFnVTable> ScalarFnArrayView<'a, F> { + /// Returns the underlying [`ScalarFn`]-typed array view. + #[inline] + pub fn view(&self) -> ArrayView<'a, ScalarFn> { + self.view + } + + /// Returns the child array at the given slot. + /// + /// Reads from `slots()` directly without forcing stack-backed parents to + /// materialize. 
+ pub fn child_at(&self, idx: usize) -> &'a ArrayRef { + self.view.slots()[idx] + .as_ref() + .vortex_expect("ScalarFnArray child slot") + } + + /// Alias for [`Self::child_at`]. + #[inline] + pub fn get_child(&self, idx: usize) -> &'a ArrayRef { + self.child_at(idx) + } + + /// Returns the number of child slots. + #[inline] + pub fn child_count(&self) -> usize { + self.view.slots().len() + } + + /// Iterates over the array's children. + pub fn iter_children(&self) -> impl Iterator + '_ { + (0..self.child_count()).map(|idx| self.child_at(idx)) + } + + /// Collects the children into a `Vec` of cloned `ArrayRef`s. + pub fn children(&self) -> Vec { + self.iter_children().cloned().collect() + } +} + +impl Copy for ScalarFnArrayView<'_, F> {} + +impl Clone for ScalarFnArrayView<'_, F> { + fn clone(&self) -> Self { + *self + } +} + +impl<'a, F: scalar_fn::ScalarFnVTable> Deref for ScalarFnArrayView<'a, F> { + type Target = ArrayView<'a, ScalarFn>; - fn deref(&self) -> &Self::Target { - self.array + #[inline] + fn deref(&self) -> &ArrayView<'a, ScalarFn> { + &self.view } } diff --git a/vortex-array/src/arrays/shared/array.rs b/vortex-array/src/arrays/shared/array.rs index 9619018ff89..93b896746ab 100644 --- a/vortex-array/src/arrays/shared/array.rs +++ b/vortex-array/src/arrays/shared/array.rs @@ -45,7 +45,7 @@ impl Display for SharedData { #[expect(async_fn_in_trait)] pub trait SharedArrayExt: TypedArrayRef { fn source(&self) -> &ArrayRef { - self.as_ref().slots()[SOURCE_SLOT] + self.slots()[SOURCE_SLOT] .as_ref() .vortex_expect("validated shared source slot") } diff --git a/vortex-array/src/arrays/shared/vtable.rs b/vortex-array/src/arrays/shared/vtable.rs index f13a262479e..d7719ef5cbd 100644 --- a/vortex-array/src/arrays/shared/vtable.rs +++ b/vortex-array/src/arrays/shared/vtable.rs @@ -52,6 +52,7 @@ impl VTable for Shared { type TypedArrayData = SharedData; type OperationsVTable = Self; type ValidityVTable = Self; + fn id(&self) -> ArrayId { static ID: CachedId = 
CachedId::new("vortex.shared"); *ID diff --git a/vortex-array/src/arrays/slice/array.rs b/vortex-array/src/arrays/slice/array.rs index 9d7e0bb13ec..70f608b202a 100644 --- a/vortex-array/src/arrays/slice/array.rs +++ b/vortex-array/src/arrays/slice/array.rs @@ -38,7 +38,7 @@ pub struct SliceDataParts { pub trait SliceArrayExt: TypedArrayRef { fn child(&self) -> &ArrayRef { - self.as_ref().slots()[CHILD_SLOT] + self.slots()[CHILD_SLOT] .as_ref() .vortex_expect("validated slice child slot") } @@ -84,14 +84,7 @@ impl SliceData { impl Array { /// Constructs a new `SliceArray`. pub fn try_new(child: ArrayRef, range: Range) -> VortexResult { - let len = range.len(); - let dtype = child.dtype().clone(); - let data = SliceData::try_new(child.len(), range)?; - Ok(unsafe { - Array::from_parts_unchecked( - ArrayParts::new(Slice, dtype, len, data).with_slots(smallvec![Some(child)]), - ) - }) + Ok(unsafe { Array::from_parts_unchecked(Self::try_new_parts(child, range)?) }) } /// Constructs a new `SliceArray`. @@ -105,4 +98,14 @@ impl Array { ) } } + + /// Builds the [`ArrayParts`] for a slice. The parts can then be + /// optimized through [`ParentRef::optimize`](crate::array::ParentRef::optimize) + /// or materialized directly with [`ArrayParts::into_array`]. 
+ pub fn try_new_parts(child: ArrayRef, range: Range) -> VortexResult> { + let len = range.len(); + let dtype = child.dtype().clone(); + let data = SliceData::try_new(child.len(), range)?; + Ok(ArrayParts::new(Slice, dtype, len, data).with_slots(smallvec![Some(child)])) + } } diff --git a/vortex-array/src/arrays/slice/vtable.rs b/vortex-array/src/arrays/slice/vtable.rs index ac0ecc18039..71860ca22d7 100644 --- a/vortex-array/src/arrays/slice/vtable.rs +++ b/vortex-array/src/arrays/slice/vtable.rs @@ -24,6 +24,7 @@ use crate::array::Array; use crate::array::ArrayId; use crate::array::ArrayView; use crate::array::OperationsVTable; +use crate::array::ParentRef; use crate::array::VTable; use crate::array::ValidityVTable; use crate::arrays::slice::SliceArrayExt; @@ -63,6 +64,7 @@ impl VTable for Slice { type TypedArrayData = SliceData; type OperationsVTable = Self; type ValidityVTable = Self; + fn id(&self) -> ArrayId { static ID: CachedId = CachedId::new("vortex.slice"); *ID @@ -153,7 +155,7 @@ impl VTable for Slice { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { PARENT_RULES.evaluate(array, parent, child_idx) diff --git a/vortex-array/src/arrays/struct_/array.rs b/vortex-array/src/arrays/struct_/array.rs index 7feffb5484c..4167cb4224b 100644 --- a/vortex-array/src/arrays/struct_/array.rs +++ b/vortex-array/src/arrays/struct_/array.rs @@ -174,25 +174,22 @@ pub(super) fn make_struct_slots( pub trait StructArrayExt: TypedArrayRef { fn nullability(&self) -> crate::dtype::Nullability { - match self.as_ref().dtype() { + match self.dtype() { DType::Struct(_, nullability) => *nullability, _ => unreachable!("StructArrayExt requires a struct dtype"), } } fn names(&self) -> &FieldNames { - self.as_ref().dtype().as_struct_fields().names() + self.dtype().as_struct_fields().names() } fn struct_validity(&self) -> Validity { - child_to_validity( - self.as_ref().slots()[VALIDITY_SLOT].as_ref(), - 
self.nullability(), - ) + child_to_validity(self.slots()[VALIDITY_SLOT].as_ref(), self.nullability()) } fn iter_unmasked_fields(&self) -> impl Iterator + '_ { - self.as_ref().slots()[FIELDS_OFFSET..] + self.slots()[FIELDS_OFFSET..] .iter() .map(|s| s.as_ref().vortex_expect("StructArray field slot")) } @@ -202,7 +199,7 @@ pub trait StructArrayExt: TypedArrayRef { } fn unmasked_field(&self, idx: usize) -> &ArrayRef { - self.as_ref().slots()[FIELDS_OFFSET + idx] + self.slots()[FIELDS_OFFSET + idx] .as_ref() .vortex_expect("StructArray field slot") } @@ -225,7 +222,7 @@ pub trait StructArrayExt: TypedArrayRef { } fn struct_fields(&self) -> &StructFields { - self.as_ref().dtype().as_struct_fields() + self.dtype().as_struct_fields() } } impl> StructArrayExt for T {} diff --git a/vortex-array/src/arrays/struct_/compute/cast.rs b/vortex-array/src/arrays/struct_/compute/cast.rs index f954dbe2fea..f1e51e3ebb2 100644 --- a/vortex-array/src/arrays/struct_/compute/cast.rs +++ b/vortex-array/src/arrays/struct_/compute/cast.rs @@ -9,6 +9,7 @@ use crate::ArrayRef; use crate::ArrayView; use crate::ExecutionCtx; use crate::IntoArray; +use crate::ParentRef; use crate::arrays::ConstantArray; use crate::arrays::Struct; use crate::arrays::StructArray; @@ -17,20 +18,19 @@ use crate::arrays::struct_::StructArrayExt; use crate::builtins::ArrayBuiltins; use crate::dtype::DType; use crate::dtype::StructFields; -use crate::matcher::Matcher; use crate::scalar::Scalar; use crate::scalar_fn::fns::cast::Cast; pub(crate) fn struct_cast_execute_parent( child: &ArrayRef, - parent: &ArrayRef, + parent: &ParentRef, _child_idx: usize, ctx: &mut ExecutionCtx, ) -> VortexResult> { let Some(array) = child.as_opt::() else { return Ok(None); }; - let Some(parent) = ExactScalarFn::::try_match(parent) else { + let Some(parent) = parent.as_opt::>() else { return Ok(None); }; @@ -124,6 +124,7 @@ mod tests { use crate::ArrayRef; use crate::ExecutionCtx; use crate::IntoArray; + use crate::ParentRef; use 
crate::VortexSessionExecute; use crate::arrays::ConstantArray; use crate::arrays::PrimitiveArray; @@ -153,7 +154,7 @@ mod tests { fn null_struct_cast_execute_parent( child: &ArrayRef, - parent: &ArrayRef, + parent: &ParentRef<'_>, _child_idx: usize, _ctx: &mut ExecutionCtx, ) -> VortexResult> { diff --git a/vortex-array/src/arrays/struct_/compute/rules.rs b/vortex-array/src/arrays/struct_/compute/rules.rs index ae3a67b50e3..4df7d9a574a 100644 --- a/vortex-array/src/arrays/struct_/compute/rules.rs +++ b/vortex-array/src/arrays/struct_/compute/rules.rs @@ -6,6 +6,7 @@ use vortex_error::vortex_err; use crate::ArrayRef; use crate::IntoArray; +use crate::ParentRef; use crate::array::ArrayView; use crate::arrays::ConstantArray; use crate::arrays::Struct; @@ -17,7 +18,6 @@ use crate::arrays::slice::SliceReduceAdaptor; use crate::arrays::struct_::StructArrayExt; use crate::arrays::struct_::compute::cast::struct_cast_fields; use crate::builtins::ArrayBuiltins; -use crate::matcher::Matcher; use crate::optimizer::rules::ArrayParentReduceRule; use crate::optimizer::rules::ParentRuleSet; use crate::scalar::Scalar; @@ -35,13 +35,13 @@ pub(crate) const PARENT_RULES: ParentRuleSet = ParentRuleSet::new(&[ pub(crate) fn struct_cast_reduce_parent( child: &ArrayRef, - parent: &ArrayRef, + parent: &ParentRef<'_>, _child_idx: usize, ) -> VortexResult> { let Some(array) = child.as_opt::() else { return Ok(None); }; - let Some(parent) = ExactScalarFn::::try_match(parent) else { + let Some(parent) = parent.as_opt::>() else { return Ok(None); }; @@ -131,6 +131,7 @@ mod tests { use crate::ArrayRef; use crate::IntoArray; + use crate::ParentRef; use crate::array::ArrayPlugin; use crate::arrays::ScalarFn; use crate::arrays::Struct; @@ -153,12 +154,13 @@ mod tests { use crate::scalar_fn::ScalarFnVTable; use crate::scalar_fn::fns::cast::Cast; use crate::validity::Validity; + static SESSION: LazyLock = LazyLock::new(|| VortexSession::empty().with::()); fn no_struct_cast_plugin( _child: &ArrayRef, 
- _parent: &ArrayRef, + _parent: &ParentRef<'_>, _child_idx: usize, ) -> VortexResult> { Ok(None) diff --git a/vortex-array/src/arrays/struct_/vtable/mod.rs b/vortex-array/src/arrays/struct_/vtable/mod.rs index 71ec8a8e2df..5a59a1536a5 100644 --- a/vortex-array/src/arrays/struct_/vtable/mod.rs +++ b/vortex-array/src/arrays/struct_/vtable/mod.rs @@ -16,6 +16,7 @@ use crate::array::Array; use crate::array::ArrayParts; use crate::array::ArrayView; use crate::array::EmptyArrayData; +use crate::array::ParentRef; use crate::array::VTable; use crate::array::child_to_validity; use crate::arrays::struct_::array::FIELDS_OFFSET; @@ -42,6 +43,7 @@ impl VTable for Struct { type OperationsVTable = Self; type ValidityVTable = Self; + fn id(&self) -> ArrayId { static ID: CachedId = CachedId::new("vortex.struct"); *ID @@ -187,7 +189,7 @@ impl VTable for Struct { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { PARENT_RULES.evaluate(array, parent, child_idx) diff --git a/vortex-array/src/arrays/varbin/array.rs b/vortex-array/src/arrays/varbin/array.rs index deaef952d5e..2a43706aca0 100644 --- a/vortex-array/src/arrays/varbin/array.rs +++ b/vortex-array/src/arrays/varbin/array.rs @@ -292,17 +292,17 @@ impl VarBinData { pub trait VarBinArrayExt: TypedArrayRef { fn offsets(&self) -> &ArrayRef { - self.as_ref().slots()[OFFSETS_SLOT] + self.slots()[OFFSETS_SLOT] .as_ref() .vortex_expect("VarBinArray offsets slot") } fn validity_child(&self) -> Option<&ArrayRef> { - self.as_ref().slots()[VALIDITY_SLOT].as_ref() + self.slots()[VALIDITY_SLOT].as_ref() } fn dtype_parts(&self) -> (bool, Nullability) { - match self.as_ref().dtype() { + match self.dtype() { DType::Utf8(nullability) => (true, *nullability), DType::Binary(nullability) => (false, *nullability), _ => unreachable!("VarBinArrayExt requires a utf8 or binary dtype"), @@ -318,17 +318,14 @@ pub trait VarBinArrayExt: TypedArrayRef { } fn 
varbin_validity(&self) -> Validity { - child_to_validity( - self.as_ref().slots()[VALIDITY_SLOT].as_ref(), - self.nullability(), - ) + child_to_validity(self.slots()[VALIDITY_SLOT].as_ref(), self.nullability()) } fn offset_at(&self, index: usize) -> usize { assert!( - index <= self.as_ref().len(), + index <= self.len(), "Index {index} out of bounds 0..={}", - self.as_ref().len() + self.len() ); (&self @@ -347,7 +344,7 @@ pub trait VarBinArrayExt: TypedArrayRef { fn sliced_bytes(&self) -> ByteBuffer { let first_offset: usize = self.offset_at(0); - let last_offset = self.offset_at(self.as_ref().len()); + let last_offset = self.offset_at(self.len()); self.bytes().slice(first_offset..last_offset) } } diff --git a/vortex-array/src/arrays/varbin/vtable/mod.rs b/vortex-array/src/arrays/varbin/vtable/mod.rs index c1a0941e720..05eb7786277 100644 --- a/vortex-array/src/arrays/varbin/vtable/mod.rs +++ b/vortex-array/src/arrays/varbin/vtable/mod.rs @@ -18,6 +18,7 @@ use crate::IntoArray; use crate::array::Array; use crate::array::ArrayId; use crate::array::ArrayView; +use crate::array::ParentRef; use crate::array::VTable; use crate::arrays::varbin::VarBinArrayExt; use crate::arrays::varbin::VarBinData; @@ -70,6 +71,7 @@ impl VTable for VarBin { type OperationsVTable = Self; type ValidityVTable = Self; + fn id(&self) -> ArrayId { static ID: CachedId = CachedId::new("vortex.varbin"); *ID @@ -176,7 +178,7 @@ impl VTable for VarBin { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { PARENT_RULES.evaluate(array, parent, child_idx) diff --git a/vortex-array/src/arrays/varbinview/array.rs b/vortex-array/src/arrays/varbinview/array.rs index e302c9341d2..d7fe2c00440 100644 --- a/vortex-array/src/arrays/varbinview/array.rs +++ b/vortex-array/src/arrays/varbinview/array.rs @@ -534,7 +534,7 @@ impl VarBinViewData { pub trait VarBinViewArrayExt: TypedArrayRef { fn dtype_parts(&self) -> (bool, Nullability) { - 
match self.as_ref().dtype() { + match self.dtype() { DType::Utf8(nullability) => (true, *nullability), DType::Binary(nullability) => (false, *nullability), _ => unreachable!("VarBinViewArrayExt requires a utf8 or binary dtype"), @@ -542,10 +542,7 @@ pub trait VarBinViewArrayExt: TypedArrayRef { } fn varbinview_validity(&self) -> Validity { - child_to_validity( - self.as_ref().slots()[VALIDITY_SLOT].as_ref(), - self.dtype_parts().1, - ) + child_to_validity(self.slots()[VALIDITY_SLOT].as_ref(), self.dtype_parts().1) } } impl> VarBinViewArrayExt for T {} diff --git a/vortex-array/src/arrays/varbinview/compact.rs b/vortex-array/src/arrays/varbinview/compact.rs index 6effc7c656a..89bd96da19a 100644 --- a/vortex-array/src/arrays/varbinview/compact.rs +++ b/vortex-array/src/arrays/varbinview/compact.rs @@ -65,10 +65,10 @@ impl VarBinViewArray { where F: FnMut(&Ref), { - match self.as_ref().validity()?.execute_mask( - self.as_ref().len(), - &mut LEGACY_SESSION.create_execution_ctx(), - )? { + match self + .validity()? + .execute_mask(self.len(), &mut LEGACY_SESSION.create_execution_ctx())? 
+ { Mask::AllTrue(_) => { for &view in self.views().iter() { if !view.is_inlined() { diff --git a/vortex-array/src/arrays/varbinview/vtable/mod.rs b/vortex-array/src/arrays/varbinview/vtable/mod.rs index 45db5d904ac..fa541e2e839 100644 --- a/vortex-array/src/arrays/varbinview/vtable/mod.rs +++ b/vortex-array/src/arrays/varbinview/vtable/mod.rs @@ -22,6 +22,7 @@ use crate::Precision; use crate::array::Array; use crate::array::ArrayId; use crate::array::ArrayView; +use crate::array::ParentRef; use crate::array::VTable; use crate::arrays::varbinview::BinaryView; use crate::arrays::varbinview::VarBinViewData; @@ -211,7 +212,7 @@ impl VTable for VarBinView { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { PARENT_RULES.evaluate(array, parent, child_idx) diff --git a/vortex-array/src/arrays/variant/mod.rs b/vortex-array/src/arrays/variant/mod.rs index 7d7f23e34c5..a396d831ead 100644 --- a/vortex-array/src/arrays/variant/mod.rs +++ b/vortex-array/src/arrays/variant/mod.rs @@ -35,7 +35,7 @@ pub(super) const SLOT_NAMES: [&str; NUM_SLOTS] = ["core_storage", "shredded"]; pub trait VariantArrayExt: TypedArrayRef { /// Returns the logical variant storage that preserves the full value for every row. fn core_storage(&self) -> &ArrayRef { - self.as_ref().slots()[CORE_STORAGE_SLOT] + self.slots()[CORE_STORAGE_SLOT] .as_ref() .vortex_expect("validated variant core_storage slot") } @@ -44,7 +44,7 @@ pub trait VariantArrayExt: TypedArrayRef { /// This functions returns `Some` only if the array was canonicalized and the shredded data /// was pulled out of the underlying variant storage. 
fn shredded(&self) -> Option<&ArrayRef> { - self.as_ref().slots()[SHREDDED_SLOT].as_ref() + self.slots()[SHREDDED_SLOT].as_ref() } } impl> VariantArrayExt for T {} diff --git a/vortex-array/src/arrays/variant/vtable/mod.rs b/vortex-array/src/arrays/variant/vtable/mod.rs index 3c0dd76c575..cc6195b7ead 100644 --- a/vortex-array/src/arrays/variant/vtable/mod.rs +++ b/vortex-array/src/arrays/variant/vtable/mod.rs @@ -24,6 +24,7 @@ use crate::array::ArrayId; use crate::array::ArrayParts; use crate::array::ArrayView; use crate::array::EmptyArrayData; +use crate::array::ParentRef; use crate::array::VTable; use crate::arrays::variant::CORE_STORAGE_SLOT; use crate::arrays::variant::NUM_SLOTS; @@ -187,7 +188,7 @@ impl VTable for Variant { fn reduce_parent( array: ArrayView<'_, Self>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { RULES.evaluate(array, parent, child_idx) diff --git a/vortex-array/src/builtins.rs b/vortex-array/src/builtins.rs index dcbe934097e..37ceead9aa7 100644 --- a/vortex-array/src/builtins.rs +++ b/vortex-array/src/builtins.rs @@ -18,7 +18,6 @@ use crate::arrays::scalar_fn::ScalarFnFactoryExt; use crate::dtype::DType; use crate::dtype::FieldName; use crate::expr::Expression; -use crate::optimizer::ArrayOptimizer; use crate::scalar::Scalar; use crate::scalar_fn::EmptyOptions; use crate::scalar_fn::ScalarFnVTableExt; @@ -160,8 +159,8 @@ impl ArrayBuiltins for ArrayRef { if self.dtype() == &dtype { return Ok(self.clone()); } - Cast.try_new_array(self.len(), dtype, [self.clone()])? 
- .optimize() + let parts = Cast.try_new_array_parts(self.len(), dtype, [self.clone()])?; + parts.optimize() } fn fill_null(&self, fill_value: impl Into) -> VortexResult { @@ -169,60 +168,57 @@ impl ArrayBuiltins for ArrayRef { if !self.dtype().is_nullable() { return self.cast(fill_value.dtype().clone()); } - FillNull - .try_new_array( - self.len(), - EmptyOptions, - [ - self.clone(), - ConstantArray::new(fill_value, self.len()).into_array(), - ], - )? - .optimize() + let parts = FillNull.try_new_array_parts( + self.len(), + EmptyOptions, + [ + self.clone(), + ConstantArray::new(fill_value, self.len()).into_array(), + ], + )?; + parts.optimize() } fn get_item(&self, field_name: impl Into) -> VortexResult { - GetItem - .try_new_array(self.len(), field_name.into(), [self.clone()])? - .optimize() + let parts = GetItem.try_new_array_parts(self.len(), field_name.into(), [self.clone()])?; + parts.optimize() } fn is_null(&self) -> VortexResult { - IsNull - .try_new_array(self.len(), EmptyOptions, [self.clone()])? - .optimize() + let parts = IsNull.try_new_array_parts(self.len(), EmptyOptions, [self.clone()])?; + parts.optimize() } fn is_not_null(&self) -> VortexResult { - IsNotNull - .try_new_array(self.len(), EmptyOptions, [self.clone()])? - .optimize() + let parts = IsNotNull.try_new_array_parts(self.len(), EmptyOptions, [self.clone()])?; + parts.optimize() } fn mask(self, mask: ArrayRef) -> VortexResult { - Mask.try_new_array(self.len(), EmptyOptions, [self, mask])? - .optimize() + let parts = Mask.try_new_array_parts(self.len(), EmptyOptions, [self, mask])?; + parts.optimize() } fn not(&self) -> VortexResult { - Not.try_new_array(self.len(), EmptyOptions, [self.clone()])? 
- .optimize() + let parts = Not.try_new_array_parts(self.len(), EmptyOptions, [self.clone()])?; + parts.optimize() } fn zip(&self, if_true: ArrayRef, if_false: ArrayRef) -> VortexResult { - Zip.try_new_array(self.len(), EmptyOptions, [if_true, if_false, self.clone()]) + let parts = + Zip.try_new_array_parts(self.len(), EmptyOptions, [if_true, if_false, self.clone()])?; + parts.optimize() } fn list_contains(&self, value: ArrayRef) -> VortexResult { - ListContains - .try_new_array(self.len(), EmptyOptions, [self.clone(), value])? - .optimize() + let parts = + ListContains.try_new_array_parts(self.len(), EmptyOptions, [self.clone(), value])?; + parts.optimize() } fn binary(&self, rhs: ArrayRef, op: Operator) -> VortexResult { - Binary - .try_new_array(self.len(), op, [self.clone(), rhs])? - .optimize() + let parts = Binary.try_new_array_parts(self.len(), op, [self.clone(), rhs])?; + parts.optimize() } fn between( @@ -231,8 +227,7 @@ impl ArrayBuiltins for ArrayRef { upper: ArrayRef, options: BetweenOptions, ) -> VortexResult { - Between - .try_new_array(self.len(), options, [self, lower, upper])? - .optimize() + let parts = Between.try_new_array_parts(self.len(), options, [self, lower, upper])?; + parts.optimize() } } diff --git a/vortex-array/src/canonical.rs b/vortex-array/src/canonical.rs index b1773d453f2..a1794957ac1 100644 --- a/vortex-array/src/canonical.rs +++ b/vortex-array/src/canonical.rs @@ -18,6 +18,7 @@ use crate::Executable; use crate::ExecutionCtx; use crate::IntoArray; use crate::array::ArrayView; +use crate::array::ParentRef; use crate::array::child_to_validity; use crate::arrays::Bool; use crate::arrays::BoolArray; @@ -1062,7 +1063,12 @@ pub struct AnyCanonical; impl Matcher for AnyCanonical { type Match<'a> = CanonicalView<'a>; - fn matches(array: &ArrayRef) -> bool { + /// Fast encoding-id check that skips [`ParentRef`] construction. 
This is the + /// hot path for [`ArrayRef::is_canonical`](crate::ArrayRef::is_canonical), so + /// each canonical encoding is checked via the cheap `ArrayRef::is::()` + /// direct downcast. + #[inline] + fn matches_ref(array: &ArrayRef) -> bool { array.is::() || array.is::() || array.is::() @@ -1075,7 +1081,9 @@ impl Matcher for AnyCanonical { || array.is::() } - fn try_match(array: &ArrayRef) -> Option> { + /// Direct heap-array downcasts; mirrors [`Self::try_match`] but skips the + /// [`ParentRef`] construction that would otherwise wrap each call. + fn try_match_ref(array: &ArrayRef) -> Option> { if let Some(a) = array.as_opt::() { Some(CanonicalView::Null(a)) } else if let Some(a) = array.as_opt::() { @@ -1098,6 +1106,43 @@ impl Matcher for AnyCanonical { array.as_opt::().map(CanonicalView::Extension) } } + + fn matches(parent: &ParentRef<'_>) -> bool { + parent.is::() + || parent.is::() + || parent.is::() + || parent.is::() + || parent.is::() + || parent.is::() + || parent.is::() + || parent.is::() + || parent.is::() + || parent.is::() + } + + fn try_match<'a>(parent: &'a ParentRef<'_>) -> Option> { + if let Some(a) = parent.as_opt::() { + Some(CanonicalView::Null(a)) + } else if let Some(a) = parent.as_opt::() { + Some(CanonicalView::Bool(a)) + } else if let Some(a) = parent.as_opt::() { + Some(CanonicalView::Primitive(a)) + } else if let Some(a) = parent.as_opt::() { + Some(CanonicalView::Decimal(a)) + } else if let Some(a) = parent.as_opt::() { + Some(CanonicalView::Struct(a)) + } else if let Some(a) = parent.as_opt::() { + Some(CanonicalView::List(a)) + } else if let Some(a) = parent.as_opt::() { + Some(CanonicalView::FixedSizeList(a)) + } else if let Some(a) = parent.as_opt::() { + Some(CanonicalView::VarBinView(a)) + } else if let Some(a) = parent.as_opt::() { + Some(CanonicalView::Variant(a)) + } else { + parent.as_opt::().map(CanonicalView::Extension) + } + } } #[cfg(test)] diff --git a/vortex-array/src/columnar.rs b/vortex-array/src/columnar.rs index 
2e4bdc328fd..45b35ad014b 100644 --- a/vortex-array/src/columnar.rs +++ b/vortex-array/src/columnar.rs @@ -12,6 +12,7 @@ use crate::Executable; use crate::ExecutionCtx; use crate::IntoArray; use crate::array::ArrayView; +use crate::array::ParentRef; use crate::arrays::Constant; use crate::arrays::ConstantArray; use crate::dtype::DType; @@ -93,7 +94,28 @@ pub struct AnyColumnar; impl Matcher for AnyColumnar { type Match<'a> = ColumnarView<'a>; - fn try_match(array: &ArrayRef) -> Option> { + fn matches(parent: &ParentRef<'_>) -> bool { + parent.is::() || parent.is::() + } + + fn try_match<'a>(parent: &'a ParentRef<'_>) -> Option> { + if let Some(constant) = parent.as_opt::() { + Some(ColumnarView::Constant(constant)) + } else { + parent.as_opt::().map(ColumnarView::Canonical) + } + } + + /// Fast encoding-id check that skips [`ParentRef`] construction. Mirror of + /// [`AnyCanonical::matches_ref`](crate::AnyCanonical::matches_ref) for the same reason. + #[inline] + fn matches_ref(array: &ArrayRef) -> bool { + array.is::() || array.is::() + } + + /// Direct heap-array downcasts; skips the [`ParentRef`] construction that the + /// default [`Self::try_match`] would otherwise do. 
+ fn try_match_ref(array: &ArrayRef) -> Option> { if let Some(constant) = array.as_opt::() { Some(ColumnarView::Constant(constant)) } else { diff --git a/vortex-array/src/executor.rs b/vortex-array/src/executor.rs index d6070ac1a4d..670fc31d5e0 100644 --- a/vortex-array/src/executor.rs +++ b/vortex-array/src/executor.rs @@ -35,6 +35,7 @@ use crate::ArrayRef; use crate::Canonical; use crate::IntoArray; use crate::array::ArrayId; +use crate::array::ParentRef; use crate::builders::ArrayBuilder; use crate::builders::builder_with_capacity_in; use crate::dtype::DType; @@ -167,9 +168,9 @@ impl ArrayRef { for _ in 0..max_iterations { let is_done = stack .last() - .map_or(M::matches as DonePredicate, |frame| frame.done); + .map_or(M::matches_ref as DonePredicate, |frame| frame.done); - if is_done(¤t_array) || AnyCanonical::matches(¤t_array) { + if is_done(¤t_array) || current_array.is::() { match stack.pop() { None => { debug_assert!( @@ -409,9 +410,10 @@ impl Executable for ArrayRef { return Ok(reduced); } + let parent_ref = ParentRef::from_array_ref(&array); for (slot_idx, slot) in array.slots().iter().enumerate() { let Some(child) = slot else { continue }; - if let Some(reduced_parent) = child.reduce_parent(&array, slot_idx)? { + if let Some(reduced_parent) = child.reduce_parent(&parent_ref, slot_idx)? { ctx.log(format_args!( "reduce_parent: slot[{}]({}) rewrote {} -> {}", slot_idx, @@ -549,8 +551,9 @@ fn execute_parent_for_child( && let Some(plugins) = kernels.find_execute_parent(parent.encoding_id(), child.encoding_id()) { + let parent_ref = ParentRef::from_array_ref(parent); for plugin in plugins.as_ref() { - if let Some(result) = plugin(child, parent, slot_idx, ctx)? { + if let Some(result) = plugin(child, &parent_ref, slot_idx, ctx)? 
{ return Ok(Some(result)); } } @@ -679,7 +682,7 @@ impl ExecutionResult { pub fn execute_slot(array: impl IntoArray, slot_idx: usize) -> Self { Self { array: array.into_array(), - step: ExecutionStep::ExecuteSlot(slot_idx, M::matches), + step: ExecutionStep::ExecuteSlot(slot_idx, M::matches_ref), } } diff --git a/vortex-array/src/expr/mod.rs b/vortex-array/src/expr/mod.rs index a5d32510443..d759e9be88d 100644 --- a/vortex-array/src/expr/mod.rs +++ b/vortex-array/src/expr/mod.rs @@ -42,6 +42,7 @@ pub mod traversal; pub use analysis::*; pub use expression::*; pub use exprs::*; +pub use optimize::ExpressionReduceNode; pub use pruning::StatsCatalog; pub trait VortexExprExt { diff --git a/vortex-array/src/expr/optimize.rs b/vortex-array/src/expr/optimize.rs index 27959a96070..cb15b40eb5a 100644 --- a/vortex-array/src/expr/optimize.rs +++ b/vortex-array/src/expr/optimize.rs @@ -1,16 +1,16 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use std::any::Any; use std::cell::RefCell; use std::ops::Deref; use std::sync::Arc; use itertools::Itertools; -use vortex_error::VortexExpect; use vortex_error::VortexResult; +use vortex_error::vortex_panic; use vortex_utils::aliases::hash_map::HashMap; +use crate::ArrayRef; use crate::dtype::DType; use crate::expr::Expression; use crate::expr::transform::match_between::find_between; @@ -83,12 +83,7 @@ impl Expression { scope: scope.clone(), }; if let Some(reduced) = current.scalar_fn().reduce(&reduce_node, &reduce_ctx)? 
{ - let reduced_expr = reduced - .as_any() - .downcast_ref::() - .vortex_expect("ReduceNode not an ExpressionReduceNode") - .expression - .clone(); + let reduced_expr = reduced.as_expression().expression; current = reduced_expr; changed = true; any_optimizations = true; @@ -252,14 +247,19 @@ impl SimplifyCtx for SimplifyCache<'_> { } } -struct ExpressionReduceNode { +#[derive(Clone)] +pub struct ExpressionReduceNode { expression: Expression, scope: DType, } impl ReduceNode for ExpressionReduceNode { - fn as_any(&self) -> &dyn Any { - self + fn as_array(&self) -> ArrayRef { + vortex_panic!("Cannot produce ArrayRef out of Expression node") + } + + fn as_expression(&self) -> ExpressionReduceNode { + self.clone() } fn node_dtype(&self) -> VortexResult { @@ -285,6 +285,7 @@ impl ReduceNode for ExpressionReduceNode { struct ExpressionReduceCtx { scope: DType, } + impl ReduceCtx for ExpressionReduceCtx { fn new_node( &self, @@ -295,13 +296,7 @@ impl ReduceCtx for ExpressionReduceCtx { scalar_fn, children .iter() - .map(|c| { - c.as_any() - .downcast_ref::() - .vortex_expect("ReduceNode not an ExpressionReduceNode") - .expression - .clone() - }) + .map(|c| c.as_expression().expression) .collect::>(), )?; diff --git a/vortex-array/src/kernel.rs b/vortex-array/src/kernel.rs index f5b75471437..e86ce0e92b8 100644 --- a/vortex-array/src/kernel.rs +++ b/vortex-array/src/kernel.rs @@ -134,7 +134,7 @@ impl> Debug for ParentKernelAdapter { impl> DynParentKernel for ParentKernelAdapter { fn matches(&self, parent: &ArrayRef) -> bool { - K::Parent::matches(parent) + parent.is::() } fn execute_parent( @@ -144,7 +144,7 @@ impl> DynParentKernel for ParentKernelAd child_idx: usize, ctx: &mut ExecutionCtx, ) -> VortexResult> { - let Some(parent_view) = K::Parent::try_match(parent) else { + let Some(parent_view) = parent.as_opt::() else { return Ok(None); }; self.kernel diff --git a/vortex-array/src/matcher.rs b/vortex-array/src/matcher.rs index 532931df083..535fea62a21 100644 --- 
a/vortex-array/src/matcher.rs +++ b/vortex-array/src/matcher.rs @@ -2,34 +2,59 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors use crate::ArrayRef; +use crate::array::ParentRef; /// Trait for matching array types. +/// +/// Matchers expose two parallel entry points: +/// +/// - [`matches`](Self::matches) / [`try_match`](Self::try_match) take a [`ParentRef`]. +/// This is the more general path because a `ParentRef` can borrow either a +/// heap-allocated [`ArrayRef`] or stack-allocated construction parts, so it works +/// uniformly for the optimizer's parent-reduce dispatch. +/// - [`matches_ref`](Self::matches_ref) / [`try_match_ref`](Self::try_match_ref) take +/// an [`ArrayRef`] directly. They exist as a fast path for callers that already +/// hold a heap-allocated array (e.g. `ArrayRef::is::()`, `ArrayRef::as_opt::()`) +/// so they don't pay for [`ParentRef`] construction. +/// +/// Both pairs are required because the result types borrow from different things: +/// the heap entries borrow from `&ArrayRef`, the parent entries borrow from +/// `&ParentRef`, and the lifetimes can't be unified without either materializing +/// stack-backed parts or routing every call through a parent allocation. pub trait Matcher { type Match<'a>; - /// Check if the given array matches this matcher type - fn matches(array: &ArrayRef) -> bool { - Self::try_match(array).is_some() + /// Check if the given parent matches this matcher type. + /// + /// The default implementation delegates through [`try_match`](Self::try_match). + /// Override when a cheaper check (e.g. an encoding-id comparison) suffices. + fn matches(parent: &ParentRef<'_>) -> bool { + Self::try_match(parent).is_some() } - /// Try to match the given array, returning the matched view type if successful. 
- fn try_match(array: &ArrayRef) -> Option>; -} - -/// Matches any array type (wildcard matcher) -#[derive(Debug)] -pub struct AnyArray; - -impl Matcher for AnyArray { - type Match<'a> = &'a ArrayRef; - - #[inline(always)] - fn matches(_array: &ArrayRef) -> bool { - true + /// Try to match a [`ParentRef`]. + /// + /// The returned `Match` borrows from `parent`, so matchers can return a + /// stack-backed [`ArrayView`](crate::array::ArrayView) without forcing the + /// parent to materialize. Implementations typically delegate to + /// [`ParentRef::as_opt`]. + fn try_match<'a>(parent: &'a ParentRef<'_>) -> Option>; + + /// Check if the given heap-allocated array matches this matcher type. + /// + /// The default implementation delegates through + /// [`try_match_ref`](Self::try_match_ref), but matchers that can answer cheaply + /// (encoding-id checks, no view construction) should override this directly so + /// hot callers like `ArrayRef::is::()` don't pay the `try_match_ref` cost. + fn matches_ref(array: &ArrayRef) -> bool { + Self::try_match_ref(array).is_some() } - #[inline(always)] - fn try_match(array: &ArrayRef) -> Option> { - Some(array) - } + /// Try to match a heap-allocated [`ArrayRef`], returning the matched view type + /// if successful. + /// + /// This is the heap-only fast path: callers that already hold an `ArrayRef` + /// skip `ParentRef` construction. Implementations typically delegate to + /// [`ArrayRef::as_typed`](crate::ArrayRef::as_typed). 
+ fn try_match_ref(array: &ArrayRef) -> Option>; } diff --git a/vortex-array/src/optimizer/kernels.rs b/vortex-array/src/optimizer/kernels.rs index d38bc9402d1..73f0ce15ef3 100644 --- a/vortex-array/src/optimizer/kernels.rs +++ b/vortex-array/src/optimizer/kernels.rs @@ -31,6 +31,7 @@ use std::sync::Arc; use std::sync::LazyLock; use arc_swap::ArcSwap; +use vortex_array::arrays::Struct; use vortex_error::VortexResult; use vortex_session::Ref; use vortex_session::SessionExt; @@ -39,13 +40,13 @@ use vortex_session::registry::Id; use vortex_utils::aliases::DefaultHashBuilder; use vortex_utils::aliases::hash_map::HashMap; +use crate::ArrayPlugin; use crate::ArrayRef; use crate::ExecutionCtx; -use crate::array::VTable; -use crate::arrays::Struct; +use crate::array::ParentRef; use crate::arrays::struct_::compute::cast::struct_cast_execute_parent; use crate::arrays::struct_::compute::rules::struct_cast_reduce_parent; -use crate::scalar_fn::ScalarFnVTable; +use crate::scalar_fn::ScalarFnPlugin; use crate::scalar_fn::fns::cast::Cast; /// Shared hasher used to combine `(outer, child)` tuples into registry keys. @@ -59,8 +60,11 @@ static FN_HASHER: LazyLock = LazyLock::new(DefaultHashBuilde /// /// Implementations must preserve the parent's logical length and dtype, matching the invariant /// required of static parent-reduce rules. -pub type ReduceParentFn = - fn(child: &ArrayRef, parent: &ArrayRef, child_idx: usize) -> VortexResult>; +pub type ReduceParentFn = fn( + child: &ArrayRef, + parent: &ParentRef<'_>, + child_idx: usize, +) -> VortexResult>; #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd)] #[repr(transparent)] @@ -88,7 +92,7 @@ impl Borrow for ReduceParentFnId { /// required of static `execute_parent` kernels. 
pub type ExecuteParentFn = fn( child: &ArrayRef, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ctx: &mut ExecutionCtx, ) -> VortexResult>; diff --git a/vortex-array/src/optimizer/mod.rs b/vortex-array/src/optimizer/mod.rs index d6e93ca0561..794ceee5f07 100644 --- a/vortex-array/src/optimizer/mod.rs +++ b/vortex-array/src/optimizer/mod.rs @@ -22,6 +22,7 @@ use vortex_session::SessionExt; use vortex_session::VortexSession; use crate::ArrayRef; +use crate::array::ParentRef; use crate::optimizer::kernels::ArrayKernels; pub mod kernels; @@ -87,6 +88,7 @@ fn try_optimize( // Apply parent reduction rules to each slot in the context of the current array. // Its important to take all slots here, as `current_array` can change inside the loop. + let parent_ref = ParentRef::from_array_ref(¤t_array); for (slot_idx, slot) in current_array.slots().iter().enumerate() { let Some(child) = slot else { continue }; @@ -96,7 +98,7 @@ fn try_optimize( array_ref.find_reduce_parent(current_array.encoding_id(), child.encoding_id()) { for plugin in plugins.as_ref() { - if let Some(new_array) = plugin(child, ¤t_array, slot_idx)? { + if let Some(new_array) = plugin(child, &parent_ref, slot_idx)? { current_array = new_array; any_optimizations = true; continue 'outer; @@ -104,7 +106,7 @@ fn try_optimize( } } - if let Some(new_array) = child.reduce_parent(¤t_array, slot_idx)? { + if let Some(new_array) = child.reduce_parent(&parent_ref, slot_idx)? { // If the parent was replaced, then we attempt to reduce it again. 
current_array = new_array; any_optimizations = true; diff --git a/vortex-array/src/optimizer/rules.rs b/vortex-array/src/optimizer/rules.rs index e505b21a199..134e24ca833 100644 --- a/vortex-array/src/optimizer/rules.rs +++ b/vortex-array/src/optimizer/rules.rs @@ -26,6 +26,7 @@ use vortex_error::VortexResult; use crate::ArrayRef; use crate::array::ArrayView; +use crate::array::ParentRef; use crate::array::VTable; use crate::matcher::Matcher; @@ -48,6 +49,16 @@ pub trait ArrayReduceRule: Debug + Send + Sync + 'static { /// The child sees the parent's type via the associated `Parent` [`Matcher`] and can return /// a replacement for the parent. This enables optimizations like pushing operations through /// compression layers (e.g., pushing a scalar function into dictionary values). +/// +/// # Stack-backed parents +/// +/// Construction-side callers borrow `ArrayParts` as a [`ParentRef`] via +/// [`ParentRef::optimize`](crate::array::ParentRef::optimize). [`Matcher::try_match`] +/// returns a stack-backed [`ArrayView`] without materializing an +/// `Arc>`, so rules that consume only the typed metadata (e.g. +/// `view.dtype()`, `view.data()`, `view.slots()`) can fire without forcing a +/// heap allocation. Materialization only happens if the rule reaches for +/// [`ArrayView::array`]. pub trait ArrayParentReduceRule: Debug + Send + Sync + 'static { /// The parent array type this rule matches against. type Parent: Matcher; @@ -58,6 +69,13 @@ pub trait ArrayParentReduceRule: Debug + Send + Sync + 'static { /// - `Ok(Some(new_array))` if the rule applied successfully /// - `Ok(None)` if the rule doesn't apply /// - `Err(e)` if an error occurred + /// + /// # Stack-backed parents + /// + /// The parent is received through [`Matcher::Match`]. For the blanket + /// `impl Matcher for V`, that is an [`ArrayView`] borrowed from + /// the parent — no `Arc>` is allocated unless the rule reaches + /// for [`ArrayView::array`]. 
fn reduce_parent( &self, array: ArrayView<'_, V>, @@ -69,12 +87,12 @@ pub trait ArrayParentReduceRule: Debug + Send + Sync + 'static { /// Type-erased version of [`ArrayParentReduceRule`] used for dynamic dispatch within /// [`ParentRuleSet`]. pub trait DynArrayParentReduceRule: Debug + Send + Sync { - fn matches(&self, parent: &ArrayRef) -> bool; + fn matches(&self, parent: &ParentRef<'_>) -> bool; fn reduce_parent( &self, array: ArrayView<'_, V>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult>; } @@ -98,17 +116,17 @@ impl> Debug for ParentReduceRuleAdapter> DynArrayParentReduceRule for ParentReduceRuleAdapter { - fn matches(&self, parent: &ArrayRef) -> bool { - K::Parent::matches(parent) + fn matches(&self, parent: &ParentRef<'_>) -> bool { + parent.is::() } fn reduce_parent( &self, child: ArrayView<'_, V>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { - let Some(parent_view) = K::Parent::try_match(parent) else { + let Some(parent_view) = parent.as_opt::() else { return Ok(None); }; self.rule.reduce_parent(child, parent_view, child_idx) @@ -171,7 +189,7 @@ impl ParentRuleSet { pub fn evaluate( &self, child: ArrayView<'_, V>, - parent: &ArrayRef, + parent: &ParentRef<'_>, child_idx: usize, ) -> VortexResult> { for rule in self.rules.iter() { diff --git a/vortex-array/src/scalar_fn/fns/between/kernel.rs b/vortex-array/src/scalar_fn/fns/between/kernel.rs index ee4fb688982..fedd5a7ae38 100644 --- a/vortex-array/src/scalar_fn/fns/between/kernel.rs +++ b/vortex-array/src/scalar_fn/fns/between/kernel.rs @@ -1,7 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use vortex_error::VortexExpect; use vortex_error::VortexResult; use super::Between; @@ -11,9 +10,7 @@ use crate::ArrayRef; use crate::ExecutionCtx; use crate::array::ArrayView; use crate::array::VTable; -use crate::arrays::ScalarFn; use crate::arrays::scalar_fn::ExactScalarFn; -use 
crate::arrays::scalar_fn::ScalarFnArrayExt; use crate::arrays::scalar_fn::ScalarFnArrayView; use crate::kernel::ExecuteParentKernel; use crate::optimizer::rules::ArrayParentReduceRule; @@ -63,12 +60,8 @@ where if child_idx != 0 { return Ok(None); } - let scalar_fn_array = parent - .as_opt::() - .vortex_expect("ExactScalarFn matcher confirmed ScalarFnArray"); - let children = scalar_fn_array.children(); - let lower = &children[1]; - let upper = &children[2]; + let lower = parent.get_child(1); + let upper = parent.get_child(2); let arr = array.array().clone(); if let Some(result) = precondition(&arr, lower, upper)? { return Ok(Some(result)); @@ -98,12 +91,8 @@ where if child_idx != 0 { return Ok(None); } - let scalar_fn_array = parent - .as_opt::() - .vortex_expect("ExactScalarFn matcher confirmed ScalarFnArray"); - let children = scalar_fn_array.children(); - let lower = &children[1]; - let upper = &children[2]; + let lower = parent.get_child(1); + let upper = parent.get_child(2); let arr = array.array().clone(); if let Some(result) = precondition(&arr, lower, upper)? 
{ return Ok(Some(result)); diff --git a/vortex-array/src/scalar_fn/fns/binary/compare.rs b/vortex-array/src/scalar_fn/fns/binary/compare.rs index a09a28164bd..950c3687343 100644 --- a/vortex-array/src/scalar_fn/fns/binary/compare.rs +++ b/vortex-array/src/scalar_fn/fns/binary/compare.rs @@ -20,9 +20,7 @@ use crate::array::ArrayView; use crate::array::VTable; use crate::arrays::Constant; use crate::arrays::ConstantArray; -use crate::arrays::ScalarFn; use crate::arrays::scalar_fn::ExactScalarFn; -use crate::arrays::scalar_fn::ScalarFnArrayExt; use crate::arrays::scalar_fn::ScalarFnArrayView; use crate::arrow::ArrowSessionExt; use crate::arrow::Datum; @@ -74,15 +72,11 @@ where return Ok(None); }; - // Get the ScalarFnArray to access children - let Some(scalar_fn_array) = parent.as_opt::() else { - return Ok(None); - }; // Normalize so `array` is always LHS, swapping the operator if needed // TODO(joe): should be go this here or in the Rule/Kernel let (cmp_op, other) = match child_idx { - 0 => (cmp_op, scalar_fn_array.get_child(1)), - 1 => (cmp_op.swap(), scalar_fn_array.get_child(0)), + 0 => (cmp_op, parent.get_child(1)), + 1 => (cmp_op.swap(), parent.get_child(0)), _ => return Ok(None), }; diff --git a/vortex-array/src/scalar_fn/fns/fill_null/kernel.rs b/vortex-array/src/scalar_fn/fns/fill_null/kernel.rs index eea3dd6ef7b..b80c43814be 100644 --- a/vortex-array/src/scalar_fn/fns/fill_null/kernel.rs +++ b/vortex-array/src/scalar_fn/fns/fill_null/kernel.rs @@ -12,9 +12,7 @@ use crate::array::ArrayView; use crate::array::VTable; use crate::arrays::Constant; use crate::arrays::ConstantArray; -use crate::arrays::ScalarFn; use crate::arrays::scalar_fn::ExactScalarFn; -use crate::arrays::scalar_fn::ScalarFnArrayExt; use crate::arrays::scalar_fn::ScalarFnArrayView; use crate::builtins::ArrayBuiltins; use crate::kernel::ExecuteParentKernel; @@ -122,10 +120,7 @@ where if child_idx != 0 { return Ok(None); } - let scalar_fn_array = parent - .as_opt::() - 
.vortex_expect("ExactScalarFn matcher confirmed ScalarFnArray"); - let fill_value = scalar_fn_array + let fill_value = parent .get_child(1) .as_constant() .vortex_expect("fill_null fill_value must be constant"); @@ -158,10 +153,7 @@ where if child_idx != 0 { return Ok(None); } - let scalar_fn_array = parent - .as_opt::() - .vortex_expect("ExactScalarFn matcher confirmed ScalarFnArray"); - let fill_value = scalar_fn_array + let fill_value = parent .get_child(1) .as_constant() .vortex_expect("fill_null fill_value must be constant"); diff --git a/vortex-array/src/scalar_fn/fns/like/kernel.rs b/vortex-array/src/scalar_fn/fns/like/kernel.rs index b3b683212ff..72cbbae3377 100644 --- a/vortex-array/src/scalar_fn/fns/like/kernel.rs +++ b/vortex-array/src/scalar_fn/fns/like/kernel.rs @@ -1,16 +1,13 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use vortex_error::VortexExpect; use vortex_error::VortexResult; use crate::ArrayRef; use crate::ExecutionCtx; use crate::array::ArrayView; use crate::array::VTable; -use crate::arrays::ScalarFn; use crate::arrays::scalar_fn::ExactScalarFn; -use crate::arrays::scalar_fn::ScalarFnArrayExt; use crate::arrays::scalar_fn::ScalarFnArrayView; use crate::kernel::ExecuteParentKernel; use crate::optimizer::rules::ArrayParentReduceRule; @@ -68,10 +65,7 @@ where if child_idx != 0 { return Ok(None); } - let scalar_fn_array = parent - .as_opt::() - .vortex_expect("ExactScalarFn matcher confirmed ScalarFnArray"); - let pattern = scalar_fn_array.get_child(1); + let pattern = parent.get_child(1); let options = *parent.options; ::like(array, pattern, options) } @@ -97,10 +91,7 @@ where if child_idx != 0 { return Ok(None); } - let scalar_fn_array = parent - .as_opt::() - .vortex_expect("ExactScalarFn matcher confirmed ScalarFnArray"); - let pattern = scalar_fn_array.get_child(1); + let pattern = parent.get_child(1); let options = *parent.options; ::like(array, pattern, options, ctx) } diff --git 
a/vortex-array/src/scalar_fn/fns/list_contains/kernel.rs b/vortex-array/src/scalar_fn/fns/list_contains/kernel.rs index 563600bfeee..38f82e84b8c 100644 --- a/vortex-array/src/scalar_fn/fns/list_contains/kernel.rs +++ b/vortex-array/src/scalar_fn/fns/list_contains/kernel.rs @@ -1,16 +1,13 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use vortex_error::VortexExpect; use vortex_error::VortexResult; use crate::ArrayRef; use crate::ExecutionCtx; use crate::array::ArrayView; use crate::array::VTable; -use crate::arrays::ScalarFn; use crate::arrays::scalar_fn::ExactScalarFn; -use crate::arrays::scalar_fn::ScalarFnArrayExt; use crate::arrays::scalar_fn::ScalarFnArrayView; use crate::kernel::ExecuteParentKernel; use crate::optimizer::rules::ArrayParentReduceRule; @@ -66,10 +63,7 @@ where if child_idx != 1 { return Ok(None); } - let scalar_fn_array = parent - .as_opt::() - .vortex_expect("ExactScalarFn matcher confirmed ScalarFnArray"); - let list = scalar_fn_array.get_child(0); + let list = parent.get_child(0); ::list_contains(list, array) } } @@ -95,10 +89,7 @@ where if child_idx != 1 { return Ok(None); } - let scalar_fn_array = parent - .as_opt::() - .vortex_expect("ExactScalarFn matcher confirmed ScalarFnArray"); - let list = scalar_fn_array.get_child(0); + let list = parent.get_child(0); ::list_contains(list, array, ctx) } } diff --git a/vortex-array/src/scalar_fn/fns/mask/kernel.rs b/vortex-array/src/scalar_fn/fns/mask/kernel.rs index 5346e4e236c..02d3349fc88 100644 --- a/vortex-array/src/scalar_fn/fns/mask/kernel.rs +++ b/vortex-array/src/scalar_fn/fns/mask/kernel.rs @@ -2,7 +2,6 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors use vortex_error::VortexResult; -use vortex_error::vortex_err; use crate::ArrayRef; use crate::ExecutionCtx; @@ -72,14 +71,11 @@ where } // The mask child (child 1) is a non-nullable BoolArray where true=keep. 
// If it's not yet a BoolArray, we can't reduce without execution. - let parent_ref: ArrayRef = (*parent).clone(); - let mask_child = parent_ref - .nth_child(1) - .ok_or_else(|| vortex_err!("Mask expression must have 2 children"))?; + let mask_child = parent.get_child(1); if mask_child.as_opt::().is_none() { return Ok(None); }; - ::mask(array, &mask_child) + ::mask(array, mask_child) } } @@ -104,9 +100,7 @@ where if child_idx != 0 { return Ok(None); } - let mask_child = parent - .nth_child(1) - .ok_or_else(|| vortex_err!("Mask expression must have 2 children"))?; - ::mask(array, &mask_child, ctx) + let mask_child = parent.get_child(1); + ::mask(array, mask_child, ctx) } } diff --git a/vortex-array/src/scalar_fn/fns/merge.rs b/vortex-array/src/scalar_fn/fns/merge.rs index 608ba69a255..05ea9354eca 100644 --- a/vortex-array/src/scalar_fn/fns/merge.rs +++ b/vortex-array/src/scalar_fn/fns/merge.rs @@ -7,9 +7,9 @@ use std::hash::Hash; use std::sync::Arc; use itertools::Itertools as _; -use vortex_error::VortexExpect; use vortex_error::VortexResult; use vortex_error::vortex_bail; +use vortex_error::vortex_err; use vortex_session::VortexSession; use vortex_utils::aliases::hash_set::HashSet; @@ -185,16 +185,13 @@ impl ScalarFnVTable for Merge { for child in (0..node.child_count()).map(|i| node.child(i)) { let child_dtype = child.node_dtype()?; - if !child_dtype.is_struct() { - vortex_bail!( + + let child_dtype = child_dtype.as_struct_fields_opt().ok_or_else(|| { + vortex_err!( "Merge child must return a non-nullable struct dtype, got {}", child_dtype ) - } - - let child_dtype = child_dtype - .as_struct_fields_opt() - .vortex_expect("expected struct"); + })?; for name in child_dtype.names().iter() { if let Some(idx) = names.iter().position(|n| n == name) { diff --git a/vortex-array/src/scalar_fn/fns/zip/kernel.rs b/vortex-array/src/scalar_fn/fns/zip/kernel.rs index 575c2f4c55a..0ca31190184 100644 --- a/vortex-array/src/scalar_fn/fns/zip/kernel.rs +++ 
b/vortex-array/src/scalar_fn/fns/zip/kernel.rs @@ -1,16 +1,13 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use vortex_error::VortexExpect; use vortex_error::VortexResult; use crate::ArrayRef; use crate::ExecutionCtx; use crate::array::ArrayView; use crate::array::VTable; -use crate::arrays::ScalarFn; use crate::arrays::scalar_fn::ExactScalarFn; -use crate::arrays::scalar_fn::ScalarFnArrayExt; use crate::arrays::scalar_fn::ScalarFnArrayView; use crate::kernel::ExecuteParentKernel; use crate::optimizer::rules::ArrayParentReduceRule; @@ -67,11 +64,8 @@ where if child_idx != 0 { return Ok(None); } - let scalar_fn_array = parent - .as_opt::() - .vortex_expect("ExactScalarFn matcher confirmed ScalarFnArray"); - let if_false = scalar_fn_array.get_child(1); - let mask_array = scalar_fn_array.get_child(2); + let if_false = parent.get_child(1); + let mask_array = parent.get_child(2); ::zip(array, if_false, mask_array) } } @@ -96,11 +90,8 @@ where if child_idx != 0 { return Ok(None); } - let scalar_fn_array = parent - .as_opt::() - .vortex_expect("ExactScalarFn matcher confirmed ScalarFnArray"); - let if_false = scalar_fn_array.get_child(1); - let mask_array = scalar_fn_array.get_child(2); + let if_false = parent.get_child(1); + let mask_array = parent.get_child(2); ::zip(array, if_false, mask_array, ctx) } } diff --git a/vortex-array/src/scalar_fn/vtable.rs b/vortex-array/src/scalar_fn/vtable.rs index f4862f6876a..0fa06e833d3 100644 --- a/vortex-array/src/scalar_fn/vtable.rs +++ b/vortex-array/src/scalar_fn/vtable.rs @@ -1,7 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use std::any::Any; use std::fmt; use std::fmt::Debug; use std::fmt::Display; @@ -20,6 +19,7 @@ use crate::ArrayRef; use crate::ExecutionCtx; use crate::dtype::DType; use crate::expr::Expression; +use crate::expr::ExpressionReduceNode; use crate::expr::StatsCatalog; use 
crate::expr::stats::Stat; use crate::expr::traversal::Node; @@ -116,7 +116,7 @@ pub trait ScalarFnVTable: 'static + Sized + Clone + Send + Sync { /// Implementations may assume correct arity and will panic or return nonsensical results if /// violated. /// - /// [`Expression::try_new`]: crate::expr::Expression::try_new + /// [`Expression::try_new`]: Expression::try_new fn return_dtype(&self, options: &Self::Options, args: &[DType]) -> VortexResult; /// Execute the expression over the input arguments. @@ -282,8 +282,9 @@ pub type ReduceNodeRef = Arc; /// A node used for implementing abstract reduction rules. pub trait ReduceNode { - /// Downcast to Any. - fn as_any(&self) -> &dyn Any; + fn as_array(&self) -> ArrayRef; + + fn as_expression(&self) -> ExpressionReduceNode; /// Return the data type of this node. fn node_dtype(&self) -> VortexResult; diff --git a/vortex-array/src/validity.rs b/vortex-array/src/validity.rs index 204205d1f51..314b9992fed 100644 --- a/vortex-array/src/validity.rs +++ b/vortex-array/src/validity.rs @@ -28,7 +28,6 @@ use crate::arrays::scalar_fn::ScalarFnFactoryExt; use crate::builtins::ArrayBuiltins; use crate::dtype::DType; use crate::dtype::Nullability; -use crate::optimizer::ArrayOptimizer; use crate::patches::Patches; use crate::scalar::Scalar; use crate::scalar_fn::fns::binary::Binary; @@ -265,11 +264,10 @@ impl Validity { | (Validity::AllValid, Validity::NonNullable) | (Validity::AllValid, Validity::AllValid) => Validity::AllValid, // Here we actually have to do some work - (Validity::Array(lhs), Validity::Array(rhs)) => Validity::Array( - Binary - .try_new_array(lhs.len(), Operator::And, [lhs, rhs])? - .optimize()?, - ), + (Validity::Array(lhs), Validity::Array(rhs)) => { + let parts = Binary.try_new_array_parts(lhs.len(), Operator::And, [lhs, rhs])?; + Validity::Array(parts.optimize()?) 
+ } }) } diff --git a/vortex-cuda/src/dynamic_dispatch/plan_builder.rs b/vortex-cuda/src/dynamic_dispatch/plan_builder.rs index d20af9f7d21..d17635e0c55 100644 --- a/vortex-cuda/src/dynamic_dispatch/plan_builder.rs +++ b/vortex-cuda/src/dynamic_dispatch/plan_builder.rs @@ -12,6 +12,7 @@ use itertools::zip_eq; use tracing::trace; use vortex::array::ArrayRef; use vortex::array::ArrayVTable; +use vortex::array::ParentRef; use vortex::array::arrays::Dict; use vortex::array::arrays::Primitive; use vortex::array::arrays::Slice; @@ -520,7 +521,8 @@ impl FusedPlan { let slice_arr = array.as_::(); let child = slice_arr.child().clone(); - if let Some(reduced) = child.reduce_parent(&array, 0)? { + let parent_ref = ParentRef::from_array_ref(&array); + if let Some(reduced) = child.reduce_parent(&parent_ref, 0)? { return self.walk(reduced, pending_subtrees); } diff --git a/vortex-layout/src/layouts/dict/reader.rs b/vortex-layout/src/layouts/dict/reader.rs index 96f12d53ece..424eb635c41 100644 --- a/vortex-layout/src/layouts/dict/reader.rs +++ b/vortex-layout/src/layouts/dict/reader.rs @@ -21,7 +21,6 @@ use vortex_array::dtype::DType; use vortex_array::dtype::FieldMask; use vortex_array::expr::Expression; use vortex_array::expr::root; -use vortex_array::optimizer::ArrayOptimizer; use vortex_error::VortexError; use vortex_error::VortexExpect; use vortex_error::VortexResult; @@ -241,12 +240,9 @@ impl LayoutReader for DictReader { // * The codes child reader ensures the correct dtype. // * The layout stores `all_values_referenced` and if this is malicious then it must // only affect correctness not memory safety. 
- let array = unsafe { - DictArray::new_unchecked(codes, values) - .set_all_values_referenced(all_values_referenced) - } - .into_array() - .optimize()?; + let parts = + unsafe { DictArray::new_unchecked_parts(codes, values, all_values_referenced) }; + let array = parts.optimize()?; array.apply(&expr) } diff --git a/vortex-python/src/arrays/py/vtable.rs b/vortex-python/src/arrays/py/vtable.rs index 9e1461f3c51..b138233fb6c 100644 --- a/vortex-python/src/arrays/py/vtable.rs +++ b/vortex-python/src/arrays/py/vtable.rs @@ -87,7 +87,7 @@ impl VTable for PythonVTable { 0 } - fn child(_array: ArrayView<'_, Self>, idx: usize) -> ArrayRef { + fn child(_array: ArrayView<'_, Self>, idx: usize) -> &ArrayRef { vortex_panic!("PythonArray child index {idx} out of bounds") } diff --git a/vortex-tensor/src/scalar_fns/inner_product.rs b/vortex-tensor/src/scalar_fns/inner_product.rs index 197d5cc2fb3..b5df68a0b9b 100644 --- a/vortex-tensor/src/scalar_fns/inner_product.rs +++ b/vortex-tensor/src/scalar_fns/inner_product.rs @@ -361,9 +361,7 @@ impl InnerProduct { let new_constant = Vector::constant_array(&rotated_query, len)?; // Extract the SorfTransform child (the already-padded Vector). - let sorf_child = sorf_view - .nth_child(0) - .vortex_expect("SorfTransform must have exactly one child"); + let sorf_child = sorf_view.get_child(0).clone(); // Recursively execute the rewritten inner product. This allows case 2 to fire on // the rewritten tree if the sorf child is `Vector[FSL(Dict)]`. 
Termination is diff --git a/vortex-tensor/src/scalar_fns/l2_denorm.rs b/vortex-tensor/src/scalar_fns/l2_denorm.rs index 00ad7d75e8a..670f8f5ca76 100644 --- a/vortex-tensor/src/scalar_fns/l2_denorm.rs +++ b/vortex-tensor/src/scalar_fns/l2_denorm.rs @@ -16,12 +16,10 @@ use vortex_array::arrays::Extension; use vortex_array::arrays::ExtensionArray; use vortex_array::arrays::FixedSizeListArray; use vortex_array::arrays::PrimitiveArray; -use vortex_array::arrays::ScalarFn as ScalarFnArrayEncoding; use vortex_array::arrays::ScalarFnArray; use vortex_array::arrays::extension::ExtensionArrayExt; use vortex_array::arrays::fixed_size_list::FixedSizeListArrayExt; use vortex_array::arrays::scalar_fn::ExactScalarFn; -use vortex_array::arrays::scalar_fn::ScalarFnArrayExt; use vortex_array::arrays::scalar_fn::ScalarFnArrayView; use vortex_array::arrays::scalar_fn::plugin::ScalarFnArrayParts; use vortex_array::arrays::scalar_fn::plugin::ScalarFnArrayVTable; @@ -288,9 +286,8 @@ impl ScalarFnArrayVTable for L2Denorm { view: &ScalarFnArrayView, _session: &VortexSession, ) -> VortexResult>> { - let scalar_fn_array = view.as_::(); - let normalized_dtype = Some(scalar_fn_array.child_at(0).dtype().try_into()?); - let norms_dtype = Some(scalar_fn_array.child_at(1).dtype().try_into()?); + let normalized_dtype = Some(view.child_at(0).dtype().try_into()?); + let norms_dtype = Some(view.child_at(1).dtype().try_into()?); Ok(Some( L2DenormMetadata { normalized_dtype, diff --git a/vortex-tensor/src/scalar_fns/l2_norm.rs b/vortex-tensor/src/scalar_fns/l2_norm.rs index d760c3429bd..306b2b8f597 100644 --- a/vortex-tensor/src/scalar_fns/l2_norm.rs +++ b/vortex-tensor/src/scalar_fns/l2_norm.rs @@ -12,11 +12,9 @@ use vortex_array::arrays::Constant; use vortex_array::arrays::ConstantArray; use vortex_array::arrays::ExtensionArray; use vortex_array::arrays::PrimitiveArray; -use vortex_array::arrays::ScalarFn as ScalarFnArrayEncoding; use vortex_array::arrays::ScalarFnArray; use 
vortex_array::arrays::extension::ExtensionArrayExt; use vortex_array::arrays::scalar_fn::ExactScalarFn; -use vortex_array::arrays::scalar_fn::ScalarFnArrayExt; use vortex_array::arrays::scalar_fn::ScalarFnArrayView; use vortex_array::arrays::scalar_fn::plugin::ScalarFnArrayParts; use vortex_array::arrays::scalar_fn::plugin::ScalarFnArrayVTable; @@ -209,8 +207,7 @@ impl ScalarFnArrayVTable for L2Norm { view: &ScalarFnArrayView, _session: &VortexSession, ) -> VortexResult>> { - let scalar_fn_array = view.as_::(); - let input_dtype = Some(scalar_fn_array.child_at(0).dtype().try_into()?); + let input_dtype = Some(view.child_at(0).dtype().try_into()?); Ok(Some(L2NormMetadata { input_dtype }.encode_to_vec())) } diff --git a/vortex-tensor/src/scalar_fns/sorf_transform/vtable.rs b/vortex-tensor/src/scalar_fns/sorf_transform/vtable.rs index 76648decae2..f045a406d8c 100644 --- a/vortex-tensor/src/scalar_fns/sorf_transform/vtable.rs +++ b/vortex-tensor/src/scalar_fns/sorf_transform/vtable.rs @@ -14,10 +14,8 @@ use vortex_array::IntoArray; use vortex_array::arrays::ExtensionArray; use vortex_array::arrays::FixedSizeListArray; use vortex_array::arrays::PrimitiveArray; -use vortex_array::arrays::ScalarFn as ScalarFnArrayEncoding; use vortex_array::arrays::extension::ExtensionArrayExt; use vortex_array::arrays::fixed_size_list::FixedSizeListArrayExt; -use vortex_array::arrays::scalar_fn::ScalarFnArrayExt; use vortex_array::arrays::scalar_fn::ScalarFnArrayView; use vortex_array::arrays::scalar_fn::plugin::ScalarFnArrayParts; use vortex_array::arrays::scalar_fn::plugin::ScalarFnArrayVTable; @@ -210,8 +208,7 @@ impl ScalarFnArrayVTable for SorfTransform { view: &ScalarFnArrayView, _session: &VortexSession, ) -> VortexResult>> { - let scalar_fn_array = view.as_::(); - let child_dtype = Some(scalar_fn_array.child_at(0).dtype().try_into()?); + let child_dtype = Some(view.child_at(0).dtype().try_into()?); let metadata = SorfTransformMetadata { child_dtype, 
..SorfTransformMetadata::from(view.options) diff --git a/vortex-tensor/src/utils.rs b/vortex-tensor/src/utils.rs index e6d2cce453b..0f46131ee81 100644 --- a/vortex-tensor/src/utils.rs +++ b/vortex-tensor/src/utils.rs @@ -10,11 +10,9 @@ use vortex_array::arrays::Constant; use vortex_array::arrays::ConstantArray; use vortex_array::arrays::FixedSizeListArray; use vortex_array::arrays::PrimitiveArray; -use vortex_array::arrays::ScalarFn; use vortex_array::arrays::fixed_size_list::FixedSizeListArrayExt; use vortex_array::arrays::primitive::PrimitiveArrayExt; use vortex_array::arrays::scalar_fn::ExactScalarFn; -use vortex_array::arrays::scalar_fn::ScalarFnArrayExt; use vortex_array::arrays::scalar_fn::ScalarFnArrayView; use vortex_array::dtype::DType; use vortex_array::dtype::NativePType; @@ -67,11 +65,7 @@ pub fn extract_l2_denorm_children(array: &ArrayRef) -> (ArrayRef, ArrayRef) { let sfn = array .as_opt::>() .vortex_expect("expected ScalarFnArray wrapping L2Denorm"); - ( - sfn.nth_child(0) - .vortex_expect("L2Denorm missing normalized array"), - sfn.nth_child(1).vortex_expect("L2Denorm missing norms"), - ) + (sfn.get_child(0).clone(), sfn.get_child(1).clone()) } /// Validates that `input_dtype` is a float-valued tensor-like extension dtype. @@ -273,9 +267,8 @@ impl BinaryTensorOpMetadata { pub(crate) fn encode_from_view( view: &ScalarFnArrayView, ) -> VortexResult> { - let scalar_fn_array = view.as_::(); - let lhs_dtype = Some(scalar_fn_array.child_at(0).dtype().try_into()?); - let rhs_dtype = Some(scalar_fn_array.child_at(1).dtype().try_into()?); + let lhs_dtype = Some(view.child_at(0).dtype().try_into()?); + let rhs_dtype = Some(view.child_at(1).dtype().try_into()?); Ok(Self { lhs_dtype, rhs_dtype,