diff --git a/encodings/alp/src/alp/array.rs b/encodings/alp/src/alp/array.rs index e70969085a6..f20082a5916 100644 --- a/encodings/alp/src/alp/array.rs +++ b/encodings/alp/src/alp/array.rs @@ -3,6 +3,7 @@ use std::fmt::Debug; use std::hash::Hash; +use std::ops::Range; use vortex_array::Array; use vortex_array::ArrayBufferVisitor; @@ -13,6 +14,7 @@ use vortex_array::ArrayRef; use vortex_array::Canonical; use vortex_array::DeserializeMetadata; use vortex_array::ExecutionCtx; +use vortex_array::IntoArray; use vortex_array::Precision; use vortex_array::ProstMetadata; use vortex_array::SerializeMetadata; @@ -191,6 +193,17 @@ impl VTable for ALPVTable { ctx, )?)) } + + fn slice(array: &Self::Array, range: Range) -> VortexResult> { + Ok(Some( + ALPArray::new( + array.encoded().slice(range.clone()), + array.exponents(), + array.patches().and_then(|p| p.slice(range)), + ) + .into_array(), + )) + } } #[derive(Clone, Debug)] diff --git a/encodings/alp/src/alp/ops.rs b/encodings/alp/src/alp/ops.rs index ebb2bd61023..2703f588fb1 100644 --- a/encodings/alp/src/alp/ops.rs +++ b/encodings/alp/src/alp/ops.rs @@ -1,11 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use std::ops::Range; - -use vortex_array::Array; -use vortex_array::ArrayRef; -use vortex_array::IntoArray; use vortex_array::vtable::OperationsVTable; use vortex_error::VortexExpect; use vortex_scalar::Scalar; @@ -16,15 +11,6 @@ use crate::ALPVTable; use crate::match_each_alp_float_ptype; impl OperationsVTable for ALPVTable { - fn slice(array: &ALPArray, range: Range) -> ArrayRef { - ALPArray::new( - array.encoded().slice(range.clone()), - array.exponents(), - array.patches().and_then(|p| p.slice(range)), - ) - .into_array() - } - fn scalar_at(array: &ALPArray, index: usize) -> Scalar { if let Some(patches) = array.patches() && let Some(patch) = patches.get_patched(index) diff --git a/encodings/alp/src/alp_rd/array.rs b/encodings/alp/src/alp_rd/array.rs index 408b2366ddd..75bb4fedd15 100644 --- a/encodings/alp/src/alp_rd/array.rs +++ b/encodings/alp/src/alp_rd/array.rs @@ -13,6 +13,7 @@ use vortex_array::ArrayHash; use vortex_array::ArrayRef; use vortex_array::Canonical; use vortex_array::DeserializeMetadata; +use vortex_array::IntoArray; use vortex_array::Precision; use vortex_array::ProstMetadata; use vortex_array::SerializeMetadata; @@ -83,6 +84,25 @@ impl VTable for ALPRDVTable { ArrayId::new_ref("vortex.alprd") } + fn slice(array: &Self::Array, range: std::ops::Range) -> VortexResult> { + let left_parts_exceptions = array + .left_parts_patches() + .and_then(|patches| patches.slice(range.clone())); + + // SAFETY: slicing components does not change the encoded values + Ok(Some(unsafe { + ALPRDArray::new_unchecked( + array.dtype().clone(), + array.left_parts().slice(range.clone()), + array.left_parts_dictionary().clone(), + array.right_parts().slice(range), + array.right_bit_width(), + left_parts_exceptions, + ) + .into_array() + })) + } + fn encoding(_array: &Self::Array) -> ArrayVTable { ALPRDVTable.as_vtable() } diff --git a/encodings/alp/src/alp_rd/ops.rs b/encodings/alp/src/alp_rd/ops.rs index b04d53c0197..91e2ae0a71a 100644 --- a/encodings/alp/src/alp_rd/ops.rs +++ b/encodings/alp/src/alp_rd/ops.rs @@ -1,11 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use std::ops::Range; - use vortex_array::Array; -use vortex_array::ArrayRef; -use vortex_array::IntoArray; use vortex_array::vtable::OperationsVTable; use vortex_error::VortexExpect; use vortex_scalar::Scalar; @@ -14,25 +10,6 @@ use crate::ALPRDArray; use crate::ALPRDVTable; impl OperationsVTable for ALPRDVTable { - fn slice(array: &ALPRDArray, range: Range) -> ArrayRef { - let left_parts_exceptions = array - .left_parts_patches() - .and_then(|patches| patches.slice(range.clone())); - - // SAFETY: slicing components does not change the encoded values - unsafe { - ALPRDArray::new_unchecked( - array.dtype().clone(), - array.left_parts().slice(range.clone()), - array.left_parts_dictionary().clone(), - array.right_parts().slice(range), - array.right_bit_width(), - left_parts_exceptions, - ) - .into_array() - } - } - fn scalar_at(array: &ALPRDArray, index: usize) -> Scalar { // The left value can either be a direct value, or an exception. // The exceptions array represents exception positions with non-null values. diff --git a/encodings/bytebool/src/array.rs b/encodings/bytebool/src/array.rs index aa09130874e..6de5e8677e5 100644 --- a/encodings/bytebool/src/array.rs +++ b/encodings/bytebool/src/array.rs @@ -117,6 +117,16 @@ impl VTable for ByteBoolVTable { Ok(()) } + + fn slice(array: &ByteBoolArray, range: Range) -> VortexResult> { + Ok(Some( + ByteBoolArray::new( + array.buffer().slice(range.clone()), + array.validity().slice(range), + ) + .into_array(), + )) + } } #[derive(Clone, Debug)] @@ -213,14 +223,6 @@ impl CanonicalVTable for ByteBoolVTable { } impl OperationsVTable for ByteBoolVTable { - fn slice(array: &ByteBoolArray, range: Range) -> ArrayRef { - ByteBoolArray::new( - array.buffer().slice(range.clone()), - array.validity().slice(range), - ) - .into_array() - } - fn scalar_at(array: &ByteBoolArray, index: usize) -> Scalar { Scalar::bool(array.buffer()[index] == 1, array.dtype().nullability()) } diff --git a/encodings/datetime-parts/src/array.rs b/encodings/datetime-parts/src/array.rs index 7cd732e1aa1..74809a87dff 100644 --- a/encodings/datetime-parts/src/array.rs +++ b/encodings/datetime-parts/src/array.rs @@ -3,6 +3,7 @@ use std::fmt::Debug; use std::hash::Hash; +use std::ops::Range; use vortex_array::Array; use vortex_array::ArrayBufferVisitor; @@ -12,6 +13,7 @@ use vortex_array::ArrayHash; use vortex_array::ArrayRef; use vortex_array::Canonical; use vortex_array::DeserializeMetadata; +use vortex_array::IntoArray; use vortex_array::Precision; use vortex_array::ProstMetadata; use vortex_array::SerializeMetadata; @@ -169,6 +171,19 @@ impl VTable for DateTimePartsVTable { ) -> VortexResult> { PARENT_RULES.evaluate(array, parent, child_idx) } + + fn slice(array: &Self::Array, range: Range) -> VortexResult> { + // SAFETY: slicing all components preserves values + Ok(Some(unsafe { + DateTimePartsArray::new_unchecked( + array.dtype().clone(), + array.days().slice(range.clone()), + array.seconds().slice(range.clone()), + array.subseconds().slice(range), + ) + .into_array() + })) + } } #[derive(Clone, Debug)] diff --git a/encodings/datetime-parts/src/ops.rs b/encodings/datetime-parts/src/ops.rs index 994d2458751..cd81925cbe1 100644 --- a/encodings/datetime-parts/src/ops.rs +++ b/encodings/datetime-parts/src/ops.rs @@ -1,11 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use std::ops::Range; - use vortex_array::Array; -use vortex_array::ArrayRef; -use vortex_array::IntoArray; use vortex_array::vtable::OperationsVTable; use vortex_dtype::DType; use vortex_dtype::datetime::TemporalMetadata; @@ -19,19 +15,6 @@ use crate::timestamp; use crate::timestamp::TimestampParts; impl OperationsVTable for DateTimePartsVTable { - fn slice(array: &DateTimePartsArray, range: Range) -> ArrayRef { - // SAFETY: slicing all components preserves values - unsafe { - DateTimePartsArray::new_unchecked( - array.dtype().clone(), - array.days().slice(range.clone()), - array.seconds().slice(range.clone()), - array.subseconds().slice(range), - ) - .into_array() - } - } - fn scalar_at(array: &DateTimePartsArray, index: usize) -> Scalar { let DType::Extension(ext) = array.dtype().clone() else { vortex_panic!( diff --git a/encodings/decimal-byte-parts/src/decimal_byte_parts/mod.rs b/encodings/decimal-byte-parts/src/decimal_byte_parts/mod.rs index 2232d9a86bc..f0b38ba3543 100644 --- a/encodings/decimal-byte-parts/src/decimal_byte_parts/mod.rs +++ b/encodings/decimal-byte-parts/src/decimal_byte_parts/mod.rs @@ -138,6 +138,14 @@ impl VTable for DecimalBytePartsVTable { ) -> VortexResult> { PARENT_RULES.evaluate(array, parent, child_idx) } + + fn slice(array: &Self::Array, range: Range) -> VortexResult> { + // SAFETY: slicing encoded MSP does not change the encoded values + Ok(Some(unsafe { + DecimalBytePartsArray::new_unchecked(array.msp.slice(range), *array.decimal_dtype()) + .into_array() + })) + } } /// This array encodes decimals as between 1-4 columns of primitive typed children. @@ -248,14 +256,6 @@ impl CanonicalVTable for DecimalBytePartsVTable { } impl OperationsVTable for DecimalBytePartsVTable { - fn slice(array: &DecimalBytePartsArray, range: Range) -> ArrayRef { - // SAFETY: slicing encoded MSP does not change the encoded values - unsafe { - DecimalBytePartsArray::new_unchecked(array.msp.slice(range), *array.decimal_dtype()) - .into_array() - } - } - fn scalar_at(array: &DecimalBytePartsArray, index: usize) -> Scalar { // TODO(joe): support parts len != 1 let scalar = array.msp.scalar_at(index); diff --git a/encodings/fastlanes/src/bitpacking/mod.rs b/encodings/fastlanes/src/bitpacking/mod.rs index d78b20be314..274a0642ce6 100644 --- a/encodings/fastlanes/src/bitpacking/mod.rs +++ b/encodings/fastlanes/src/bitpacking/mod.rs @@ -8,6 +8,7 @@ pub use array::bitpack_decompress; pub use array::unpack_iter; mod compute; +mod rules; mod vtable; pub use vtable::BitPackedVTable; diff --git a/encodings/fastlanes/src/bitpacking/rules.rs b/encodings/fastlanes/src/bitpacking/rules.rs new file mode 100644 index 00000000000..cbb811ebfae --- /dev/null +++ b/encodings/fastlanes/src/bitpacking/rules.rs @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_array::optimizer::rules::ParentRuleSet; + +use crate::BitPackedVTable; + +pub(super) const RULES: ParentRuleSet = ParentRuleSet::new(&[]); diff --git a/encodings/fastlanes/src/bitpacking/vtable/mod.rs b/encodings/fastlanes/src/bitpacking/vtable/mod.rs index e47a6d92f0d..835cd029fd2 100644 --- a/encodings/fastlanes/src/bitpacking/vtable/mod.rs +++ b/encodings/fastlanes/src/bitpacking/vtable/mod.rs @@ -1,10 +1,14 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +use std::cmp::max; +use std::ops::Range; + use vortex_array::ArrayRef; use vortex_array::Canonical; use vortex_array::DeserializeMetadata; use vortex_array::ExecutionCtx; +use vortex_array::IntoArray; use vortex_array::ProstMetadata; use vortex_array::SerializeMetadata; use vortex_array::buffer::BufferHandle; @@ -18,6 +22,7 @@ use vortex_array::vtable::ArrayVTable; use vortex_array::vtable::ArrayVTableExt; use vortex_array::vtable::NotSupported; use vortex_array::vtable::VTable; +use vortex_array::vtable::ValidityHelper; use vortex_array::vtable::ValidityVTableFromValidityHelper; use vortex_dtype::DType; use vortex_dtype::PType; @@ -28,6 +33,7 @@ use vortex_error::vortex_ensure; use vortex_error::vortex_err; use crate::BitPackedArray; +use crate::bitpacking::rules::RULES; use crate::bitpacking::vtable::kernels::filter::PARENT_KERNELS; mod array; @@ -251,6 +257,40 @@ impl VTable for BitPackedVTable { ) -> VortexResult> { PARENT_KERNELS.execute(array, parent, child_idx, ctx) } + + fn reduce_parent( + array: &Self::Array, + parent: &ArrayRef, + child_idx: usize, + ) -> VortexResult> { + RULES.evaluate(array, parent, child_idx) + } + + fn slice(array: &Self::Array, range: Range) -> VortexResult> { + let offset_start = range.start + array.offset() as usize; + let offset_stop = range.end + array.offset() as usize; + let offset = offset_start % 1024; + let block_start = max(0, offset_start - offset); + let block_stop = offset_stop.div_ceil(1024) * 1024; + + let encoded_start = (block_start / 8) * array.bit_width() as usize; + let encoded_stop = (block_stop / 8) * array.bit_width() as usize; + + // slice the buffer using the encoded start/stop values + // SAFETY: slicing packed values without decoding preserves invariants + Ok(Some(unsafe { + BitPackedArray::new_unchecked( + array.packed().slice(encoded_start..encoded_stop), + array.dtype.clone(), + array.validity().slice(range.clone()), + array.patches().and_then(|p| p.slice(range.clone())), + array.bit_width(), + range.len(), + offset as u16, + ) + .into_array() + })) + } } #[derive(Debug)] diff --git a/encodings/fastlanes/src/bitpacking/vtable/operations.rs b/encodings/fastlanes/src/bitpacking/vtable/operations.rs index ad883eb5dc7..b3ec21345f7 100644 --- a/encodings/fastlanes/src/bitpacking/vtable/operations.rs +++ b/encodings/fastlanes/src/bitpacking/vtable/operations.rs @@ -1,13 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use std::cmp::max; -use std::ops::Range; - -use vortex_array::ArrayRef; -use vortex_array::IntoArray; use vortex_array::vtable::OperationsVTable; -use vortex_array::vtable::ValidityHelper; use vortex_scalar::Scalar; use crate::BitPackedArray; @@ -15,32 +9,6 @@ use crate::BitPackedVTable; use crate::bitpack_decompress; impl OperationsVTable for BitPackedVTable { - fn slice(array: &BitPackedArray, range: Range) -> ArrayRef { - let offset_start = range.start + array.offset() as usize; - let offset_stop = range.end + array.offset() as usize; - let offset = offset_start % 1024; - let block_start = max(0, offset_start - offset); - let block_stop = offset_stop.div_ceil(1024) * 1024; - - let encoded_start = (block_start / 8) * array.bit_width() as usize; - let encoded_stop = (block_stop / 8) * array.bit_width() as usize; - - // slice the buffer using the encoded start/stop values - // SAFETY: slicing packed values without decoding preserves invariants - unsafe { - BitPackedArray::new_unchecked( - array.packed().slice(encoded_start..encoded_stop), - array.dtype.clone(), - array.validity().slice(range.clone()), - array.patches().and_then(|p| p.slice(range.clone())), - array.bit_width(), - range.len(), - offset as u16, - ) - .into_array() - } - } - fn scalar_at(array: &BitPackedArray, index: usize) -> Scalar { if let Some(patches) = array.patches() && let Some(patch) = patches.get_patched(index) diff --git a/encodings/fastlanes/src/delta/vtable/mod.rs b/encodings/fastlanes/src/delta/vtable/mod.rs index 502a127886b..9b6987378f1 100644 --- a/encodings/fastlanes/src/delta/vtable/mod.rs +++ b/encodings/fastlanes/src/delta/vtable/mod.rs @@ -1,9 +1,13 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +use std::cmp::min; +use std::ops::Range; + use fastlanes::FastLanes; use prost::Message; use vortex_array::ArrayRef; +use vortex_array::IntoArray; use vortex_array::ProstMetadata; use vortex_array::buffer::BufferHandle; use vortex_array::serde::ArrayChildren; @@ -61,6 +65,32 @@ impl VTable for DeltaVTable { DeltaVTable.as_vtable() } + fn slice(array: &Self::Array, range: Range) -> VortexResult> { + let physical_start = range.start + array.offset(); + let physical_stop = range.end + array.offset(); + + let start_chunk = physical_start / 1024; + let stop_chunk = physical_stop.div_ceil(1024); + + let bases = array.bases(); + let deltas = array.deltas(); + let lanes = array.lanes(); + + let new_bases = bases.slice( + min(start_chunk * lanes, array.bases_len())..min(stop_chunk * lanes, array.bases_len()), + ); + + let new_deltas = deltas.slice( + min(start_chunk * 1024, array.deltas_len())..min(stop_chunk * 1024, array.deltas_len()), + ); + + // SAFETY: slicing valid bases/deltas preserves correctness + Ok(Some(unsafe { + DeltaArray::new_unchecked(new_bases, new_deltas, physical_start % 1024, range.len()) + .into_array() + })) + } + fn with_children(array: &mut Self::Array, children: Vec) -> VortexResult<()> { // DeltaArray children order (from visit_children): // 1. bases diff --git a/encodings/fastlanes/src/delta/vtable/operations.rs b/encodings/fastlanes/src/delta/vtable/operations.rs index d8a8014fff4..96947350fc0 100644 --- a/encodings/fastlanes/src/delta/vtable/operations.rs +++ b/encodings/fastlanes/src/delta/vtable/operations.rs @@ -1,12 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use std::cmp::min; -use std::ops::Range; - -use vortex_array::Array; -use vortex_array::ArrayRef; -use vortex_array::IntoArray; use vortex_array::ToCanonical; use vortex_array::vtable::OperationsVTable; use vortex_scalar::Scalar; @@ -15,32 +9,6 @@ use super::DeltaVTable; use crate::DeltaArray; impl OperationsVTable for DeltaVTable { - fn slice(array: &DeltaArray, range: Range) -> ArrayRef { - let physical_start = range.start + array.offset(); - let physical_stop = range.end + array.offset(); - - let start_chunk = physical_start / 1024; - let stop_chunk = physical_stop.div_ceil(1024); - - let bases = array.bases(); - let deltas = array.deltas(); - let lanes = array.lanes(); - - let new_bases = bases.slice( - min(start_chunk * lanes, array.bases_len())..min(stop_chunk * lanes, array.bases_len()), - ); - - let new_deltas = deltas.slice( - min(start_chunk * 1024, array.deltas_len())..min(stop_chunk * 1024, array.deltas_len()), - ); - - // SAFETY: slicing valid bases/deltas preserves correctness - unsafe { - DeltaArray::new_unchecked(new_bases, new_deltas, physical_start % 1024, range.len()) - .into_array() - } - } - fn scalar_at(array: &DeltaArray, index: usize) -> Scalar { let decompressed = array.slice(index..index + 1).to_primitive(); decompressed.scalar_at(0) @@ -56,7 +24,6 @@ mod tests { use vortex_array::compute::conformance::binary_numeric::test_binary_numeric_array; use vortex_array::compute::conformance::consistency::test_array_consistency; - use super::*; use crate::DeltaArray; #[test] diff --git a/encodings/fastlanes/src/for/vtable/mod.rs b/encodings/fastlanes/src/for/vtable/mod.rs index 07fc74d3d36..2705b8b9a2c 100644 --- a/encodings/fastlanes/src/for/vtable/mod.rs +++ b/encodings/fastlanes/src/for/vtable/mod.rs @@ -3,9 +3,11 @@ use std::fmt::Debug; use std::fmt::Formatter; +use std::ops::Range; use vortex_array::ArrayRef; use vortex_array::DeserializeMetadata; +use vortex_array::IntoArray; use vortex_array::SerializeMetadata; use vortex_array::buffer::BufferHandle; use vortex_array::serde::ArrayChildren; @@ -114,6 +116,17 @@ impl VTable for FoRVTable { ) -> VortexResult> { PARENT_RULES.evaluate(array, parent, child_idx) } + + fn slice(array: &Self::Array, range: Range) -> VortexResult> { + // SAFETY: Just slicing encoded data does not affect FOR. + Ok(Some(unsafe { + FoRArray::new_unchecked( + array.encoded().slice(range), + array.reference_scalar().clone(), + ) + .into_array() + })) + } } #[derive(Debug)] diff --git a/encodings/fastlanes/src/for/vtable/operations.rs b/encodings/fastlanes/src/for/vtable/operations.rs index 5eade29935b..2dff803197d 100644 --- a/encodings/fastlanes/src/for/vtable/operations.rs +++ b/encodings/fastlanes/src/for/vtable/operations.rs @@ -1,10 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use std::ops::Range; - -use vortex_array::ArrayRef; -use vortex_array::IntoArray; use vortex_array::vtable::OperationsVTable; use vortex_dtype::match_each_integer_ptype; use vortex_error::VortexExpect; @@ -14,17 +10,6 @@ use super::FoRVTable; use crate::FoRArray; impl OperationsVTable for FoRVTable { - fn slice(array: &FoRArray, range: Range) -> ArrayRef { - // SAFETY: Just slicing encoded data does not affect FOR. - unsafe { - FoRArray::new_unchecked( - array.encoded().slice(range), - array.reference_scalar().clone(), - ) - .into_array() - } - } - fn scalar_at(array: &FoRArray, index: usize) -> Scalar { let encoded_pvalue = array.encoded().scalar_at(index); let encoded_pvalue = encoded_pvalue.as_primitive(); diff --git a/encodings/fastlanes/src/rle/array/mod.rs b/encodings/fastlanes/src/rle/array/mod.rs index 468e3e02983..ce639b96996 100644 --- a/encodings/fastlanes/src/rle/array/mod.rs +++ b/encodings/fastlanes/src/rle/array/mod.rs @@ -313,7 +313,8 @@ mod tests { ) .unwrap(); - let valid_slice = rle_array.slice(0..3); + let valid_slice = rle_array.slice(0..3).to_primitive(); + // TODO(joe): replace with compute null count assert!(valid_slice.all_valid()); let mixed_slice = rle_array.slice(1..5); @@ -349,7 +350,8 @@ mod tests { ) .unwrap(); - let invalid_slice = rle_array.slice(2..5); + // TODO(joe): replace with compute null count + let invalid_slice = rle_array.slice(2..5).to_canonical().into_primitive(); assert!(invalid_slice.all_invalid()); let mixed_slice = rle_array.slice(1..4); diff --git a/encodings/fastlanes/src/rle/kernel.rs b/encodings/fastlanes/src/rle/kernel.rs new file mode 100644 index 00000000000..d8d10bb0fd2 --- /dev/null +++ b/encodings/fastlanes/src/rle/kernel.rs @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_array::kernel::ParentKernelSet; + +use crate::RLEVTable; + +pub(super) const PARENT_KERNELS: ParentKernelSet = ParentKernelSet::new(&[]); diff --git a/encodings/fastlanes/src/rle/mod.rs b/encodings/fastlanes/src/rle/mod.rs index 0b53880a5d8..f3400fcfe21 100644 --- a/encodings/fastlanes/src/rle/mod.rs +++ b/encodings/fastlanes/src/rle/mod.rs @@ -5,6 +5,7 @@ mod array; pub use array::RLEArray; mod compute; +mod kernel; mod vtable; pub use vtable::RLEVTable; diff --git a/encodings/fastlanes/src/rle/vtable/mod.rs b/encodings/fastlanes/src/rle/vtable/mod.rs index 0aba9335fa9..6ab2180e9b2 100644 --- a/encodings/fastlanes/src/rle/vtable/mod.rs +++ b/encodings/fastlanes/src/rle/vtable/mod.rs @@ -3,6 +3,8 @@ use prost::Message; use vortex_array::ArrayRef; +use vortex_array::Canonical; +use vortex_array::ExecutionCtx; use vortex_array::ProstMetadata; use vortex_array::buffer::BufferHandle; use vortex_array::serde::ArrayChildren; @@ -20,11 +22,13 @@ use vortex_error::VortexResult; use vortex_error::vortex_ensure; use crate::RLEArray; +use crate::rle::kernel::PARENT_KERNELS; mod array; mod canonical; mod encode; mod operations; +mod rules; mod validity; mod visitor; @@ -67,6 +71,47 @@ impl VTable for RLEVTable { RLEVTable.as_vtable() } + fn slice(array: &Self::Array, range: std::ops::Range) -> VortexResult> { + use vortex_array::IntoArray; + + use crate::FL_CHUNK_SIZE; + + let offset_in_chunk = array.offset(); + let chunk_start_idx = (offset_in_chunk + range.start) / FL_CHUNK_SIZE; + let chunk_end_idx = (offset_in_chunk + range.end).div_ceil(FL_CHUNK_SIZE); + + let values_start_idx = array.values_idx_offset(chunk_start_idx); + let values_end_idx = if chunk_end_idx < array.values_idx_offsets().len() { + array.values_idx_offset(chunk_end_idx) + } else { + array.values().len() + }; + + let sliced_values = array.values().slice(values_start_idx..values_end_idx); + + let sliced_values_idx_offsets = array + .values_idx_offsets() + .slice(chunk_start_idx..chunk_end_idx); + + let sliced_indices = array + .indices() + .slice(chunk_start_idx * FL_CHUNK_SIZE..chunk_end_idx * FL_CHUNK_SIZE); + + // SAFETY: Slicing preserves all invariants. + Ok(Some(unsafe { + RLEArray::new_unchecked( + sliced_values, + sliced_indices, + sliced_values_idx_offsets, + array.dtype().clone(), + // Keep the offset relative to the first chunk. + (array.offset() + range.start) % FL_CHUNK_SIZE, + range.len(), + ) + .into_array() + })) + } + fn with_children(array: &mut Self::Array, children: Vec) -> VortexResult<()> { // RLEArray children order (from visit_children): // 1. values @@ -143,6 +188,23 @@ impl VTable for RLEVTable { len, ) } + + fn execute_parent( + array: &Self::Array, + parent: &ArrayRef, + child_idx: usize, + ctx: &mut ExecutionCtx, + ) -> VortexResult> { + PARENT_KERNELS.execute(array, parent, child_idx, ctx) + } + + fn reduce_parent( + array: &RLEArray, + parent: &ArrayRef, + child_idx: usize, + ) -> VortexResult> { + rules::RULES.evaluate(array, parent, child_idx) + } } #[derive(Debug)] diff --git a/encodings/fastlanes/src/rle/vtable/operations.rs b/encodings/fastlanes/src/rle/vtable/operations.rs index 1ea116887d7..4a5797df683 100644 --- a/encodings/fastlanes/src/rle/vtable/operations.rs +++ b/encodings/fastlanes/src/rle/vtable/operations.rs @@ -1,10 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use std::ops::Range; - -use vortex_array::ArrayRef; -use vortex_array::IntoArray; use vortex_array::vtable::OperationsVTable; use vortex_error::VortexExpect; use vortex_scalar::Scalar; @@ -14,43 +10,6 @@ use crate::FL_CHUNK_SIZE; use crate::RLEArray; impl OperationsVTable for RLEVTable { - fn slice(array: &RLEArray, range: Range) -> ArrayRef { - let offset_in_chunk = array.offset(); - let chunk_start_idx = (offset_in_chunk + range.start) / FL_CHUNK_SIZE; - let chunk_end_idx = (offset_in_chunk + range.end).div_ceil(FL_CHUNK_SIZE); - - let values_start_idx = array.values_idx_offset(chunk_start_idx); - let values_end_idx = if chunk_end_idx < array.values_idx_offsets().len() { - array.values_idx_offset(chunk_end_idx) - } else { - array.values().len() - }; - - let sliced_values = array.values().slice(values_start_idx..values_end_idx); - - let sliced_values_idx_offsets = array - .values_idx_offsets() - .slice(chunk_start_idx..chunk_end_idx); - - let sliced_indices = array - .indices() - .slice(chunk_start_idx * FL_CHUNK_SIZE..chunk_end_idx * FL_CHUNK_SIZE); - - // SAFETY: Slicing preserves all invariants. - unsafe { - RLEArray::new_unchecked( - sliced_values, - sliced_indices, - sliced_values_idx_offsets, - array.dtype().clone(), - // Keep the offset relative to the first chunk. - (array.offset() + range.start) % FL_CHUNK_SIZE, - range.len(), - ) - .into_array() - } - } - fn scalar_at(array: &RLEArray, index: usize) -> Scalar { let offset_in_chunk = array.offset(); let chunk_relative_idx = array.indices().scalar_at(offset_in_chunk + index); diff --git a/encodings/fastlanes/src/rle/vtable/rules.rs b/encodings/fastlanes/src/rle/vtable/rules.rs new file mode 100644 index 00000000000..dbe1b3d6c7c --- /dev/null +++ b/encodings/fastlanes/src/rle/vtable/rules.rs @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_array::optimizer::rules::ParentRuleSet; + +use crate::RLEVTable; + +pub(super) const RULES: ParentRuleSet = ParentRuleSet::new(&[]); diff --git a/encodings/fsst/src/array.rs b/encodings/fsst/src/array.rs index 78973336fef..96557fd7e17 100644 --- a/encodings/fsst/src/array.rs +++ b/encodings/fsst/src/array.rs @@ -4,6 +4,7 @@ use std::fmt::Debug; use std::fmt::Formatter; use std::hash::Hash; +use std::ops::Range; use std::sync::Arc; use std::sync::LazyLock; @@ -19,6 +20,7 @@ use vortex_array::ArrayRef; use vortex_array::Canonical; use vortex_array::DeserializeMetadata; use vortex_array::ExecutionCtx; +use vortex_array::IntoArray; use vortex_array::Precision; use vortex_array::ProstMetadata; use vortex_array::SerializeMetadata; @@ -189,6 +191,26 @@ impl VTable for FSSTVTable { ) -> VortexResult> { PARENT_KERNELS.execute(array, parent, child_idx, ctx) } + + fn slice(array: &Self::Array, range: Range) -> VortexResult> { + // SAFETY: slicing the `codes` leaves the symbol table intact + Ok(Some( + unsafe { + FSSTArray::new_unchecked( + array.dtype().clone(), + array.symbols().clone(), + array.symbol_lengths().clone(), + array + .codes() + .slice(range.clone()) + .as_::() + .clone(), + array.uncompressed_lengths().slice(range), + ) + } + .into_array(), + )) + } } #[derive(Clone)] diff --git a/encodings/fsst/src/ops.rs b/encodings/fsst/src/ops.rs index b348703e57f..570b15253b5 100644 --- a/encodings/fsst/src/ops.rs +++ b/encodings/fsst/src/ops.rs @@ -1,12 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use std::ops::Range; - -use vortex_array::Array; -use vortex_array::ArrayRef; -use vortex_array::IntoArray; -use vortex_array::arrays::VarBinVTable; use vortex_array::arrays::varbin_scalar; use vortex_array::vtable::OperationsVTable; use vortex_buffer::ByteBuffer; @@ -17,24 +11,6 @@ use crate::FSSTArray; use crate::FSSTVTable; impl OperationsVTable for FSSTVTable { - fn slice(array: &FSSTArray, range: Range) -> ArrayRef { - // SAFETY: slicing the `codes` leaves the symbol table intact - unsafe { - FSSTArray::new_unchecked( - array.dtype().clone(), - array.symbols().clone(), - array.symbol_lengths().clone(), - array - .codes() - .slice(range.clone()) - .as_::() - .clone(), - array.uncompressed_lengths().slice(range), - ) - .into_array() - } - } - fn scalar_at(array: &FSSTArray, index: usize) -> Scalar { let compressed = array.codes().scalar_at(index); let binary_datum = compressed.as_binary().value().vortex_expect("non-null"); diff --git a/encodings/pco/src/array.rs b/encodings/pco/src/array.rs index 399f6a6fe0d..79bf7a6e0db 100644 --- a/encodings/pco/src/array.rs +++ b/encodings/pco/src/array.rs @@ -182,6 +182,10 @@ impl VTable for PcoVTable { Ok(()) } + + fn slice(array: &Self::Array, range: Range) -> VortexResult> { + Ok(Some(array._slice(range.start, range.end).into_array())) + } } pub(crate) fn number_type_from_dtype(dtype: &DType) -> NumberType { @@ -530,10 +534,6 @@ impl CanonicalVTable for PcoVTable { } impl OperationsVTable for PcoVTable { - fn slice(array: &PcoArray, range: Range) -> ArrayRef { - array._slice(range.start, range.end).into_array() - } - fn scalar_at(array: &PcoArray, index: usize) -> Scalar { array._slice(index, index + 1).decompress().scalar_at(0) } diff --git a/encodings/runend/src/array.rs b/encodings/runend/src/array.rs index 37546d04c46..38cf93d1a04 100644 --- a/encodings/runend/src/array.rs +++ b/encodings/runend/src/array.rs @@ -3,6 +3,7 @@ use std::fmt::Debug; use std::hash::Hash; +use std::ops::Range; use vortex_array::Array; use vortex_array::ArrayEq; @@ -10,11 +11,13 @@ use vortex_array::ArrayHash; use vortex_array::ArrayRef; use vortex_array::Canonical; use vortex_array::DeserializeMetadata; +use vortex_array::ExecutionCtx; use vortex_array::IntoArray; use vortex_array::Precision; use vortex_array::ProstMetadata; use vortex_array::SerializeMetadata; use vortex_array::ToCanonical; +use vortex_array::arrays::ConstantArray; use vortex_array::arrays::PrimitiveVTable; use vortex_array::buffer::BufferHandle; use vortex_array::search_sorted::SearchSorted; @@ -46,6 +49,7 @@ use vortex_scalar::PValue; use crate::compress::runend_decode_bools; use crate::compress::runend_decode_primitive; use crate::compress::runend_encode; +use crate::kernel::PARENT_KERNELS; use crate::rules::RULES; vtable!(RunEnd); @@ -81,6 +85,30 @@ impl VTable for RunEndVTable { RunEndVTable.as_vtable() } + fn slice(array: &Self::Array, range: Range) -> VortexResult> { + let new_length = range.len(); + + let slice_begin = array.find_physical_index(range.start); + let slice_end = crate::ops::find_slice_end_index(array.ends(), range.end + array.offset()); + + // If the sliced range contains only a single run, opt to return a ConstantArray. + if slice_begin + 1 == slice_end { + let value = array.values().scalar_at(slice_begin); + return Ok(Some(ConstantArray::new(value, new_length).into_array())); + } + + // SAFETY: we maintain the ends invariant in our slice implementation + Ok(Some(unsafe { + RunEndArray::new_unchecked( + array.ends().slice(slice_begin..slice_end), + array.values().slice(slice_begin..slice_end), + range.start + array.offset(), + new_length, + ) + .into_array() + })) + } + fn metadata(array: &RunEndArray) -> VortexResult { Ok(ProstMetadata(RunEndMetadata { ends_ptype: PType::try_from(array.ends().dtype()).vortex_expect("Must be a valid PType") @@ -142,6 +170,15 @@ impl VTable for RunEndVTable { ) -> VortexResult> { RULES.evaluate(array, parent, child_idx) } + + fn execute_parent( + array: &Self::Array, + parent: &ArrayRef, + child_idx: usize, + ctx: &mut ExecutionCtx, + ) -> VortexResult> { + PARENT_KERNELS.execute(array, parent, child_idx, ctx) + } } #[derive(Clone, Debug)] diff --git a/encodings/runend/src/compute/filter.rs b/encodings/runend/src/compute/filter.rs index 191adfddfa8..aee21762d73 100644 --- a/encodings/runend/src/compute/filter.rs +++ b/encodings/runend/src/compute/filter.rs @@ -141,10 +141,12 @@ fn filter_run_end_primitive + AsPrimitiv #[cfg(test)] mod tests { + use vortex_array::Array; use vortex_array::IntoArray; use vortex_array::ToCanonical; use vortex_array::arrays::PrimitiveArray; use vortex_array::assert_arrays_eq; + use vortex_error::VortexResult; use vortex_mask::Mask; use super::filter_run_end; @@ -181,22 +183,17 @@ mod tests { } #[test] - fn filter_sliced_run_end() { + fn filter_sliced_run_end() -> VortexResult<()> { let arr = ree_array().slice(2..7); - let filtered = filter_run_end( - arr.as_::(), - &Mask::from_iter([true, false, false, true, true]), - ) - .unwrap(); - let filtered_run_end = filtered.as_::(); + let filtered = arr.filter(Mask::from_iter([true, false, false, true, true]))?; assert_arrays_eq!( - filtered_run_end.ends().to_primitive(), - PrimitiveArray::from_iter([1u8, 2, 3]) - ); - assert_arrays_eq!( - filtered_run_end.values().to_primitive(), - PrimitiveArray::from_iter([1i32, 4, 2]) + filtered, + RunEndArray::new( + PrimitiveArray::from_iter([1u8, 2, 3]).into_array(), + PrimitiveArray::from_iter([1i32, 4, 2]).into_array() + ) ); + Ok(()) } } diff --git a/encodings/runend/src/kernel.rs b/encodings/runend/src/kernel.rs new file mode 100644 index 00000000000..e1ba34b2b76 --- /dev/null +++ b/encodings/runend/src/kernel.rs @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_array::kernel::ParentKernelSet; + +use crate::RunEndVTable; + +pub(super) const PARENT_KERNELS: ParentKernelSet = ParentKernelSet::new(&[]); diff --git a/encodings/runend/src/lib.rs b/encodings/runend/src/lib.rs index 7dbc2727a07..9a14bb0e1dd 100644 --- a/encodings/runend/src/lib.rs +++ b/encodings/runend/src/lib.rs @@ -10,6 +10,7 @@ mod arrow; pub mod compress; mod compute; mod iter; +mod kernel; mod ops; mod rules; diff --git a/encodings/runend/src/ops.rs b/encodings/runend/src/ops.rs index b1e37f3a973..6a15906275c 100644 --- a/encodings/runend/src/ops.rs +++ b/encodings/runend/src/ops.rs @@ -1,12 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use std::ops::Range; - use vortex_array::Array; -use vortex_array::ArrayRef; -use vortex_array::IntoArray; -use vortex_array::arrays::ConstantArray; use vortex_array::search_sorted::SearchResult; use vortex_array::search_sorted::SearchSorted; use vortex_array::search_sorted::SearchSortedSide; @@ -18,30 +13,6 @@ use crate::RunEndArray; use crate::RunEndVTable; impl OperationsVTable for RunEndVTable { - fn slice(array: &RunEndArray, range: Range) -> ArrayRef { - let new_length = range.len(); - - let slice_begin = array.find_physical_index(range.start); - let slice_end = find_slice_end_index(array.ends(), range.end + array.offset()); - - // If the sliced range contains only a single run, opt to return a ConstantArray. - if slice_begin + 1 == slice_end { - let value = array.values().scalar_at(slice_begin); - return ConstantArray::new(value, new_length).into_array(); - } - - // SAFETY: we maintain the ends invariant in our slice implementation - unsafe { - RunEndArray::new_unchecked( - array.ends().slice(slice_begin..slice_end), - array.values().slice(slice_begin..slice_end), - range.start + array.offset(), - new_length, - ) - .into_array() - } - } - fn scalar_at(array: &RunEndArray, index: usize) -> Scalar { array.values().scalar_at(array.find_physical_index(index)) } @@ -74,6 +45,7 @@ mod tests { use vortex_array::IntoArray; use vortex_array::arrays::PrimitiveArray; use vortex_array::assert_arrays_eq; + use vortex_array::compute::Cost; use vortex_buffer::buffer; use vortex_dtype::DType; use vortex_dtype::Nullability; @@ -159,7 +131,7 @@ mod tests { let sliced_array = re_array.slice(2..5); - assert!(sliced_array.is_constant()) + assert!(sliced_array.is_constant_opts(Cost::Canonicalize)) } #[test] diff --git a/encodings/sequence/src/array.rs b/encodings/sequence/src/array.rs index cdc04f060ad..6a94bdb716b 100644 --- a/encodings/sequence/src/array.rs +++ b/encodings/sequence/src/array.rs @@ -314,6 +314,27 @@ impl VTable for SequenceVTable { } .map(|a| a.map(|a| a.into_array(array.dtype()))) } + + fn reduce_parent( + _array: &SequenceArray, + _parent: &ArrayRef, + _child_idx: usize, + ) -> VortexResult> { + Ok(None) + } + + fn slice(array: &Self::Array, range: Range) -> VortexResult> { + Ok(Some( + SequenceArray::unchecked_new( + array.index_value(range.start), + array.multiplier, + array.ptype(), + array.dtype().nullability(), + range.len(), + ) + .to_array(), + )) + } } fn execute_iter>( @@ -382,17 +403,6 @@ impl CanonicalVTable for SequenceVTable { } impl OperationsVTable for SequenceVTable { - fn slice(array: &SequenceArray, range: Range) -> ArrayRef { - SequenceArray::unchecked_new( - array.index_value(range.start), - array.multiplier, - array.ptype(), - array.dtype().nullability(), - range.len(), - ) - .to_array() - } - fn scalar_at(array: &SequenceArray, index: usize) -> Scalar { Scalar::new( array.dtype().clone(), diff --git a/encodings/sparse/src/lib.rs b/encodings/sparse/src/lib.rs index 88337677f50..a9e53e3aec3 100644 --- a/encodings/sparse/src/lib.rs +++ b/encodings/sparse/src/lib.rs @@ -3,6 +3,7 @@ use std::fmt::Debug; use std::hash::Hash; +use std::ops::Range; use itertools::Itertools as _; use num_traits::AsPrimitive; @@ -164,6 +165,28 @@ impl VTable for SparseVTable { Ok(()) } + + fn slice(array: &SparseArray, range: Range) -> VortexResult> { + let new_patches = array.patches().slice(range.clone()); + + let Some(new_patches) = new_patches else { + return Ok(Some( + ConstantArray::new(array.fill_scalar().clone(), range.len()).into_array(), + )); + }; + + // If the number of values in the sparse array matches the array length, then all + // values are in fact patches, since patches are sorted this is the correct values. + if new_patches.array_len() == new_patches.values().len() { + return Ok(Some(new_patches.into_values())); + } + + // SAFETY: + Ok(Some( + unsafe { SparseArray::new_unchecked(new_patches, array.fill_scalar().clone()) } + .into_array(), + )) + } } #[derive(Clone, Debug)] diff --git a/encodings/sparse/src/ops.rs b/encodings/sparse/src/ops.rs index b1184bad4ed..dccafedc1ff 100644 --- a/encodings/sparse/src/ops.rs +++ b/encodings/sparse/src/ops.rs @@ -1,12 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use std::ops::Range; - -use vortex_array::Array; -use vortex_array::ArrayRef; -use vortex_array::IntoArray; -use vortex_array::arrays::ConstantArray; use vortex_array::vtable::OperationsVTable; use vortex_scalar::Scalar; @@ -14,23 +8,6 @@ use crate::SparseArray; use crate::SparseVTable; impl OperationsVTable for SparseVTable { - fn slice(array: &SparseArray, range: Range) -> ArrayRef { - let new_patches = array.patches().slice(range.clone()); - - let Some(new_patches) = new_patches else { - return ConstantArray::new(array.fill_scalar().clone(), range.len()).into_array(); - }; - - // If the number of values in the sparse array matches the array length, then all - // values are in fact patches, since patches are sorted this is the correct values. - if new_patches.array_len() == new_patches.values().len() { - return new_patches.into_values(); - } - - // SAFETY: - unsafe { SparseArray::new_unchecked(new_patches, array.fill_scalar().clone()).into_array() } - } - fn scalar_at(array: &SparseArray, index: usize) -> Scalar { array .patches() diff --git a/encodings/zigzag/src/array.rs b/encodings/zigzag/src/array.rs index 83d15c5d7c8..8c2e7066625 100644 --- a/encodings/zigzag/src/array.rs +++ b/encodings/zigzag/src/array.rs @@ -109,6 +109,12 @@ impl VTable for ZigZagVTable { array.encoded = children.into_iter().next().vortex_expect("checked"); Ok(()) } + + fn slice(array: &Self::Array, range: Range) -> VortexResult> { + Ok(Some( + ZigZagArray::new(array.encoded().slice(range)).into_array(), + )) + } } #[derive(Clone, Debug)] @@ -181,10 +187,6 @@ impl CanonicalVTable for ZigZagVTable { } impl OperationsVTable for ZigZagVTable { - fn slice(array: &ZigZagArray, range: Range) -> ArrayRef { - ZigZagArray::new(array.encoded().slice(range)).into_array() - } - fn scalar_at(array: &ZigZagArray, index: usize) -> Scalar { let scalar = array.encoded().scalar_at(index); if scalar.is_null() { diff --git a/encodings/zstd/src/array.rs b/encodings/zstd/src/array.rs index 88a9571775d..38e8d2cadcc 100644 --- a/encodings/zstd/src/array.rs +++ b/encodings/zstd/src/array.rs @@ -181,6 +181,10 @@ impl VTable for ZstdVTable { Ok(()) } + + fn slice(array: &Self::Array, range: Range) -> VortexResult> { + Ok(Some(array._slice(range.start, range.end).into_array())) + } } #[derive(Debug)] @@ -777,10 +781,6 @@ impl CanonicalVTable for ZstdVTable { } impl OperationsVTable for ZstdVTable { - fn slice(array: &ZstdArray, range: Range) -> ArrayRef { - array._slice(range.start, range.end).into_array() - } - fn scalar_at(array: &ZstdArray, index: usize) -> Scalar { array._slice(index, index + 1).decompress().scalar_at(0) } diff --git a/vortex-array/src/array/mod.rs b/vortex-array/src/array/mod.rs index 3188f9c9e1b..38bac982b48 100644 --- a/vortex-array/src/array/mod.rs +++ b/vortex-array/src/array/mod.rs @@ -39,6 +39,7 @@ use crate::arrays::FixedSizeListVTable; use crate::arrays::ListViewVTable; use crate::arrays::NullVTable; use crate::arrays::PrimitiveVTable; +use crate::arrays::SliceArray; use crate::arrays::StructVTable; use crate::arrays::VarBinVTable; use crate::arrays::VarBinViewVTable; @@ -499,7 +500,10 @@ impl Array for ArrayAdapter { return Canonical::empty(self.dtype()).into_array(); } - let sliced = >::slice(&self.0, range); + let sliced = SliceArray::new(self.0.to_array(), range) + .into_array() + .optimize() + .vortex_expect("cannot fail for now"); assert_eq!( sliced.len(), diff --git a/vortex-array/src/arrays/bool/vtable/mod.rs b/vortex-array/src/arrays/bool/vtable/mod.rs index b29dad2e7da..eb5226c3757 100644 --- a/vortex-array/src/arrays/bool/vtable/mod.rs +++ b/vortex-array/src/arrays/bool/vtable/mod.rs @@ -1,6 +1,8 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +use std::ops::Range; + use vortex_buffer::BitBuffer; use vortex_dtype::DType; use vortex_error::VortexExpect; @@ -10,6 +12,7 @@ use vortex_error::vortex_ensure; use crate::ArrayRef; use crate::DeserializeMetadata; +use crate::IntoArray; use crate::ProstMetadata; use crate::SerializeMetadata; use crate::arrays::BoolArray; @@ -20,6 +23,7 @@ use crate::vtable; use crate::vtable::ArrayVTableExt; use crate::vtable::NotSupported; use crate::vtable::VTable; +use crate::vtable::ValidityHelper; use crate::vtable::ValidityVTableFromValidityHelper; mod array; @@ -132,6 +136,16 @@ impl VTable for BoolVTable { ) -> VortexResult> { RULES.evaluate(array, parent, child_idx) } + + fn slice(array: &Self::Array, range: Range) -> VortexResult> { + Ok(Some( + BoolArray::from_bit_buffer( + array.bit_buffer().slice(range.clone()), + array.validity().slice(range), + ) + .into_array(), + )) + } } #[derive(Debug)] diff --git a/vortex-array/src/arrays/bool/vtable/operations.rs b/vortex-array/src/arrays/bool/vtable/operations.rs index 0788a80db75..61ac756e3b9 100644 --- a/vortex-array/src/arrays/bool/vtable/operations.rs +++ b/vortex-array/src/arrays/bool/vtable/operations.rs @@ -1,26 +1,13 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use std::ops::Range; - use vortex_scalar::Scalar; -use crate::ArrayRef; -use crate::IntoArray; use crate::arrays::BoolArray; use crate::arrays::BoolVTable; use crate::vtable::OperationsVTable; -use crate::vtable::ValidityHelper; impl OperationsVTable for BoolVTable { - fn slice(array: &BoolArray, range: Range) -> ArrayRef { - BoolArray::from_bit_buffer( - array.bit_buffer().slice(range.clone()), - array.validity().slice(range), - ) - .into_array() - } - fn scalar_at(array: &BoolArray, index: usize) -> Scalar { Scalar::bool(array.bit_buffer().value(index), array.dtype().nullability()) } diff --git a/vortex-array/src/arrays/chunked/vtable/mod.rs b/vortex-array/src/arrays/chunked/vtable/mod.rs index 9c4ecfce7a2..9c4634167c5 100644 --- a/vortex-array/src/arrays/chunked/vtable/mod.rs +++ b/vortex-array/src/arrays/chunked/vtable/mod.rs @@ -1,6 +1,8 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +use std::ops::Range; + use itertools::Itertools; use vortex_dtype::DType; use vortex_dtype::Nullability; @@ -176,4 +178,51 @@ impl VTable for ChunkedVTable { ) -> VortexResult> { PARENT_RULES.evaluate(array, parent, child_idx) } + + fn slice(array: &Self::Array, range: Range) -> VortexResult> { + assert!( + !array.is_empty() || (range.start > 0 && range.end > 0), + "Empty chunked array can't be sliced from {} to {}", + range.start, + range.end + ); + + if array.is_empty() { + // SAFETY: empty chunked array trivially satisfies all validations + unsafe { + return Ok(Some( + ChunkedArray::new_unchecked(vec![], array.dtype().clone()).into_array(), + )); + } + } + + let (offset_chunk, offset_in_first_chunk) = array.find_chunk_idx(range.start); + let (length_chunk, length_in_last_chunk) = array.find_chunk_idx(range.end); + + if length_chunk == offset_chunk { + let chunk = array.chunk(offset_chunk); + return Ok(Some( + chunk.slice(offset_in_first_chunk..length_in_last_chunk), + )); + } + + let mut chunks = (offset_chunk..length_chunk + 1) + .map(|i| array.chunk(i).clone()) + .collect_vec(); + if let Some(c) = chunks.first_mut() { + *c = c.slice(offset_in_first_chunk..c.len()); + } + + if length_in_last_chunk == 0 { + chunks.pop(); + } else if let Some(c) = chunks.last_mut() { + *c = c.slice(0..length_in_last_chunk); + } + + // SAFETY: chunks are slices of the original valid chunks, preserving their dtype. + // All chunks maintain the same dtype as the original array. + Ok(Some(unsafe { + ChunkedArray::new_unchecked(chunks, array.dtype().clone()).into_array() + })) + } } diff --git a/vortex-array/src/arrays/chunked/vtable/operations.rs b/vortex-array/src/arrays/chunked/vtable/operations.rs index 0d72993bcbd..28b62a78849 100644 --- a/vortex-array/src/arrays/chunked/vtable/operations.rs +++ b/vortex-array/src/arrays/chunked/vtable/operations.rs @@ -1,60 +1,13 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use std::ops::Range; - -use itertools::Itertools; use vortex_scalar::Scalar; -use crate::Array; -use crate::ArrayRef; -use crate::IntoArray; use crate::arrays::ChunkedArray; use crate::arrays::ChunkedVTable; use crate::vtable::OperationsVTable; impl OperationsVTable for ChunkedVTable { - fn slice(array: &ChunkedArray, range: Range) -> ArrayRef { - assert!( - !array.is_empty() || (range.start > 0 && range.end > 0), - "Empty chunked array can't be sliced from {} to {}", - range.start, - range.end - ); - - if array.is_empty() { - // SAFETY: empty chunked array trivially satisfies all validations - unsafe { - return ChunkedArray::new_unchecked(vec![], array.dtype().clone()).into_array(); - } - } - - let (offset_chunk, offset_in_first_chunk) = array.find_chunk_idx(range.start); - let (length_chunk, length_in_last_chunk) = array.find_chunk_idx(range.end); - - if length_chunk == offset_chunk { - let chunk = array.chunk(offset_chunk); - return chunk.slice(offset_in_first_chunk..length_in_last_chunk); - } - - let mut chunks = (offset_chunk..length_chunk + 1) - .map(|i| array.chunk(i).clone()) - .collect_vec(); - if let Some(c) = chunks.first_mut() { - *c = c.slice(offset_in_first_chunk..c.len()); - } - - if length_in_last_chunk == 0 { - chunks.pop(); - } else if let Some(c) = chunks.last_mut() { - *c = c.slice(0..length_in_last_chunk); - } - - // SAFETY: chunks are slices of the original valid chunks, preserving their dtype. - // All chunks maintain the same dtype as the original array. - unsafe { ChunkedArray::new_unchecked(chunks, array.dtype().clone()).into_array() } - } - fn scalar_at(array: &ChunkedArray, index: usize) -> Scalar { let (chunk_index, chunk_offset) = array.find_chunk_idx(index); array.chunk(chunk_index).scalar_at(chunk_offset) diff --git a/vortex-array/src/arrays/chunked/vtable/rules.rs b/vortex-array/src/arrays/chunked/vtable/rules.rs index d5ccd23bfe5..c7b1493936c 100644 --- a/vortex-array/src/arrays/chunked/vtable/rules.rs +++ b/vortex-array/src/arrays/chunked/vtable/rules.rs @@ -4,7 +4,6 @@ use itertools::Itertools; use vortex_error::VortexResult; -use crate::Array; use crate::ArrayRef; use crate::IntoArray; use crate::arrays::AnyScalarFn; diff --git a/vortex-array/src/arrays/constant/vtable/mod.rs b/vortex-array/src/arrays/constant/vtable/mod.rs index 8e8e3e7fd73..f045fc3cb5c 100644 --- a/vortex-array/src/arrays/constant/vtable/mod.rs +++ b/vortex-array/src/arrays/constant/vtable/mod.rs @@ -2,6 +2,7 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors use std::fmt::Debug; +use std::ops::Range; use vortex_dtype::DType; use vortex_error::VortexResult; @@ -12,6 +13,7 @@ use vortex_scalar::ScalarValue; use crate::ArrayRef; use crate::EmptyMetadata; +use crate::IntoArray; use crate::arrays::ConstantArray; use crate::arrays::constant::vtable::rules::PARENT_RULES; use crate::buffer::BufferHandle; @@ -103,4 +105,10 @@ impl VTable for ConstantVTable { ) -> VortexResult> { PARENT_RULES.evaluate(array, parent, child_idx) } + + fn slice(array: &Self::Array, range: Range) -> VortexResult> { + Ok(Some( + ConstantArray::new(array.scalar.clone(), range.len()).into_array(), + )) + } } diff --git a/vortex-array/src/arrays/constant/vtable/operations.rs b/vortex-array/src/arrays/constant/vtable/operations.rs index 9fb91d3bf9f..fe632e50b9e 100644 --- a/vortex-array/src/arrays/constant/vtable/operations.rs +++ b/vortex-array/src/arrays/constant/vtable/operations.rs @@ -1,21 +1,13 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use std::ops::Range; - use vortex_scalar::Scalar; -use crate::ArrayRef; -use crate::IntoArray; use crate::arrays::ConstantArray; use crate::arrays::ConstantVTable; use crate::vtable::OperationsVTable; impl OperationsVTable for ConstantVTable { - fn slice(array: &ConstantArray, range: Range) -> ArrayRef { - ConstantArray::new(array.scalar.clone(), range.len()).into_array() - } - fn scalar_at(array: &ConstantArray, _index: usize) -> Scalar { array.scalar.clone() } diff --git a/vortex-array/src/arrays/decimal/vtable/mod.rs b/vortex-array/src/arrays/decimal/vtable/mod.rs index 1163c67595b..2e9c0e67275 100644 --- a/vortex-array/src/arrays/decimal/vtable/mod.rs +++ b/vortex-array/src/arrays/decimal/vtable/mod.rs @@ -1,6 +1,8 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +use std::ops::Range; + use vortex_buffer::Alignment; use vortex_buffer::Buffer; use vortex_dtype::DType; @@ -14,6 +16,7 @@ use vortex_scalar::DecimalType; use crate::ArrayRef; use crate::DeserializeMetadata; +use crate::IntoArray; use crate::ProstMetadata; use crate::SerializeMetadata; use crate::arrays::DecimalArray; @@ -24,6 +27,7 @@ use crate::vtable; use crate::vtable::ArrayVTableExt; use crate::vtable::NotSupported; use crate::vtable::VTable; +use crate::vtable::ValidityHelper; use crate::vtable::ValidityVTableFromValidityHelper; mod array; @@ -149,6 +153,17 @@ impl VTable for DecimalVTable { ) -> VortexResult> { RULES.evaluate(array, parent, child_idx) } + + fn slice(array: &Self::Array, range: Range) -> VortexResult> { + let result = match_each_decimal_value_type!(array.values_type(), |D| { + let sliced = array.buffer::().slice(range.clone()); + let validity = array.validity().clone().slice(range); + // SAFETY: Slicing preserves all DecimalArray invariants + unsafe { DecimalArray::new_unchecked(sliced, array.decimal_dtype(), validity) } + .into_array() + }); + Ok(Some(result)) + } } #[derive(Debug)] diff --git a/vortex-array/src/arrays/decimal/vtable/operations.rs b/vortex-array/src/arrays/decimal/vtable/operations.rs index 689b353efcc..35c04254e9a 100644 --- a/vortex-array/src/arrays/decimal/vtable/operations.rs +++ b/vortex-array/src/arrays/decimal/vtable/operations.rs @@ -1,34 +1,15 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use std::ops::Range; - -use vortex_buffer::Buffer; -use vortex_dtype::DecimalDType; -use vortex_dtype::NativeDecimalType; use vortex_dtype::match_each_decimal_value_type; use vortex_scalar::DecimalValue; use vortex_scalar::Scalar; -use crate::ArrayRef; -use crate::IntoArray; use crate::arrays::DecimalArray; use crate::arrays::DecimalVTable; -use crate::validity::Validity; use crate::vtable::OperationsVTable; impl OperationsVTable for DecimalVTable { - fn slice(array: &DecimalArray, range: Range) -> ArrayRef { - match_each_decimal_value_type!(array.values_type(), |D| { - slice_typed( - array.buffer::(), - range, - array.decimal_dtype(), - array.validity.clone(), - ) - }) - } - fn scalar_at(array: &DecimalArray, index: usize) -> Scalar { match_each_decimal_value_type!(array.values_type(), |D| { Scalar::decimal( @@ -40,21 +21,6 @@ impl OperationsVTable for DecimalVTable { } } -fn slice_typed( - values: Buffer, - range: Range, - decimal_dtype: DecimalDType, - validity: Validity, -) -> ArrayRef { - let sliced = values.slice(range.clone()); - let validity = validity.slice(range); - // SAFETY: Slicing preserves all DecimalArray invariants: - // - Buffer is correctly typed and sized from the slice operation. - // - Decimal dtype is preserved from the parent array. - // - Validity is correctly sliced to match the new length. - unsafe { DecimalArray::new_unchecked(sliced, decimal_dtype, validity) }.into_array() -} - #[cfg(test)] mod tests { use vortex_buffer::buffer; diff --git a/vortex-array/src/arrays/dict/vtable/mod.rs b/vortex-array/src/arrays/dict/vtable/mod.rs index 53dc862645b..8824fea071b 100644 --- a/vortex-array/src/arrays/dict/vtable/mod.rs +++ b/vortex-array/src/arrays/dict/vtable/mod.rs @@ -1,6 +1,8 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +use std::ops::Range; + use vortex_dtype::DType; use vortex_dtype::Nullability; use vortex_dtype::PType; @@ -13,6 +15,7 @@ use vortex_scalar::Scalar; use super::DictArray; use super::DictMetadata; use super::take_canonical; +use crate::Array; use crate::ArrayRef; use crate::Canonical; use crate::DeserializeMetadata; @@ -20,6 +23,7 @@ use crate::IntoArray; use crate::ProstMetadata; use crate::SerializeMetadata; use crate::arrays::ConstantArray; +use crate::arrays::ConstantVTable; use crate::arrays::vtable::rules::PARENT_RULES; use crate::buffer::BufferHandle; use crate::executor::ExecutionCtx; @@ -65,6 +69,28 @@ impl VTable for DictVTable { DictVTable.as_vtable() } + fn slice(array: &Self::Array, range: Range) -> VortexResult> { + let sliced_code = array.codes().slice(range); + if sliced_code.is::() { + let code = &sliced_code.scalar_at(0).as_primitive().as_::(); + return if let Some(code) = code { + Ok(Some( + ConstantArray::new(array.values().scalar_at(*code), sliced_code.len()) + .into_array(), + )) + } else { + Ok(Some( + ConstantArray::new(Scalar::null(array.dtype().clone()), sliced_code.len()) + .to_array(), + )) + }; + } + // SAFETY: slicing the codes preserves invariants. + Ok(Some( + unsafe { DictArray::new_unchecked(sliced_code, array.values().clone()) }.into_array(), + )) + } + fn metadata(array: &DictArray) -> VortexResult { Ok(ProstMetadata(DictMetadata { codes_ptype: PType::try_from(array.codes().dtype())? as i32, diff --git a/vortex-array/src/arrays/dict/vtable/operations.rs b/vortex-array/src/arrays/dict/vtable/operations.rs index 45590effbed..bfc8f9d774a 100644 --- a/vortex-array/src/arrays/dict/vtable/operations.rs +++ b/vortex-array/src/arrays/dict/vtable/operations.rs @@ -1,36 +1,15 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use std::ops::Range; - use vortex_error::VortexExpect; use vortex_scalar::Scalar; use super::DictVTable; use crate::Array; -use crate::ArrayRef; -use crate::IntoArray; -use crate::arrays::ConstantArray; -use crate::arrays::ConstantVTable; use crate::arrays::dict::DictArray; use crate::vtable::OperationsVTable; impl OperationsVTable for DictVTable { - fn slice(array: &DictArray, range: Range) -> ArrayRef { - let sliced_code = array.codes().slice(range); - if sliced_code.is::() { - let code = &sliced_code.scalar_at(0).as_primitive().as_::(); - return if let Some(code) = code { - ConstantArray::new(array.values().scalar_at(*code), sliced_code.len()).into_array() - } else { - ConstantArray::new(Scalar::null(array.dtype().clone()), sliced_code.len()) - .to_array() - }; - } - // SAFETY: slicing the codes preserves invariants. - unsafe { DictArray::new_unchecked(sliced_code, array.values().clone()).into_array() } - } - fn scalar_at(array: &DictArray, index: usize) -> Scalar { let Some(dict_index) = array.codes().scalar_at(index).as_primitive().as_::() else { return Scalar::null(array.dtype().clone()); diff --git a/vortex-array/src/arrays/extension/vtable/mod.rs b/vortex-array/src/arrays/extension/vtable/mod.rs index e16a4d4f65f..2ee09cd5c51 100644 --- a/vortex-array/src/arrays/extension/vtable/mod.rs +++ b/vortex-array/src/arrays/extension/vtable/mod.rs @@ -8,6 +8,8 @@ mod rules; mod validity; mod visitor; +use std::ops::Range; + use vortex_dtype::DType; use vortex_error::VortexExpect; use vortex_error::VortexResult; @@ -16,6 +18,7 @@ use vortex_error::vortex_ensure; use crate::ArrayRef; use crate::EmptyMetadata; +use crate::IntoArray; use crate::arrays::extension::ExtensionArray; use crate::arrays::extension::vtable::rules::PARENT_RULES; use crate::buffer::BufferHandle; @@ -101,6 +104,13 @@ impl VTable for ExtensionVTable { ) -> VortexResult> { PARENT_RULES.evaluate(array, parent, child_idx) } + + fn slice(array: &Self::Array, range: Range) -> VortexResult> { + Ok(Some( + ExtensionArray::new(array.ext_dtype().clone(), array.storage().slice(range)) + .into_array(), + )) + } } #[derive(Debug)] diff --git a/vortex-array/src/arrays/extension/vtable/operations.rs b/vortex-array/src/arrays/extension/vtable/operations.rs index 19c92288122..ab89ef99bb3 100644 --- a/vortex-array/src/arrays/extension/vtable/operations.rs +++ b/vortex-array/src/arrays/extension/vtable/operations.rs @@ -1,21 +1,13 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use std::ops::Range; - use vortex_scalar::Scalar; -use crate::ArrayRef; -use crate::IntoArray; use crate::arrays::extension::ExtensionArray; use crate::arrays::extension::ExtensionVTable; use crate::vtable::OperationsVTable; impl OperationsVTable for ExtensionVTable { - fn slice(array: &ExtensionArray, range: Range) -> ArrayRef { - ExtensionArray::new(array.ext_dtype().clone(), array.storage().slice(range)).into_array() - } - fn scalar_at(array: &ExtensionArray, index: usize) -> Scalar { Scalar::extension(array.ext_dtype().clone(), array.storage().scalar_at(index)) } diff --git a/vortex-array/src/arrays/filter/array.rs b/vortex-array/src/arrays/filter/array.rs index d7b5872a2b1..e01fa3c6a9b 100644 --- a/vortex-array/src/arrays/filter/array.rs +++ b/vortex-array/src/arrays/filter/array.rs @@ -30,6 +30,11 @@ impl FilterArray { } } + /// The child array being filtered. + pub fn child(&self) -> &ArrayRef { + &self.child + } + /// The mask used to filter the child array. pub fn filter_mask(&self) -> &Mask { &self.mask diff --git a/vortex-array/src/arrays/filter/vtable.rs b/vortex-array/src/arrays/filter/vtable.rs index e9abf72b108..3118997382c 100644 --- a/vortex-array/src/arrays/filter/vtable.rs +++ b/vortex-array/src/arrays/filter/vtable.rs @@ -4,7 +4,6 @@ use std::fmt::Debug; use std::fmt::Formatter; use std::hash::Hasher; -use std::ops::Range; use vortex_compute::filter::Filter; use vortex_dtype::DType; @@ -206,10 +205,6 @@ impl CanonicalVTable for FilterVTable { } impl OperationsVTable for FilterVTable { - fn slice(array: &FilterArray, range: Range) -> ArrayRef { - FilterArray::new(array.child.slice(range.clone()), array.mask.slice(range)).into_array() - } - fn scalar_at(array: &FilterArray, index: usize) -> Scalar { let rank_idx = array.mask.rank(index); array.child.scalar_at(rank_idx) diff --git a/vortex-array/src/arrays/fixed_size_list/vtable/mod.rs b/vortex-array/src/arrays/fixed_size_list/vtable/mod.rs index ee6d5e01683..be031c9c6ff 100644 --- a/vortex-array/src/arrays/fixed_size_list/vtable/mod.rs +++ b/vortex-array/src/arrays/fixed_size_list/vtable/mod.rs @@ -1,6 +1,8 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +use std::ops::Range; + use vortex_dtype::DType; use vortex_error::VortexExpect; use vortex_error::VortexResult; @@ -9,6 +11,7 @@ use vortex_error::vortex_ensure; use crate::ArrayRef; use crate::EmptyMetadata; +use crate::IntoArray; use crate::arrays::FixedSizeListArray; use crate::buffer::BufferHandle; use crate::serde::ArrayChildren; @@ -19,6 +22,7 @@ use crate::vtable::ArrayVTable; use crate::vtable::ArrayVTableExt; use crate::vtable::NotSupported; use crate::vtable::VTable; +use crate::vtable::ValidityHelper; use crate::vtable::ValidityVTableFromValidityHelper; mod array; @@ -49,6 +53,26 @@ impl VTable for FixedSizeListVTable { ArrayId::new_ref("vortex.fixed_size_list") } + fn slice(array: &Self::Array, range: Range) -> VortexResult> { + let new_len = range.len(); + let list_size = array.list_size() as usize; + + // SAFETY: Slicing preserves FixedSizeListArray invariants + Ok(Some( + unsafe { + FixedSizeListArray::new_unchecked( + array + .elements() + .slice(range.start * list_size..range.end * list_size), + array.list_size(), + array.validity().slice(range), + new_len, + ) + } + .into_array(), + )) + } + fn encoding(_array: &Self::Array) -> ArrayVTable { FixedSizeListVTable.as_vtable() } diff --git a/vortex-array/src/arrays/fixed_size_list/vtable/operations.rs b/vortex-array/src/arrays/fixed_size_list/vtable/operations.rs index fd47de75773..0daac82189b 100644 --- a/vortex-array/src/arrays/fixed_size_list/vtable/operations.rs +++ b/vortex-array/src/arrays/fixed_size_list/vtable/operations.rs @@ -1,47 +1,13 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use std::ops::Range; - use vortex_scalar::Scalar; -use crate::ArrayRef; -use crate::IntoArray; use crate::arrays::FixedSizeListArray; use crate::arrays::FixedSizeListVTable; use crate::vtable::OperationsVTable; -use crate::vtable::ValidityHelper; impl OperationsVTable for FixedSizeListVTable { - fn slice(array: &FixedSizeListArray, range: Range) -> ArrayRef { - let start = range.start; - let end = range.end; - - debug_assert!( - start <= end && end <= array.len(), - "slice [{start}..{end}) out of bounds: then len is {}", - array.len() - ); - - let new_len = end - start; - let list_size = array.list_size() as usize; - - // SAFETY: - // - If the `list_size` is 0, then the elements slice has length 0 - // - The length of the sliced elements must be a multiple of the `list_size` since we - // multiply both ends by `list_size` - // - The validity is sliced with equal length to `new_len` - unsafe { - FixedSizeListArray::new_unchecked( - array.elements().slice(start * list_size..end * list_size), - array.list_size(), - array.validity().slice(range), - new_len, - ) - } - .into_array() - } - fn scalar_at(array: &FixedSizeListArray, index: usize) -> Scalar { // By the preconditions we know that the list scalar is not null. let list = array.fixed_size_list_elements_at(index); diff --git a/vortex-array/src/arrays/list/vtable/mod.rs b/vortex-array/src/arrays/list/vtable/mod.rs index 8725202626d..2c8e0a207e5 100644 --- a/vortex-array/src/arrays/list/vtable/mod.rs +++ b/vortex-array/src/arrays/list/vtable/mod.rs @@ -1,6 +1,8 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +use std::ops::Range; + use vortex_dtype::DType; use vortex_dtype::Nullability; use vortex_dtype::PType; @@ -13,6 +15,7 @@ use crate::Array; use crate::ArrayRef; use crate::Canonical; use crate::ExecutionCtx; +use crate::IntoArray; use crate::ProstMetadata; use crate::arrays::ListArray; use crate::arrays::list::vtable::kernel::PARENT_KERNELS; @@ -27,6 +30,7 @@ use crate::vtable::ArrayVTable; use crate::vtable::ArrayVTableExt; use crate::vtable::NotSupported; use crate::vtable::VTable; +use crate::vtable::ValidityHelper; use crate::vtable::ValidityVTableFromValidityHelper; mod array; @@ -63,6 +67,17 @@ impl VTable for ListVTable { ArrayId::new_ref("vortex.list") } + fn slice(array: &Self::Array, range: Range) -> VortexResult> { + Ok(Some( + ListArray::new( + array.elements().clone(), + array.offsets().slice(range.start..range.end + 1), + array.validity().slice(range), + ) + .into_array(), + )) + } + fn encoding(_array: &Self::Array) -> ArrayVTable { ListVTable.as_vtable() } diff --git a/vortex-array/src/arrays/list/vtable/operations.rs b/vortex-array/src/arrays/list/vtable/operations.rs index 557a0f13d8e..582ee3f2503 100644 --- a/vortex-array/src/arrays/list/vtable/operations.rs +++ b/vortex-array/src/arrays/list/vtable/operations.rs @@ -1,28 +1,15 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use std::ops::Range; use std::sync::Arc; use vortex_scalar::Scalar; -use crate::ArrayRef; -use crate::IntoArray; use crate::arrays::ListArray; use crate::arrays::ListVTable; use crate::vtable::OperationsVTable; -use crate::vtable::ValidityHelper; impl OperationsVTable for ListVTable { - fn slice(array: &ListArray, range: Range) -> ArrayRef { - ListArray::new( - array.elements().clone(), - array.offsets().slice(range.start..range.end + 1), - array.validity().slice(range), - ) - .into_array() - } - fn scalar_at(array: &ListArray, index: usize) -> Scalar { // By the preconditions we know that the list scalar is not null. let elems = array.list_elements_at(index); diff --git a/vortex-array/src/arrays/listview/vtable/mod.rs b/vortex-array/src/arrays/listview/vtable/mod.rs index 9e1dffab074..28d86587c35 100644 --- a/vortex-array/src/arrays/listview/vtable/mod.rs +++ b/vortex-array/src/arrays/listview/vtable/mod.rs @@ -1,6 +1,8 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +use std::ops::Range; + use vortex_dtype::DType; use vortex_dtype::Nullability; use vortex_dtype::PType; @@ -11,6 +13,7 @@ use vortex_error::vortex_ensure; use crate::ArrayRef; use crate::DeserializeMetadata; +use crate::IntoArray; use crate::ProstMetadata; use crate::SerializeMetadata; use crate::arrays::ListViewArray; @@ -24,6 +27,7 @@ use crate::vtable::ArrayVTable; use crate::vtable::ArrayVTableExt; use crate::vtable::NotSupported; use crate::vtable::VTable; +use crate::vtable::ValidityHelper; use crate::vtable::ValidityVTableFromValidityHelper; mod array; @@ -65,6 +69,22 @@ impl VTable for ListViewVTable { ArrayId::new_ref("vortex.listview") } + fn slice(array: &Self::Array, range: Range) -> VortexResult> { + // SAFETY: Slicing the components of an existing valid array is still valid. + Ok(Some( + unsafe { + ListViewArray::new_unchecked( + array.elements().clone(), + array.offsets().slice(range.clone()), + array.sizes().slice(range.clone()), + array.validity().slice(range), + ) + .with_zero_copy_to_list(array.is_zero_copy_to_list()) + } + .into_array(), + )) + } + fn encoding(_array: &Self::Array) -> ArrayVTable { ListViewVTable.as_vtable() } diff --git a/vortex-array/src/arrays/listview/vtable/operations.rs b/vortex-array/src/arrays/listview/vtable/operations.rs index 0cf380aeb7f..ec58b5c636b 100644 --- a/vortex-array/src/arrays/listview/vtable/operations.rs +++ b/vortex-array/src/arrays/listview/vtable/operations.rs @@ -1,44 +1,15 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use std::ops::Range; use std::sync::Arc; use vortex_scalar::Scalar; -use crate::ArrayRef; -use crate::IntoArray; use crate::arrays::ListViewArray; use crate::arrays::ListViewVTable; use crate::vtable::OperationsVTable; -use crate::vtable::ValidityHelper; impl OperationsVTable for ListViewVTable { - fn slice(array: &ListViewArray, range: Range) -> ArrayRef { - let start = range.start; - let end = range.end; - - // We implement slice by simply slicing the views. We leave the child `elements` array alone - // since slicing could potentially require calculating which elements are referenced by the - // new set of views. - - // SAFETY: The preconditions of `slice` mean that the bounds have already been checked, and - // slicing the components of an existing valid array is still valid. - // Additionally, slicing elements of a `ListViewArray` that is already zero-copyable to a - // `ListArray` does not reorder or create gaps and overlaps, slicing maintains whatever - // `is_zero_copy_to_list` flag it already had. - unsafe { - ListViewArray::new_unchecked( - array.elements().clone(), - array.offsets().slice(start..end), - array.sizes().slice(start..end), - array.validity().slice(start..end), - ) - .with_zero_copy_to_list(array.is_zero_copy_to_list()) - } - .into_array() - } - fn scalar_at(array: &ListViewArray, index: usize) -> Scalar { // By the preconditions we know that the list scalar is not null. let list = array.list_elements_at(index); diff --git a/vortex-array/src/arrays/masked/vtable/mod.rs b/vortex-array/src/arrays/masked/vtable/mod.rs index ae1692219ad..d5543321c9f 100644 --- a/vortex-array/src/arrays/masked/vtable/mod.rs +++ b/vortex-array/src/arrays/masked/vtable/mod.rs @@ -6,6 +6,8 @@ mod canonical; mod operations; mod validity; +use std::ops::Range; + use vortex_dtype::DType; use vortex_error::VortexExpect; use vortex_error::VortexResult; @@ -25,6 +27,7 @@ use crate::arrays::masked::mask_validity_canonical; use crate::buffer::BufferHandle; use crate::executor::ExecutionCtx; use crate::serde::ArrayChildren; +use crate::stats::ArrayStats; use crate::validity::Validity; use crate::vtable; use crate::vtable::ArrayId; @@ -66,6 +69,21 @@ impl VTable for MaskedVTable { ArrayId::new_ref("vortex.masked") } + fn slice(array: &Self::Array, range: Range) -> VortexResult> { + let child = array.child.slice(range.clone()); + let validity = array.validity.slice(range); + + Ok(Some( + MaskedArray { + child, + validity, + dtype: array.dtype.clone(), + stats: ArrayStats::default(), + } + .into_array(), + )) + } + fn encoding(_array: &Self::Array) -> ArrayVTable { MaskedVTable.as_vtable() } diff --git a/vortex-array/src/arrays/masked/vtable/operations.rs b/vortex-array/src/arrays/masked/vtable/operations.rs index 9c8bf51d959..ad213e6b4c3 100644 --- a/vortex-array/src/arrays/masked/vtable/operations.rs +++ b/vortex-array/src/arrays/masked/vtable/operations.rs @@ -1,31 +1,13 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use std::ops::Range; - use vortex_scalar::Scalar; -use crate::ArrayRef; -use crate::IntoArray; use crate::arrays::MaskedVTable; use crate::arrays::masked::MaskedArray; -use crate::stats::ArrayStats; use crate::vtable::OperationsVTable; impl OperationsVTable for MaskedVTable { - fn slice(array: &MaskedArray, range: Range) -> ArrayRef { - let child = array.child.slice(range.clone()); - let validity = array.validity.slice(range); - - MaskedArray { - child, - validity, - dtype: array.dtype.clone(), - stats: ArrayStats::default(), - } - .into_array() - } - fn scalar_at(array: &MaskedArray, index: usize) -> Scalar { // Invalid indices are handled by the entrypoint function. array.child.scalar_at(index).into_nullable() diff --git a/vortex-array/src/arrays/mod.rs b/vortex-array/src/arrays/mod.rs index 8171a89490b..59a97d19d9a 100644 --- a/vortex-array/src/arrays/mod.rs +++ b/vortex-array/src/arrays/mod.rs @@ -30,6 +30,7 @@ mod masked; mod null; mod primitive; mod scalar_fn; +mod slice; mod struct_; mod varbin; mod varbinview; @@ -54,6 +55,7 @@ pub use masked::*; pub use null::*; pub use primitive::*; pub use scalar_fn::*; +pub use slice::*; pub use struct_::*; pub use varbin::*; pub use varbinview::*; diff --git a/vortex-array/src/arrays/null/mod.rs b/vortex-array/src/arrays/null/mod.rs index fd0368a8d61..95743b9ba84 100644 --- a/vortex-array/src/arrays/null/mod.rs +++ b/vortex-array/src/arrays/null/mod.rs @@ -90,6 +90,10 @@ impl VTable for NullVTable { ); Ok(()) } + + fn slice(_array: &Self::Array, range: Range) -> VortexResult> { + Ok(Some(NullArray::new(range.len()).into_array())) + } } /// A array where all values are null. @@ -169,10 +173,6 @@ impl CanonicalVTable for NullVTable { } impl OperationsVTable for NullVTable { - fn slice(_array: &NullArray, range: Range) -> ArrayRef { - NullArray::new(range.len()).into_array() - } - fn scalar_at(_array: &NullArray, _index: usize) -> Scalar { Scalar::null(DType::Null) } diff --git a/vortex-array/src/arrays/primitive/vtable/mod.rs b/vortex-array/src/arrays/primitive/vtable/mod.rs index 5093bcef0c1..cc3010939d8 100644 --- a/vortex-array/src/arrays/primitive/vtable/mod.rs +++ b/vortex-array/src/arrays/primitive/vtable/mod.rs @@ -1,8 +1,12 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +use std::ops::Range; + use vortex_dtype::DType; +use vortex_dtype::NativePType; use vortex_dtype::PType; +use vortex_dtype::match_each_native_ptype; use vortex_error::VortexExpect; use vortex_error::VortexResult; use vortex_error::vortex_bail; @@ -10,6 +14,7 @@ use vortex_error::vortex_ensure; use crate::ArrayRef; use crate::EmptyMetadata; +use crate::IntoArray; use crate::arrays::PrimitiveArray; use crate::buffer::BufferHandle; use crate::serde::ArrayChildren; @@ -18,6 +23,7 @@ use crate::vtable; use crate::vtable::ArrayVTableExt; use crate::vtable::NotSupported; use crate::vtable::VTable; +use crate::vtable::ValidityHelper; use crate::vtable::ValidityVTableFromValidityHelper; mod array; @@ -145,6 +151,18 @@ impl VTable for PrimitiveVTable { ) -> VortexResult> { RULES.evaluate(array, parent, child_idx) } + + fn slice(array: &Self::Array, range: Range) -> VortexResult> { + let result = match_each_native_ptype!(array.ptype(), |T| { + PrimitiveArray::from_buffer_handle( + array.buffer_handle().slice_typed::(range.clone()), + T::PTYPE, + array.validity().slice(range), + ) + .into_array() + }); + Ok(Some(result)) + } } #[derive(Debug)] diff --git a/vortex-array/src/arrays/primitive/vtable/operations.rs b/vortex-array/src/arrays/primitive/vtable/operations.rs index 9cd846ca460..63a472ec47a 100644 --- a/vortex-array/src/arrays/primitive/vtable/operations.rs +++ b/vortex-array/src/arrays/primitive/vtable/operations.rs @@ -1,30 +1,14 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use std::ops::Range; - use vortex_dtype::match_each_native_ptype; use vortex_scalar::Scalar; -use crate::ArrayRef; -use crate::IntoArray; use crate::arrays::PrimitiveArray; use crate::arrays::PrimitiveVTable; use crate::vtable::OperationsVTable; -use crate::vtable::ValidityHelper; impl OperationsVTable for PrimitiveVTable { - fn slice(array: &PrimitiveArray, range: Range) -> ArrayRef { - match_each_native_ptype!(array.ptype(), |P| { - PrimitiveArray::from_buffer_handle( - array.buffer.slice_typed::

(range.clone()), - array.ptype(), - array.validity().slice(range), - ) - .into_array() - }) - } - fn scalar_at(array: &PrimitiveArray, index: usize) -> Scalar { match_each_native_ptype!(array.ptype(), |T| { Scalar::primitive(array.as_slice::()[index], array.dtype().nullability()) diff --git a/vortex-array/src/arrays/scalar_fn/vtable/mod.rs b/vortex-array/src/arrays/scalar_fn/vtable/mod.rs index 222ccc92186..1412021b81a 100644 --- a/vortex-array/src/arrays/scalar_fn/vtable/mod.rs +++ b/vortex-array/src/arrays/scalar_fn/vtable/mod.rs @@ -13,6 +13,7 @@ use std::hash::Hash; use std::hash::Hasher; use std::marker::PhantomData; use std::ops::Deref; +use std::ops::Range; use std::sync::Arc; use itertools::Itertools; @@ -183,6 +184,26 @@ impl VTable for ScalarFnVTable { ) -> VortexResult> { PARENT_RULES.evaluate(array, parent, child_idx) } + + fn slice(array: &Self::Array, range: Range) -> VortexResult> { + let children: Vec<_> = array + .children() + .iter() + .map(|c| c.slice(range.clone())) + .collect(); + + Ok(Some( + ScalarFnArray { + vtable: array.vtable.clone(), + scalar_fn: array.scalar_fn.clone(), + dtype: array.dtype.clone(), + len: range.len(), + children, + stats: Default::default(), + } + .into_array(), + )) + } } /// Array factory functions for scalar functions. diff --git a/vortex-array/src/arrays/scalar_fn/vtable/operations.rs b/vortex-array/src/arrays/scalar_fn/vtable/operations.rs index 946043b6c35..c0d785072d6 100644 --- a/vortex-array/src/arrays/scalar_fn/vtable/operations.rs +++ b/vortex-array/src/arrays/scalar_fn/vtable/operations.rs @@ -1,15 +1,12 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use std::ops::Range; use std::sync::Arc; use vortex_error::VortexExpect; use vortex_scalar::Scalar; use crate::Array; -use crate::ArrayRef; -use crate::IntoArray; use crate::arrays::scalar_fn::array::ScalarFnArray; use crate::arrays::scalar_fn::vtable::ScalarFnVTable; use crate::expr::Expression; @@ -17,24 +14,6 @@ use crate::expr::lit; use crate::vtable::OperationsVTable; impl OperationsVTable for ScalarFnVTable { - fn slice(array: &ScalarFnArray, range: Range) -> ArrayRef { - let children: Vec<_> = array - .children() - .iter() - .map(|c| c.slice(range.clone())) - .collect(); - - ScalarFnArray { - vtable: array.vtable.clone(), - scalar_fn: array.scalar_fn.clone(), - dtype: array.dtype.clone(), - len: range.len(), - children, - stats: Default::default(), - } - .into_array() - } - fn scalar_at(array: &ScalarFnArray, index: usize) -> Scalar { // TODO(ngates): we should evaluate the scalar function over the scalar inputs. let inputs: Arc<[_]> = array diff --git a/vortex-array/src/arrays/slice/array.rs b/vortex-array/src/arrays/slice/array.rs new file mode 100644 index 00000000000..e5b2048437f --- /dev/null +++ b/vortex-array/src/arrays/slice/array.rs @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use std::ops::Range; + +use vortex_error::vortex_panic; + +use crate::ArrayRef; +use crate::stats::ArrayStats; + +#[derive(Clone, Debug)] +pub struct SliceArray { + pub(super) child: ArrayRef, + pub(super) range: Range, + pub(super) stats: ArrayStats, +} + +impl SliceArray { + pub fn new(child: ArrayRef, range: Range) -> Self { + if range.end > child.len() { + vortex_panic!( + "SliceArray range out of bounds: range {:?} exceeds child array length {}", + range, + child.len() + ); + } + Self { + child, + range, + stats: ArrayStats::default(), + } + } + + /// The range used to slice the child array. + pub fn slice_range(&self) -> &Range { + &self.range + } + + /// The child array being sliced. + pub fn child(&self) -> &ArrayRef { + &self.child + } +} diff --git a/vortex-array/src/arrays/slice/mod.rs b/vortex-array/src/arrays/slice/mod.rs new file mode 100644 index 00000000000..548b13acff5 --- /dev/null +++ b/vortex-array/src/arrays/slice/mod.rs @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +mod array; +mod rules; +mod vtable; + +pub use array::*; +pub use vtable::*; diff --git a/vortex-array/src/arrays/slice/rules.rs b/vortex-array/src/arrays/slice/rules.rs new file mode 100644 index 00000000000..8769bad4659 --- /dev/null +++ b/vortex-array/src/arrays/slice/rules.rs @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_error::VortexResult; + +use crate::ArrayRef; +use crate::arrays::slice::SliceArray; +use crate::arrays::slice::SliceVTable; +use crate::optimizer::rules::ArrayReduceRule; +use crate::optimizer::rules::ReduceRuleSet; + +pub(super) const RULES: ReduceRuleSet = ReduceRuleSet::new(&[&SliceVTableRule]); + +/// Generic reduce rule that calls VTable::slice on the child. +/// This allows all encodings to implement their own slice logic. +#[derive(Debug)] +struct SliceVTableRule; + +impl ArrayReduceRule for SliceVTableRule { + fn reduce(&self, array: &SliceArray) -> VortexResult> { + // Try the child's VTable::slice implementation + array + .child() + .encoding() + .slice(array.child(), array.slice_range().clone()) + } +} diff --git a/vortex-array/src/arrays/slice/vtable.rs b/vortex-array/src/arrays/slice/vtable.rs new file mode 100644 index 00000000000..a8bb3d9714c --- /dev/null +++ b/vortex-array/src/arrays/slice/vtable.rs @@ -0,0 +1,243 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use std::fmt::Debug; +use std::fmt::Formatter; +use std::hash::Hash; +use std::hash::Hasher; +use std::ops::Range; + +use vortex_dtype::DType; +use vortex_error::VortexExpect; +use vortex_error::VortexResult; +use vortex_error::vortex_bail; +use vortex_error::vortex_ensure; +use vortex_mask::Mask; +use vortex_scalar::Scalar; + +use crate::Array; +use crate::ArrayBufferVisitor; +use crate::ArrayChildVisitor; +use crate::ArrayEq; +use crate::ArrayHash; +use crate::ArrayRef; +use crate::Canonical; +use crate::IntoArray; +use crate::LEGACY_SESSION; +use crate::Precision; +use crate::VortexSessionExecute; +use crate::arrays::slice::array::SliceArray; +use crate::arrays::slice::rules::RULES; +use crate::buffer::BufferHandle; +use crate::executor::ExecutionCtx; +use crate::serde::ArrayChildren; +use crate::stats::StatsSetRef; +use crate::validity::Validity; +use crate::vtable; +use crate::vtable::ArrayId; +use crate::vtable::ArrayVTable; +use crate::vtable::ArrayVTableExt; +use crate::vtable::BaseArrayVTable; +use crate::vtable::CanonicalVTable; +use crate::vtable::NotSupported; +use crate::vtable::OperationsVTable; +use crate::vtable::VTable; +use crate::vtable::ValidityVTable; +use crate::vtable::VisitorVTable; + +vtable!(Slice); + +#[derive(Debug)] +pub struct SliceVTable; + +impl VTable for SliceVTable { + type Array = SliceArray; + type Metadata = SliceMetadata; + type ArrayVTable = Self; + type CanonicalVTable = Self; + type OperationsVTable = Self; + type ValidityVTable = Self; + type VisitorVTable = Self; + type ComputeVTable = NotSupported; + type EncodeVTable = NotSupported; + + fn id(&self) -> ArrayId { + ArrayId::from("vortex.slice") + } + + fn encoding(_array: &Self::Array) -> ArrayVTable { + SliceVTable.as_vtable() + } + + fn metadata(array: &Self::Array) -> VortexResult { + Ok(SliceMetadata(array.range.clone())) + } + + fn serialize(_metadata: Self::Metadata) -> VortexResult>> { + Ok(None) + } + + fn deserialize(_bytes: &[u8]) -> VortexResult { + vortex_bail!("Slice array is not serializable") + } + + fn build( + &self, + dtype: &DType, + len: usize, + metadata: &SliceMetadata, + _buffers: &[BufferHandle], + children: &dyn ArrayChildren, + ) -> VortexResult { + assert_eq!(len, metadata.0.len()); + let child = children.get(0, dtype, metadata.0.end)?; + Ok(SliceArray { + child, + range: metadata.0.clone(), + stats: Default::default(), + }) + } + + fn with_children(array: &mut Self::Array, children: Vec) -> VortexResult<()> { + vortex_ensure!( + children.len() == 1, + "SliceArray expects exactly 1 child, got {}", + children.len() + ); + array.child = children + .into_iter() + .next() + .vortex_expect("children length already validated"); + Ok(()) + } + + fn execute(array: &Self::Array, ctx: &mut ExecutionCtx) -> VortexResult { + // Execute the child to get canonical form, then slice it + let canonical = array.child.clone().execute::(ctx)?; + let result = canonical.as_ref().slice(array.range.clone()); + assert!( + result.is_canonical(), + "this must be canonical fix the slice impl for the dtype {} showing this error", + array.dtype() + ); + // TODO(joe): this is a downcast not a execute. + Ok(result.to_canonical()) + } + + fn reduce(array: &Self::Array) -> VortexResult> { + RULES.evaluate(array) + } + + fn slice(array: &Self::Array, range: Range) -> VortexResult> { + let inner_range = array.slice_range(); + + let combined_start = inner_range.start + range.start; + let combined_end = inner_range.start + range.end; + + Ok(Some( + SliceArray::new(array.child().clone(), combined_start..combined_end).into_array(), + )) + } +} + +impl BaseArrayVTable for SliceVTable { + fn len(array: &SliceArray) -> usize { + array.range.len() + } + + fn dtype(array: &SliceArray) -> &DType { + array.child.dtype() + } + + fn stats(array: &SliceArray) -> StatsSetRef<'_> { + array.stats.to_ref(array.as_ref()) + } + + fn array_hash(array: &SliceArray, state: &mut H, precision: Precision) { + array.child.array_hash(state, precision); + array.range.start.hash(state); + array.range.end.hash(state); + } + + fn array_eq(array: &SliceArray, other: &SliceArray, precision: Precision) -> bool { + array.child.array_eq(&other.child, precision) && array.range == other.range + } +} + +impl CanonicalVTable for SliceVTable { + fn canonicalize(array: &SliceArray) -> Canonical { + SliceVTable::execute(array, &mut LEGACY_SESSION.create_execution_ctx()) + .vortex_expect("Canonicalize should be fallible") + } +} + +impl OperationsVTable for SliceVTable { + fn scalar_at(array: &SliceArray, index: usize) -> Scalar { + array.child.scalar_at(array.range.start + index) + } +} + +impl ValidityVTable for SliceVTable { + fn is_valid(array: &SliceArray, index: usize) -> bool { + array.child.is_valid(array.range.start + index) + } + + fn all_valid(array: &SliceArray) -> bool { + // This is an over-approximation: if the entire child is all valid, + // then the slice is all valid too. + array.child.all_valid() + } + + fn all_invalid(array: &SliceArray) -> bool { + // This is an over-approximation: if the entire child is all invalid, + // then the slice is all invalid too. + array.child.all_invalid() + } + + fn validity(array: &SliceArray) -> VortexResult { + Ok(array.child.validity()?.slice(array.range.clone())) + } + + fn validity_mask(array: &SliceArray) -> Mask { + array.child.validity_mask().slice(array.range.clone()) + } +} + +impl VisitorVTable for SliceVTable { + fn visit_buffers(_array: &SliceArray, _visitor: &mut dyn ArrayBufferVisitor) {} + + fn visit_children(array: &SliceArray, visitor: &mut dyn ArrayChildVisitor) { + visitor.visit_child("child", &array.child); + } +} + +pub struct SliceMetadata(pub(super) Range); + +impl Debug for SliceMetadata { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "{}..{}", self.0.start, self.0.end) + } +} + +#[cfg(test)] +mod tests { + use vortex_error::VortexResult; + + use crate::Array; + use crate::IntoArray; + use crate::arrays::PrimitiveArray; + use crate::arrays::SliceArray; + use crate::assert_arrays_eq; + + #[test] + fn test_slice_slice() -> VortexResult<()> { + // Slice(1..4, Slice(2..8, base)) combines to Slice(3..6, base) + let arr = PrimitiveArray::from_iter(0i32..10).into_array(); + let inner_slice = SliceArray::new(arr, 2..8).into_array(); + let slice = inner_slice.slice(1..4); + + assert_arrays_eq!(slice, PrimitiveArray::from_iter([3i32, 4, 5])); + + Ok(()) + } +} diff --git a/vortex-array/src/arrays/struct_/vtable/mod.rs b/vortex-array/src/arrays/struct_/vtable/mod.rs index f50af218265..6a04c943f12 100644 --- a/vortex-array/src/arrays/struct_/vtable/mod.rs +++ b/vortex-array/src/arrays/struct_/vtable/mod.rs @@ -1,6 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +use std::ops::Range; use std::sync::Arc; use itertools::Itertools; @@ -12,6 +13,7 @@ use vortex_error::vortex_ensure; use crate::ArrayRef; use crate::EmptyMetadata; +use crate::IntoArray; use crate::arrays::struct_::StructArray; use crate::arrays::struct_::vtable::rules::PARENT_RULES; use crate::buffer::BufferHandle; @@ -21,6 +23,7 @@ use crate::vtable; use crate::vtable::ArrayVTableExt; use crate::vtable::NotSupported; use crate::vtable::VTable; +use crate::vtable::ValidityHelper; use crate::vtable::ValidityVTableFromValidityHelper; mod array; @@ -146,6 +149,27 @@ impl VTable for StructVTable { ) -> VortexResult> { PARENT_RULES.evaluate(array, parent, child_idx) } + + fn slice(array: &Self::Array, range: Range) -> VortexResult> { + let fields = array + .fields() + .iter() + .map(|field| field.slice(range.clone())) + .collect_vec(); + + // SAFETY: Slicing preserves all StructArray invariants + Ok(Some( + unsafe { + StructArray::new_unchecked( + fields, + array.struct_fields().clone(), + range.len(), + array.validity().slice(range), + ) + } + .into_array(), + )) + } } #[derive(Debug)] diff --git a/vortex-array/src/arrays/struct_/vtable/operations.rs b/vortex-array/src/arrays/struct_/vtable/operations.rs index 8cb693bbb88..9efbd216e89 100644 --- a/vortex-array/src/arrays/struct_/vtable/operations.rs +++ b/vortex-array/src/arrays/struct_/vtable/operations.rs @@ -1,41 +1,14 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use std::ops::Range; - use itertools::Itertools; use vortex_scalar::Scalar; -use crate::ArrayRef; -use crate::IntoArray; use crate::arrays::struct_::StructArray; use crate::arrays::struct_::StructVTable; use crate::vtable::OperationsVTable; -use crate::vtable::ValidityHelper; impl OperationsVTable for StructVTable { - fn slice(array: &StructArray, range: Range) -> ArrayRef { - let fields = array - .fields() - .iter() - .map(|field| field.slice(range.clone())) - .collect_vec(); - // SAFETY: All invariants are preserved: - // - fields.len() == dtype.names().len() (same struct fields) - // - Every field has length == range.len() (all sliced to same range) - // - Each field's dtype matches the struct dtype (unchanged from original) - // - Validity length matches array length (both sliced to same range) - unsafe { - StructArray::new_unchecked( - fields, - array.struct_fields().clone(), - range.len(), - array.validity().slice(range), - ) - } - .into_array() - } - fn scalar_at(array: &StructArray, index: usize) -> Scalar { Scalar::struct_( array.dtype().clone(), diff --git a/vortex-array/src/arrays/varbin/vtable/mod.rs b/vortex-array/src/arrays/varbin/vtable/mod.rs index f679569760f..7b3700317e8 100644 --- a/vortex-array/src/arrays/varbin/vtable/mod.rs +++ b/vortex-array/src/arrays/varbin/vtable/mod.rs @@ -1,6 +1,8 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +use std::ops::Range; + use vortex_dtype::DType; use vortex_dtype::Nullability; use vortex_dtype::PType; @@ -11,6 +13,7 @@ use vortex_error::vortex_err; use crate::ArrayRef; use crate::DeserializeMetadata; +use crate::IntoArray; use crate::ProstMetadata; use crate::SerializeMetadata; use crate::arrays::varbin::VarBinArray; @@ -23,6 +26,7 @@ use crate::vtable::ArrayVTable; use crate::vtable::ArrayVTableExt; use crate::vtable::NotSupported; use crate::vtable::VTable; +use crate::vtable::ValidityHelper; use crate::vtable::ValidityVTableFromValidityHelper; mod array; @@ -130,6 +134,18 @@ impl VTable for VarBinVTable { } Ok(()) } + + fn slice(array: &Self::Array, range: Range) -> VortexResult> { + Ok(Some(unsafe { + VarBinArray::new_unchecked( + array.offsets().slice(range.start..range.end + 1), + array.bytes().clone(), + array.dtype().clone(), + array.validity().slice(range), + ) + .into_array() + })) + } } #[derive(Debug)] diff --git a/vortex-array/src/arrays/varbin/vtable/operations.rs b/vortex-array/src/arrays/varbin/vtable/operations.rs index 0c7f4d1df1f..25cd1255222 100644 --- a/vortex-array/src/arrays/varbin/vtable/operations.rs +++ b/vortex-array/src/arrays/varbin/vtable/operations.rs @@ -1,32 +1,14 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use std::ops::Range; - use vortex_scalar::Scalar; -use crate::Array; -use crate::ArrayRef; -use crate::IntoArray; use crate::arrays::VarBinArray; use crate::arrays::VarBinVTable; use crate::arrays::varbin_scalar; use crate::vtable::OperationsVTable; -use crate::vtable::ValidityHelper; impl OperationsVTable for VarBinVTable { - fn slice(array: &VarBinArray, range: Range) -> ArrayRef { - unsafe { - VarBinArray::new_unchecked( - array.offsets().slice(range.start..range.end + 1), - array.bytes().clone(), - array.dtype().clone(), - array.validity().slice(range), - ) - .into_array() - } - } - fn scalar_at(array: &VarBinArray, index: usize) -> Scalar { varbin_scalar(array.bytes_at(index), array.dtype()) } diff --git a/vortex-array/src/arrays/varbinview/vtable/mod.rs b/vortex-array/src/arrays/varbinview/vtable/mod.rs index bc5f086d38a..f30a00405b9 100644 --- a/vortex-array/src/arrays/varbinview/vtable/mod.rs +++ b/vortex-array/src/arrays/varbinview/vtable/mod.rs @@ -1,6 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +use std::ops::Range; use std::sync::Arc; use vortex_buffer::Buffer; @@ -14,6 +15,7 @@ use vortex_vector::binaryview::BinaryView; use crate::ArrayRef; use crate::EmptyMetadata; +use crate::IntoArray; use crate::arrays::varbinview::VarBinViewArray; use crate::buffer::BufferHandle; use crate::serde::ArrayChildren; @@ -24,6 +26,7 @@ use crate::vtable::ArrayVTable; use crate::vtable::ArrayVTableExt; use crate::vtable::NotSupported; use crate::vtable::VTable; +use crate::vtable::ValidityHelper; use crate::vtable::ValidityVTableFromValidityHelper; mod array; @@ -121,4 +124,16 @@ impl VTable for VarBinViewVTable { } Ok(()) } + + fn slice(array: &Self::Array, range: Range) -> VortexResult> { + Ok(Some( + VarBinViewArray::new( + array.views().slice(range.clone()), + array.buffers().clone(), + array.dtype().clone(), + array.validity().slice(range), + ) + .into_array(), + )) + } } diff --git a/vortex-array/src/arrays/varbinview/vtable/operations.rs b/vortex-array/src/arrays/varbinview/vtable/operations.rs index 9794fd6f328..4eb88976376 100644 --- a/vortex-array/src/arrays/varbinview/vtable/operations.rs +++ b/vortex-array/src/arrays/varbinview/vtable/operations.rs @@ -1,31 +1,14 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use std::ops::Range; - use vortex_scalar::Scalar; -use crate::ArrayRef; -use crate::IntoArray; use crate::arrays::VarBinViewArray; use crate::arrays::VarBinViewVTable; use crate::arrays::varbin_scalar; use crate::vtable::OperationsVTable; -use crate::vtable::ValidityHelper; impl OperationsVTable for VarBinViewVTable { - fn slice(array: &VarBinViewArray, range: Range) -> ArrayRef { - let views = array.views().slice(range.clone()); - - VarBinViewArray::new( - views, - array.buffers().clone(), - array.dtype().clone(), - array.validity().slice(range), - ) - .into_array() - } - fn scalar_at(array: &VarBinViewArray, index: usize) -> Scalar { varbin_scalar(array.bytes_at(index), array.dtype()) } diff --git a/vortex-array/src/arrow/array.rs b/vortex-array/src/arrow/array.rs index b724114fec3..a4fde270c9f 100644 --- a/vortex-array/src/arrow/array.rs +++ b/vortex-array/src/arrow/array.rs @@ -3,7 +3,6 @@ use std::fmt::Debug; use std::hash::Hash; -use std::ops::Range; use arrow_array::ArrayRef as ArrowArrayRef; use vortex_buffer::BitBuffer; @@ -158,16 +157,6 @@ impl CanonicalVTable for ArrowVTable { } impl OperationsVTable for ArrowVTable { - fn slice(array: &ArrowArray, range: Range) -> ArrayRef { - let inner = array.inner.slice(range.start, range.len()); - let new_array = ArrowArray { - inner, - dtype: array.dtype.clone(), - stats_set: Default::default(), - }; - new_array.into_array() - } - fn scalar_at(_array: &ArrowArray, _index: usize) -> Scalar { vortex_panic!("Not supported") } diff --git a/vortex-array/src/vtable/dyn_.rs b/vortex-array/src/vtable/dyn_.rs index 6958cccbffe..b586851622d 100644 --- a/vortex-array/src/vtable/dyn_.rs +++ b/vortex-array/src/vtable/dyn_.rs @@ -8,6 +8,7 @@ use std::fmt::Display; use std::fmt::Formatter; use std::hash::Hash; use std::hash::Hasher; +use std::ops::Range; use std::sync::Arc; use arcref::ArcRef; @@ -73,6 +74,8 @@ pub trait DynVTable: 'static + private::Sealed + Send + Sync + Debug { child_idx: usize, ctx: &mut ExecutionCtx, ) -> VortexResult>; + + fn slice(&self, array: &ArrayRef, range: Range) -> VortexResult>; } /// Adapter struct used to lift the [`VTable`] trait into an object-safe [`DynVTable`] @@ -247,6 +250,33 @@ impl DynVTable for ArrayVTableAdapter { Ok(Some(result)) } + + fn slice(&self, array: &ArrayRef, range: Range) -> VortexResult> { + vortex_ensure!( + range.end <= array.len(), + "slice range {}..{} out of bounds for array of length {}", + range.start, + range.end, + array.len() + ); + + let Some(sliced) = V::slice(downcast::(array), range.clone())? else { + return Ok(None); + }; + vortex_ensure!( + sliced.len() == range.len(), + "Sliced array length mismatch: expected {}, got {}", + range.len(), + sliced.len() + ); + vortex_ensure!( + sliced.dtype() == array.dtype(), + "Sliced array dtype mismatch: expected {}, got {}", + array.dtype(), + sliced.dtype() + ); + Ok(Some(sliced)) + } } fn downcast(array: &ArrayRef) -> &V::Array { @@ -311,6 +341,11 @@ impl ArrayVTable { ) -> VortexResult> { self.as_dyn().encode(input, like) } + + /// Slice the array using the VTable's slice implementation. + pub fn slice(&self, array: &ArrayRef, range: Range) -> VortexResult> { + self.as_dyn().slice(array, range) + } } impl PartialEq for ArrayVTable { diff --git a/vortex-array/src/vtable/mod.rs b/vortex-array/src/vtable/mod.rs index eccb4b73afe..d29e13a8779 100644 --- a/vortex-array/src/vtable/mod.rs +++ b/vortex-array/src/vtable/mod.rs @@ -14,6 +14,7 @@ mod visitor; use std::fmt::Debug; use std::ops::Deref; +use std::ops::Range; pub use array::*; pub use canonical::*; @@ -186,6 +187,22 @@ pub trait VTable: 'static + Sized + Send + Sync + Debug { _ = (array, parent, child_idx); Ok(None) } + + /// Perform a constant-time slice of the array. + /// + /// If an encoding cannot perform this slice in constant time, it should instead + /// wrap itself in the `SliceArray`. + /// + /// This function returns [`ArrayRef`] since some encodings can return a simpler array for + /// some slices, for example a [`crate::arrays::ChunkedArray`] may slice into a single chunk. + /// + /// ## Preconditions + /// + /// Bounds-checking has already been performed by the time this function is called. + fn slice(array: &Self::Array, range: Range) -> VortexResult> { + _ = (array, range); + Ok(None) + } } /// Placeholder type used to indicate when a particular vtable is not supported by the encoding. diff --git a/vortex-array/src/vtable/operations.rs b/vortex-array/src/vtable/operations.rs index 750a8487ac7..951ea7105c6 100644 --- a/vortex-array/src/vtable/operations.rs +++ b/vortex-array/src/vtable/operations.rs @@ -1,29 +1,13 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use std::ops::Range; - use vortex_error::vortex_panic; use vortex_scalar::Scalar; -use crate::ArrayRef; use crate::vtable::NotSupported; use crate::vtable::VTable; pub trait OperationsVTable { - /// Perform a constant-time slice of the array. - /// - /// If an encoding cannot perform this slice in constant time, it should internally - /// store an offset and length in order to defer slicing until the array is accessed. - /// - /// This function returns [`ArrayRef`] since some encodings can return a simpler array for - /// some slices, for example a [`crate::arrays::ChunkedArray`] may slice into a single chunk. - /// - /// ## Preconditions - /// - /// Bounds-checking has already been performed by the time this function is called. - fn slice(array: &V::Array, range: Range) -> ArrayRef; - /// Fetch the scalar at the given index. /// /// ## Preconditions @@ -34,13 +18,6 @@ pub trait OperationsVTable { } impl OperationsVTable for NotSupported { - fn slice(array: &V::Array, _range: Range) -> ArrayRef { - vortex_panic!( - "Legacy slice operation is not supported for {} arrays", - array.encoding_id() - ) - } - fn scalar_at(array: &V::Array, _index: usize) -> Scalar { vortex_panic!( "Legacy scalar_at operation is not supported for {} arrays", diff --git a/vortex-mask/src/lib.rs b/vortex-mask/src/lib.rs index b82d47adc5c..1f87eb70f46 100644 --- a/vortex-mask/src/lib.rs +++ b/vortex-mask/src/lib.rs @@ -434,6 +434,7 @@ impl Mask { match &self { Self::AllTrue(_) => n, Self::AllFalse(_) => unreachable!("no true values in all-false mask"), + // TODO(joe): optimize this function Self::Values(values) => values.indices()[n], } } diff --git a/vortex-python/src/arrays/py/vtable.rs b/vortex-python/src/arrays/py/vtable.rs index 519533ed1a0..e3300077272 100644 --- a/vortex-python/src/arrays/py/vtable.rs +++ b/vortex-python/src/arrays/py/vtable.rs @@ -2,7 +2,6 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors use std::hash::Hash; -use std::ops::Range; use std::sync::Arc; use pyo3::Python; @@ -185,10 +184,6 @@ impl CanonicalVTable for PythonVTable { } impl OperationsVTable for PythonVTable { - fn slice(_array: &PythonArray, _range: Range) -> ArrayRef { - todo!() - } - fn scalar_at(_array: &PythonArray, _index: usize) -> Scalar { todo!() }