From 89878c13f4a276e31c956ad92d831199663553e2 Mon Sep 17 00:00:00 2001 From: Dimitar Dimitrov Date: Thu, 12 Feb 2026 12:42:57 +0000 Subject: [PATCH 01/15] fix(varbin): eagerly set IsSorted stat on builder offsets VarBinBuilder guarantees monotonically increasing offsets, but finish() wasn't setting the IsSorted stat. This caused an O(n) recomputation every time a deserialized VarBinArray was validated, which showed up as a hot path in production profiles. Signed-off-by: Dimitar Dimitrov --- vortex-array/src/arrays/varbin/builder.rs | 42 +++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/vortex-array/src/arrays/varbin/builder.rs b/vortex-array/src/arrays/varbin/builder.rs index 2c81676be78..0e53349a891 100644 --- a/vortex-array/src/arrays/varbin/builder.rs +++ b/vortex-array/src/arrays/varbin/builder.rs @@ -11,6 +11,8 @@ use vortex_error::vortex_panic; use crate::IntoArray; use crate::arrays::primitive::PrimitiveArray; use crate::arrays::varbin::VarBinArray; +use crate::expr::stats::Precision; +use crate::expr::stats::Stat; use crate::validity::Validity; pub struct VarBinBuilder { @@ -94,6 +96,13 @@ impl VarBinBuilder { let validity = Validity::from_bit_buffer(nulls, dtype.nullability()); + // The builder guarantees offsets are monotonically increasing, so we can set + // this stat eagerly. This avoids an O(n) recomputation when the array is + // deserialized and VarBinArray::validate checks sortedness. + offsets + .statistics() + .set(Stat::IsSorted, Precision::Exact(true.into())); + // SAFETY: The builder maintains all invariants: // - Offsets are monotonically increasing starting from 0 (guaranteed by builder logic). // - Bytes buffer contains exactly the data referenced by offsets. 
@@ -109,9 +118,13 @@ impl VarBinBuilder { mod tests { use vortex_dtype::DType; use vortex_dtype::Nullability::Nullable; + use vortex_error::VortexResult; use vortex_scalar::Scalar; use crate::arrays::varbin::builder::VarBinBuilder; + use crate::expr::stats::Precision; + use crate::expr::stats::Stat; + use crate::expr::stats::StatsProviderExt; #[test] fn test_builder() { @@ -129,4 +142,33 @@ mod tests { ); assert!(array.scalar_at(1).unwrap().is_null()); } + + #[test] + fn offsets_have_is_sorted_stat() -> VortexResult<()> { + let mut builder = VarBinBuilder::::with_capacity(0); + builder.append_value(b"aaa"); + builder.append_null(); + builder.append_value(b"bbb"); + let array = builder.finish(DType::Utf8(Nullable)); + + let is_sorted = array + .offsets() + .statistics() + .with_typed_stats_set(|s| s.get_as::(Stat::IsSorted)); + assert_eq!(is_sorted, Some(Precision::Exact(true))); + Ok(()) + } + + #[test] + fn empty_builder_offsets_have_is_sorted_stat() -> VortexResult<()> { + let builder = VarBinBuilder::::new(); + let array = builder.finish(DType::Utf8(Nullable)); + + let is_sorted = array + .offsets() + .statistics() + .with_typed_stats_set(|s| s.get_as::(Stat::IsSorted)); + assert_eq!(is_sorted, Some(Precision::Exact(true))); + Ok(()) + } } From d37f12a946963cf073b402ff0e92d2e9651cc82a Mon Sep 17 00:00:00 2001 From: Dimitar Dimitrov Date: Thu, 12 Feb 2026 12:54:03 +0000 Subject: [PATCH 02/15] fix(varbin): add debug_assert for offset sortedness Signed-off-by: Dimitar Dimitrov --- vortex-array/src/arrays/varbin/builder.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/vortex-array/src/arrays/varbin/builder.rs b/vortex-array/src/arrays/varbin/builder.rs index 0e53349a891..f57f815fa77 100644 --- a/vortex-array/src/arrays/varbin/builder.rs +++ b/vortex-array/src/arrays/varbin/builder.rs @@ -99,6 +99,10 @@ impl VarBinBuilder { // The builder guarantees offsets are monotonically increasing, so we can set // this stat eagerly. 
This avoids an O(n) recomputation when the array is // deserialized and VarBinArray::validate checks sortedness. + debug_assert!( + offsets.statistics().compute_is_sorted().unwrap_or(false), + "VarBinBuilder offsets must be sorted" + ); offsets .statistics() .set(Stat::IsSorted, Precision::Exact(true.into())); From 09c1d19999b87582d9a3364c7c5579dde09dcb9d Mon Sep 17 00:00:00 2001 From: Dimitar Dimitrov Date: Thu, 12 Feb 2026 17:13:59 +0000 Subject: [PATCH 03/15] fix(runend): validate ends are strictly sorted and cache stats RunEndArray::validate didn't check that ends are strictly increasing, which is required for binary search to work correctly. Also caches IsSorted and IsStrictSorted on the ends after construction so downstream consumers don't recompute it. Signed-off-by: Dimitar Dimitrov --- encodings/runend/src/array.rs | 57 +++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/encodings/runend/src/array.rs b/encodings/runend/src/array.rs index 90f271cdc7d..3f37333fd7f 100644 --- a/encodings/runend/src/array.rs +++ b/encodings/runend/src/array.rs @@ -16,6 +16,8 @@ use vortex_array::ProstMetadata; use vortex_array::SerializeMetadata; use vortex_array::arrays::PrimitiveVTable; use vortex_array::buffer::BufferHandle; +use vortex_array::expr::stats::Precision as StatPrecision; +use vortex_array::expr::stats::Stat; use vortex_array::search_sorted::SearchSorted; use vortex_array::search_sorted::SearchSortedSide; use vortex_array::serde::ArrayChildren; @@ -215,6 +217,11 @@ impl RunEndArray { return Ok(()); } + // Run ends must be strictly sorted for binary search to work correctly. + if let Some(is_strict_sorted) = ends.statistics().compute_is_strict_sorted() { + vortex_ensure!(is_strict_sorted, "run ends must be strictly sorted"); + } + // Skip host-only validation when ends are not host-resident. 
if !ends.is_host() { return Ok(()); @@ -319,6 +326,13 @@ impl RunEndArray { ) -> VortexResult { Self::validate(&ends, &values, offset, length)?; + // Run ends are always strictly sorted (and therefore sorted) by invariant. + // Cache this so downstream consumers don't need to recompute it. + ends.statistics() + .set(Stat::IsStrictSorted, StatPrecision::Exact(true.into())); + ends.statistics() + .set(Stat::IsSorted, StatPrecision::Exact(true.into())); + Ok(Self { ends, values, @@ -485,10 +499,14 @@ pub(super) fn run_end_canonicalize( mod tests { use vortex_array::IntoArray; use vortex_array::assert_arrays_eq; + use vortex_array::expr::stats::Precision as StatPrecision; + use vortex_array::expr::stats::Stat; + use vortex_array::expr::stats::StatsProviderExt; use vortex_buffer::buffer; use vortex_dtype::DType; use vortex_dtype::Nullability; use vortex_dtype::PType; + use vortex_error::VortexResult; use crate::RunEndArray; @@ -510,4 +528,43 @@ mod tests { let expected = buffer![1, 1, 2, 2, 2, 3, 3, 3, 3, 3].into_array(); assert_arrays_eq!(arr.to_array(), expected); } + + #[test] + fn unsorted_ends_rejected() { + let result = RunEndArray::try_new( + buffer![5u32, 2, 10].into_array(), + buffer![1i32, 2, 3].into_array(), + ); + assert!(result.is_err()); + } + + #[test] + fn duplicate_ends_rejected() { + let result = RunEndArray::try_new( + buffer![2u32, 2, 10].into_array(), + buffer![1i32, 2, 3].into_array(), + ); + assert!(result.is_err()); + } + + #[test] + fn ends_have_sorted_stats() -> VortexResult<()> { + let arr = RunEndArray::new( + buffer![2u32, 5, 10].into_array(), + buffer![1i32, 2, 3].into_array(), + ); + + let is_strict_sorted = arr + .ends() + .statistics() + .with_typed_stats_set(|s| s.get_as::(Stat::IsStrictSorted)); + assert_eq!(is_strict_sorted, Some(StatPrecision::Exact(true))); + + let is_sorted = arr + .ends() + .statistics() + .with_typed_stats_set(|s| s.get_as::(Stat::IsSorted)); + assert_eq!(is_sorted, Some(StatPrecision::Exact(true))); + Ok(()) + } 
} From c3c67b49b1dfbe86541ef50f78ad4fc41e820553 Mon Sep 17 00:00:00 2001 From: Dimitar Dimitrov Date: Thu, 12 Feb 2026 17:21:06 +0000 Subject: [PATCH 04/15] fix(sequence): eagerly set IsSorted and IsStrictSorted stats SequenceArray (A[i] = base + i * multiplier) can determine sortedness from the sign of the multiplier at construction time. Set both stats eagerly so we skip the kernel entirely. Signed-off-by: Dimitar Dimitrov --- encodings/sequence/src/array.rs | 73 ++++++++++++++++++++++++++++++++- 1 file changed, 71 insertions(+), 2 deletions(-) diff --git a/encodings/sequence/src/array.rs b/encodings/sequence/src/array.rs index 074c7f3b99b..463ef7dacaf 100644 --- a/encodings/sequence/src/array.rs +++ b/encodings/sequence/src/array.rs @@ -4,6 +4,7 @@ use std::hash::Hash; use num_traits::cast::FromPrimitive; +use num_traits::zero; use vortex_array::ArrayBufferVisitor; use vortex_array::ArrayChildVisitor; use vortex_array::ArrayRef; @@ -15,6 +16,8 @@ use vortex_array::ProstMetadata; use vortex_array::SerializeMetadata; use vortex_array::arrays::PrimitiveArray; use vortex_array::buffer::BufferHandle; +use vortex_array::expr::stats::Precision as StatPrecision; +use vortex_array::expr::stats::Stat; use vortex_array::serde::ArrayChildren; use vortex_array::stats::ArrayStats; use vortex_array::stats::StatsSetRef; @@ -127,13 +130,27 @@ impl SequenceArray { length: usize, ) -> Self { let dtype = DType::Primitive(ptype, nullability); + + // A sequence A[i] = base + i * multiplier is sorted iff multiplier >= 0, + // and strictly sorted iff multiplier > 0. + let (is_sorted, is_strict_sorted) = match_each_native_ptype!(ptype, |P| { + let m = multiplier.cast::

<P>(); + (m >= zero::<P>(), m > zero::<P>

()) + }); + + let stats_set = ArrayStats::default(); + stats_set.set(Stat::IsSorted, StatPrecision::Exact(is_sorted.into())); + stats_set.set( + Stat::IsStrictSorted, + StatPrecision::Exact(is_strict_sorted.into()), + ); + Self { base, multiplier, dtype, len: length, - // TODO(joe): add stats, on construct or on use? - stats_set: Default::default(), + stats_set, } } @@ -393,7 +410,11 @@ impl SequenceVTable { mod tests { use vortex_array::arrays::PrimitiveArray; use vortex_array::assert_arrays_eq; + use vortex_array::expr::stats::Precision as StatPrecision; + use vortex_array::expr::stats::Stat; + use vortex_array::expr::stats::StatsProviderExt; use vortex_dtype::Nullability; + use vortex_error::VortexResult; use vortex_scalar::Scalar; use vortex_scalar::ScalarValue; @@ -444,4 +465,52 @@ mod tests { assert!(SequenceArray::typed_new(127i8, 1i8, Nullability::NonNullable, 2).is_err()); assert!(SequenceArray::typed_new(-128i8, -1i8, Nullability::NonNullable, 2).is_err()); } + + #[test] + fn positive_multiplier_is_strict_sorted() -> VortexResult<()> { + let arr = SequenceArray::typed_new(0i64, 3, Nullability::NonNullable, 4)?; + + let is_sorted = arr + .statistics() + .with_typed_stats_set(|s| s.get_as::(Stat::IsSorted)); + assert_eq!(is_sorted, Some(StatPrecision::Exact(true))); + + let is_strict_sorted = arr + .statistics() + .with_typed_stats_set(|s| s.get_as::(Stat::IsStrictSorted)); + assert_eq!(is_strict_sorted, Some(StatPrecision::Exact(true))); + Ok(()) + } + + #[test] + fn zero_multiplier_is_sorted_not_strict() -> VortexResult<()> { + let arr = SequenceArray::typed_new(5i64, 0, Nullability::NonNullable, 4)?; + + let is_sorted = arr + .statistics() + .with_typed_stats_set(|s| s.get_as::(Stat::IsSorted)); + assert_eq!(is_sorted, Some(StatPrecision::Exact(true))); + + let is_strict_sorted = arr + .statistics() + .with_typed_stats_set(|s| s.get_as::(Stat::IsStrictSorted)); + assert_eq!(is_strict_sorted, Some(StatPrecision::Exact(false))); + Ok(()) + } + + #[test] + 
fn negative_multiplier_not_sorted() -> VortexResult<()> { + let arr = SequenceArray::typed_new(10i64, -1, Nullability::NonNullable, 4)?; + + let is_sorted = arr + .statistics() + .with_typed_stats_set(|s| s.get_as::(Stat::IsSorted)); + assert_eq!(is_sorted, Some(StatPrecision::Exact(false))); + + let is_strict_sorted = arr + .statistics() + .with_typed_stats_set(|s| s.get_as::(Stat::IsStrictSorted)); + assert_eq!(is_strict_sorted, Some(StatPrecision::Exact(false))); + Ok(()) + } } From 6cbf9b09465d2a4a28873c47a4af625e058ea2eb Mon Sep 17 00:00:00 2001 From: Dimitar Dimitrov Date: Fri, 13 Feb 2026 11:45:16 +0000 Subject: [PATCH 05/15] Clarify sortedness promises --- vortex-array/src/expr/stats/mod.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/vortex-array/src/expr/stats/mod.rs b/vortex-array/src/expr/stats/mod.rs index cba33e2743c..1eb8414feeb 100644 --- a/vortex-array/src/expr/stats/mod.rs +++ b/vortex-array/src/expr/stats/mod.rs @@ -44,9 +44,11 @@ pub enum Stat { /// Whether all values are the same (nulls are not equal to other non-null values, /// so this is true iff all values are null or all values are the same non-null value) IsConstant = 0, - /// Whether the non-null values in the array are sorted (i.e., we skip nulls) + /// Whether the non-null values in the array are sorted in ascending order (i.e., we skip nulls) + /// This may later be extended to support descending order, but for now we only support ascending order. IsSorted = 1, - /// Whether the non-null values in the array are strictly sorted (i.e., sorted with no duplicates) + /// Whether the non-null values in the array are strictly sorted in ascending order (i.e., sorted with no duplicates) + /// This may later be extended to support descending order, but for now we only support ascending order. 
IsStrictSorted = 2, /// The maximum value in the array (ignoring nulls, unless all values are null) Max = 3, From c5fbeb7279b09f96cfc0559825a5458c81ce4693 Mon Sep 17 00:00:00 2001 From: Dimitar Dimitrov Date: Fri, 13 Feb 2026 11:45:41 +0000 Subject: [PATCH 06/15] Move runend sortedness checks to debug_assert --- encodings/runend/src/array.rs | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/encodings/runend/src/array.rs b/encodings/runend/src/array.rs index 3f37333fd7f..5323050f38e 100644 --- a/encodings/runend/src/array.rs +++ b/encodings/runend/src/array.rs @@ -217,10 +217,20 @@ impl RunEndArray { return Ok(()); } - // Run ends must be strictly sorted for binary search to work correctly. - if let Some(is_strict_sorted) = ends.statistics().compute_is_strict_sorted() { - vortex_ensure!(is_strict_sorted, "run ends must be strictly sorted"); - } + debug_assert!({ + // Run ends must be strictly sorted for binary search to work correctly. + let pre_validation = ends.statistics().to_owned().clone(); + + let is_sorted = ends + .statistics() + .compute_is_strict_sorted() + .unwrap_or(false); + + // Preserve the original statistics since compute_is_strict_sorted may have mutated them. + // We don't want to run with different stats in debug mode and outside. + ends.statistics().inherit(pre_validation.iter()); + is_sorted + }); // Skip host-only validation when ends are not host-resident. 
if !ends.is_host() { From 1cb26606b729de95c5be5147afdb48438e93426a Mon Sep 17 00:00:00 2001 From: Dimitar Dimitrov Date: Fri, 13 Feb 2026 11:46:38 +0000 Subject: [PATCH 07/15] Avoid synchronised set's for SequenceArray::unchecked_new --- encodings/sequence/src/array.rs | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/encodings/sequence/src/array.rs b/encodings/sequence/src/array.rs index 463ef7dacaf..9ff53560053 100644 --- a/encodings/sequence/src/array.rs +++ b/encodings/sequence/src/array.rs @@ -1,10 +1,8 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use std::hash::Hash; - use num_traits::cast::FromPrimitive; -use num_traits::zero; +use std::hash::Hash; use vortex_array::ArrayBufferVisitor; use vortex_array::ArrayChildVisitor; use vortex_array::ArrayRef; @@ -19,8 +17,8 @@ use vortex_array::buffer::BufferHandle; use vortex_array::expr::stats::Precision as StatPrecision; use vortex_array::expr::stats::Stat; use vortex_array::serde::ArrayChildren; -use vortex_array::stats::ArrayStats; use vortex_array::stats::StatsSetRef; +use vortex_array::stats::{ArrayStats, StatsSet}; use vortex_array::validity::Validity; use vortex_array::vtable; use vortex_array::vtable::ArrayId; @@ -133,24 +131,27 @@ impl SequenceArray { // A sequence A[i] = base + i * multiplier is sorted iff multiplier >= 0, // and strictly sorted iff multiplier > 0. - let (is_sorted, is_strict_sorted) = match_each_native_ptype!(ptype, |P| { - let m = multiplier.cast::

<P>(); - (m >= zero::<P>(), m > zero::<P>

()) - }); - - let stats_set = ArrayStats::default(); - stats_set.set(Stat::IsSorted, StatPrecision::Exact(is_sorted.into())); - stats_set.set( - Stat::IsStrictSorted, - StatPrecision::Exact(is_strict_sorted.into()), - ); + let m_int = multiplier.cast::(); + let is_sorted = m_int >= 0; + let is_strict_sorted = m_int > 0; + + // SAFETY: we don't have duplicate stats + let stats_set = unsafe { + StatsSet::new_unchecked(vec![ + (Stat::IsSorted, StatPrecision::Exact(is_sorted.into())), + ( + Stat::IsStrictSorted, + StatPrecision::Exact(is_strict_sorted.into()), + ), + ]) + }; Self { base, multiplier, dtype, len: length, - stats_set, + stats_set: ArrayStats::from(stats_set), } } From e50a078d5e4792be6ae81358ca42846087153c5b Mon Sep 17 00:00:00 2001 From: Dimitar Dimitrov Date: Fri, 13 Feb 2026 11:47:24 +0000 Subject: [PATCH 08/15] Remove sortedness check for VarBinArray --- vortex-array/src/arrays/varbin/array.rs | 5 ----- 1 file changed, 5 deletions(-) diff --git a/vortex-array/src/arrays/varbin/array.rs b/vortex-array/src/arrays/varbin/array.rs index c28e59db115..23e6442d59c 100644 --- a/vortex-array/src/arrays/varbin/array.rs +++ b/vortex-array/src/arrays/varbin/array.rs @@ -200,11 +200,6 @@ impl VarBinArray { "Offsets must have at least one element" ); - // Check offsets are sorted - if let Some(is_sorted) = offsets.statistics().compute_is_sorted() { - vortex_ensure!(is_sorted, "offsets must be sorted"); - } - // Skip host-only validation when offsets/bytes are not host-resident. 
if offsets.is_host() && bytes.is_on_host() { let last_offset = offsets From 8cd19f45c157a551efae8a4683609a8071e7de15 Mon Sep 17 00:00:00 2001 From: Dimitar Dimitrov Date: Fri, 13 Feb 2026 12:21:58 +0000 Subject: [PATCH 09/15] Remove assertions --- encodings/runend/src/array.rs | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/encodings/runend/src/array.rs b/encodings/runend/src/array.rs index 5323050f38e..407867bede3 100644 --- a/encodings/runend/src/array.rs +++ b/encodings/runend/src/array.rs @@ -539,24 +539,6 @@ mod tests { assert_arrays_eq!(arr.to_array(), expected); } - #[test] - fn unsorted_ends_rejected() { - let result = RunEndArray::try_new( - buffer![5u32, 2, 10].into_array(), - buffer![1i32, 2, 3].into_array(), - ); - assert!(result.is_err()); - } - - #[test] - fn duplicate_ends_rejected() { - let result = RunEndArray::try_new( - buffer![2u32, 2, 10].into_array(), - buffer![1i32, 2, 3].into_array(), - ); - assert!(result.is_err()); - } - #[test] fn ends_have_sorted_stats() -> VortexResult<()> { let arr = RunEndArray::new( From 712c98c8e82395aa00d12a6ba57f68163e95c086 Mon Sep 17 00:00:00 2001 From: Dimitar Dimitrov Date: Fri, 13 Feb 2026 12:28:45 +0000 Subject: [PATCH 10/15] fix: remove redundant clone in runend debug_assert Signed-off-by: Dimitar Dimitrov --- encodings/runend/src/array.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/encodings/runend/src/array.rs b/encodings/runend/src/array.rs index 407867bede3..efbc53d3a30 100644 --- a/encodings/runend/src/array.rs +++ b/encodings/runend/src/array.rs @@ -219,7 +219,7 @@ impl RunEndArray { debug_assert!({ // Run ends must be strictly sorted for binary search to work correctly. 
- let pre_validation = ends.statistics().to_owned().clone(); + let pre_validation = ends.statistics().to_owned(); let is_sorted = ends .statistics() From 1f4d11e3b22c6387f7257b3c4d67e9735b541037 Mon Sep 17 00:00:00 2001 From: Dimitar Dimitrov Date: Fri, 13 Feb 2026 12:33:26 +0000 Subject: [PATCH 11/15] DCO Remediation Commit for Dimitar Dimitrov I, Dimitar Dimitrov , hereby add my Signed-off-by to this commit: 144540635b3f2dacf28e13d27a21a46e64ce27bf I, Dimitar Dimitrov , hereby add my Signed-off-by to this commit: b2b7817f4b1dd1bb4296a215dc808a7ed090cd3e I, Dimitar Dimitrov , hereby add my Signed-off-by to this commit: 5c932f3a89afb3fcd5d7640d0be5d7bb96b6a536 I, Dimitar Dimitrov , hereby add my Signed-off-by to this commit: 2f5b11b852383d8931b7b3baa253116e44e2602a I, Dimitar Dimitrov , hereby add my Signed-off-by to this commit: 8ae944c0a736cd7f08af23e6defb2386db558ec5 Signed-off-by: Dimitar Dimitrov From ab697f700a220703b7aefc3af71576619067648f Mon Sep 17 00:00:00 2001 From: Dimitar Dimitrov Date: Fri, 13 Feb 2026 12:37:12 +0000 Subject: [PATCH 12/15] chore: cargo fmt Signed-off-by: Dimitar Dimitrov --- encodings/sequence/src/array.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/encodings/sequence/src/array.rs b/encodings/sequence/src/array.rs index 9ff53560053..efa8a2c12b1 100644 --- a/encodings/sequence/src/array.rs +++ b/encodings/sequence/src/array.rs @@ -1,8 +1,9 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use num_traits::cast::FromPrimitive; use std::hash::Hash; + +use num_traits::cast::FromPrimitive; use vortex_array::ArrayBufferVisitor; use vortex_array::ArrayChildVisitor; use vortex_array::ArrayRef; @@ -17,8 +18,9 @@ use vortex_array::buffer::BufferHandle; use vortex_array::expr::stats::Precision as StatPrecision; use vortex_array::expr::stats::Stat; use vortex_array::serde::ArrayChildren; +use vortex_array::stats::ArrayStats; +use vortex_array::stats::StatsSet; use 
vortex_array::stats::StatsSetRef; -use vortex_array::stats::{ArrayStats, StatsSet}; use vortex_array::validity::Validity; use vortex_array::vtable; use vortex_array::vtable::ArrayId; From 929e818c45f542db0a6df78398c781fe4b000c32 Mon Sep 17 00:00:00 2001 From: Dimitar Dimitrov Date: Fri, 13 Feb 2026 12:43:33 +0000 Subject: [PATCH 13/15] fix: update varbin validation test to match removed monotonicity check The reviewer removed the offset monotonicity validation from VarBinArray::try_new, so the test now expects construction to succeed. Signed-off-by: Dimitar Dimitrov --- vortex-array/src/arrays/validation_tests.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/vortex-array/src/arrays/validation_tests.rs b/vortex-array/src/arrays/validation_tests.rs index 5d539c8e6d3..31906dd27f2 100644 --- a/vortex-array/src/arrays/validation_tests.rs +++ b/vortex-array/src/arrays/validation_tests.rs @@ -98,8 +98,9 @@ mod tests { } #[test] - fn test_varbin_array_validation_failure_offsets_not_monotonic() { - // Invalid case: offsets are not monotonically increasing. + fn test_varbin_array_validation_non_monotonic_offsets_accepted() { + // VarBin does not validate monotonicity of offsets at construction time. + // Sortedness is enforced at the builder level instead. let offsets = buffer![0i32, 3, 2, 5].into_array(); // 3 -> 2 is decreasing. 
let bytes = ByteBuffer::from(vec![0u8, 1, 2, 3, 4]); let result = VarBinArray::try_new( @@ -109,8 +110,7 @@ mod tests { Validity::NonNullable, ); - assert!(matches!(result, Err(VortexError::InvalidArgument(_, _)))); - assert!(result.is_err()); + assert!(result.is_ok()); } #[test] From 6cd097d6887263b6949ca43242b4bfbc346198dd Mon Sep 17 00:00:00 2001 From: Dimitar Dimitrov Date: Fri, 13 Feb 2026 15:25:40 +0000 Subject: [PATCH 14/15] Remove sortedness checking Signed-off-by: Dimitar Dimitrov --- encodings/runend/src/array.rs | 34 ---------------------------------- 1 file changed, 34 deletions(-) diff --git a/encodings/runend/src/array.rs b/encodings/runend/src/array.rs index efbc53d3a30..8449431dac3 100644 --- a/encodings/runend/src/array.rs +++ b/encodings/runend/src/array.rs @@ -16,8 +16,6 @@ use vortex_array::ProstMetadata; use vortex_array::SerializeMetadata; use vortex_array::arrays::PrimitiveVTable; use vortex_array::buffer::BufferHandle; -use vortex_array::expr::stats::Precision as StatPrecision; -use vortex_array::expr::stats::Stat; use vortex_array::search_sorted::SearchSorted; use vortex_array::search_sorted::SearchSortedSide; use vortex_array::serde::ArrayChildren; @@ -336,13 +334,6 @@ impl RunEndArray { ) -> VortexResult { Self::validate(&ends, &values, offset, length)?; - // Run ends are always strictly sorted (and therefore sorted) by invariant. - // Cache this so downstream consumers don't need to recompute it. 
- ends.statistics() - .set(Stat::IsStrictSorted, StatPrecision::Exact(true.into())); - ends.statistics() - .set(Stat::IsSorted, StatPrecision::Exact(true.into())); - Ok(Self { ends, values, @@ -509,14 +500,10 @@ pub(super) fn run_end_canonicalize( mod tests { use vortex_array::IntoArray; use vortex_array::assert_arrays_eq; - use vortex_array::expr::stats::Precision as StatPrecision; - use vortex_array::expr::stats::Stat; - use vortex_array::expr::stats::StatsProviderExt; use vortex_buffer::buffer; use vortex_dtype::DType; use vortex_dtype::Nullability; use vortex_dtype::PType; - use vortex_error::VortexResult; use crate::RunEndArray; @@ -538,25 +525,4 @@ mod tests { let expected = buffer![1, 1, 2, 2, 2, 3, 3, 3, 3, 3].into_array(); assert_arrays_eq!(arr.to_array(), expected); } - - #[test] - fn ends_have_sorted_stats() -> VortexResult<()> { - let arr = RunEndArray::new( - buffer![2u32, 5, 10].into_array(), - buffer![1i32, 2, 3].into_array(), - ); - - let is_strict_sorted = arr - .ends() - .statistics() - .with_typed_stats_set(|s| s.get_as::(Stat::IsStrictSorted)); - assert_eq!(is_strict_sorted, Some(StatPrecision::Exact(true))); - - let is_sorted = arr - .ends() - .statistics() - .with_typed_stats_set(|s| s.get_as::(Stat::IsSorted)); - assert_eq!(is_sorted, Some(StatPrecision::Exact(true))); - Ok(()) - } } From 92faa3f8f5641f1e0793f8fe490ce2819a4eef67 Mon Sep 17 00:00:00 2001 From: Dimitar Dimitrov Date: Fri, 13 Feb 2026 18:11:27 +0000 Subject: [PATCH 15/15] DCO Remediation Commit for Dimitar Dimitrov I, Dimitar Dimitrov , hereby add my Signed-off-by to this commit: 6cbf9b09465d2a4a28873c47a4af625e058ea2eb I, Dimitar Dimitrov , hereby add my Signed-off-by to this commit: c5fbeb7279b09f96cfc0559825a5458c81ce4693 I, Dimitar Dimitrov , hereby add my Signed-off-by to this commit: 1cb26606b729de95c5be5147afdb48438e93426a I, Dimitar Dimitrov , hereby add my Signed-off-by to this commit: e50a078d5e4792be6ae81358ca42846087153c5b I, Dimitar Dimitrov , hereby add my 
Signed-off-by to this commit: 8cd19f45c157a551efae8a4683609a8071e7de15 Signed-off-by: Dimitar Dimitrov