Skip to content
112 changes: 112 additions & 0 deletions vortex-array/src/arrays/struct_/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ use vortex_error::VortexExpect;
use vortex_error::VortexResult;
use vortex_error::vortex_bail;
use vortex_error::vortex_err;
use vortex_mask::Mask;

use crate::Array;
use crate::ArrayRef;
Expand Down Expand Up @@ -451,4 +452,115 @@ impl StructArray {

Self::try_new_with_dtype(children, new_fields, self.len, self.validity.clone())
}

/// Push the struct-level validity into the children fields.
///
/// This method takes the top-level validity of the struct array and applies it to each child field
/// using a mask operation. The resulting struct array will have the struct-level nulls propagated
/// down to the individual fields.
///
/// # Parameters
///
/// * `preserve_struct_validity` - If `Some(true)`, the new struct array retains the original struct-level
/// validity. If `Some(false)` or `None`, the new struct array has `Validity::AllValid` since all null
/// information is now contained within the individual fields. Defaults to `false` when `None`.
///
/// # Returns
///
/// A new `StructArray` where each child field has been masked with the struct's validity.
///
/// # Examples
///
/// ```
/// use vortex_array::arrays::StructArray;
/// use vortex_array::validity::Validity;
/// use vortex_array::IntoArray;
/// use vortex_buffer::buffer;
///
/// // Create struct with top-level nulls
/// let struct_array = StructArray::try_new(
/// ["a", "b"].into(),
/// vec![
/// buffer![1i32, 2i32, 3i32].into_array(),
/// buffer![10i32, 20i32, 30i32].into_array(),
/// ],
/// 3,
/// Validity::from_iter([true, false, true]), // row 1 is null
/// ).unwrap();
///
/// // Push validity into children, preserving struct validity
/// let pushed = struct_array.push_validity_into_children(true).unwrap();
/// // pushed.fields()[0] now has nulls at position 1
/// // pushed.fields()[1] now has nulls at position 1
/// // pushed.validity still shows row 1 as null
///
/// // Push validity into children, removing struct validity (default behavior)
/// let pushed_no_struct = struct_array.push_validity_into_children_default().unwrap();
/// // or explicitly:
/// let pushed_no_struct = struct_array.push_validity_into_children(false).unwrap();
/// // pushed_no_struct.fields()[0] now has nulls at position 1
/// // pushed_no_struct.fields()[1] now has nulls at position 1
/// // pushed_no_struct.validity is AllValid
/// ```
/// Push validity into children with default behavior (preserve_struct_validity = false).
///
/// This is equivalent to calling `push_validity_into_children(None)` or
/// `push_validity_into_children(Some(false))`.
pub fn push_validity_into_children_default(&self) -> VortexResult<Self> {
self.push_validity_into_children(false)
}


pub fn push_validity_into_children(&self, preserve_struct_validity: bool) -> VortexResult<Self> {
use crate::compute::mask;

// Get the struct-level validity mask
let struct_validity_mask = self.validity_mask();

// If the struct has no nulls, return a clone
if struct_validity_mask.all_true() {
return if preserve_struct_validity {
Ok(self.clone())
} else {
// Remove struct validity if requested
Self::try_new(
self.names().clone(),
self.fields().clone(),
self.len(),
Validity::AllValid,
)
};
}

// Apply the struct validity mask to each child field
// We want to set nulls where the struct is null (i.e., where struct_validity_mask is false)
// So we need to invert the mask: where struct is invalid, set child to invalid
let null_mask = struct_validity_mask.iter_bools(|iter| {
Mask::from_iter(iter.map(|valid| !valid)) // invert: valid->invalid, invalid->valid
});

let masked_fields: Vec<ArrayRef> = self
.fields()
.iter()
.map(|field| {
// Use the mask function to apply null positions to each field
mask(field.as_ref(), &null_mask)
})
.collect::<VortexResult<Vec<_>>>()?;

// Determine the new struct validity (default to false = remove struct validity)
let new_struct_validity = if preserve_struct_validity {
self.validity.clone()
} else {
Validity::AllValid
};

// Construct the new struct array
Self::try_new(
self.names().clone(),
masked_fields,
self.len(),
new_struct_validity,
)
}
}
177 changes: 177 additions & 0 deletions vortex-array/src/arrays/struct_/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use vortex_dtype::FieldName;
use vortex_dtype::FieldNames;
use vortex_dtype::Nullability;
use vortex_dtype::PType;
use vortex_scalar::Scalar;

use crate::Array;
use crate::IntoArray;
Expand Down Expand Up @@ -150,3 +151,179 @@ fn test_uncompressed_size_in_bytes() {
assert_eq!(canonical_size, 2);
assert_eq!(uncompressed_size, Some(4000));
}

#[test]
fn test_push_validity_into_children_preserve_struct() {
// Create struct with top-level nulls
// structArray : [a, b]
// fields: [1, 2, 3] (a), [10, 20, 30] (b)
// validity: [true, false, true]
// row 1 is null at struct level
let struct_array = StructArray::try_new(
["a", "b"].into(),
vec![
buffer![1i32, 2i32, 3i32].into_array(),
buffer![10i32, 20i32, 30i32].into_array(),
],
3,
Validity::from_iter([true, false, true]), // row 1 is null at struct level
)
.unwrap();

// Push validity into children, preserving struct validity
let pushed = struct_array.push_validity_into_children(true).unwrap();

// Check that struct validity is preserved
assert_eq!(pushed.validity_mask(), struct_array.validity_mask());

// Check that children now have nulls where struct was null
let field_a = pushed.fields()[0].as_ref();
let field_b = pushed.fields()[1].as_ref();


assert!(field_a.is_valid(0));
assert!(!field_a.is_valid(1)); // Should be null due to struct null
assert!(field_a.is_valid(2));

assert!(field_b.is_valid(0));
assert!(!field_b.is_valid(1)); // Should be null due to struct null
assert!(field_b.is_valid(2));


// Original values should be preserved where valid
assert_eq!(field_a.scalar_at(0), 1i32.into());
assert_eq!(field_a.scalar_at(2), 3i32.into());
assert_eq!(field_b.scalar_at(0), 10i32.into());
assert_eq!(field_b.scalar_at(2), 30i32.into());


// Verify pushed struct array values (preserve_struct_validity = true)
assert!(pushed.is_valid(0)); // Row 0 should be valid
assert!(!pushed.is_valid(1)); // Row 1 should be null (preserved)
assert!(pushed.is_valid(2)); // Row 2 should be valid

// Row 0: {a: 1, b: 10} - should be valid struct with valid fields
let row0 = pushed.scalar_at(0);
assert!(row0.is_valid());

// Row 1: null - should be null struct (preserved from original)
let row1 = pushed.scalar_at(1);
assert!(!row1.is_valid());

// Row 2: {a: 3, b: 30} - should be valid struct with valid fields
let row2 = pushed.scalar_at(2);
assert!(row2.is_valid());

}

#[test]
fn test_push_validity_into_children_remove_struct() {

// Create struct with top-level nulls
let struct_array = StructArray::try_new(
["a", "b"].into(),
vec![
buffer![1i32, 2i32, 3i32].into_array(),
buffer![10i32, 20i32, 30i32].into_array(),
],
3,
Validity::from_iter([true, false, true]), // row 1 is null at struct level
)
.unwrap();


// Push validity into children, removing struct validity when default behavior is used (preserve_struct_validity = false)
let pushed = struct_array.push_validity_into_children_default().unwrap();


// Check that struct validity is now AllValid
assert!(pushed.validity_mask().all_true());

// Check that children still have nulls where struct was null
let field_a = pushed.fields()[0].as_ref();
let field_b = pushed.fields()[1].as_ref();


assert!(field_a.is_valid(0));
assert!(!field_a.is_valid(1)); // Should be null due to struct null
assert!(field_a.is_valid(2));

assert!(field_b.is_valid(0));
assert!(!field_b.is_valid(1)); // Should be null due to struct null
assert!(field_b.is_valid(2));

// Original values should be preserved where valid
assert_eq!(field_a.scalar_at(0), 1i32.into());
assert_eq!(field_a.scalar_at(2), 3i32.into());
assert_eq!(field_b.scalar_at(0), 10i32.into());
assert_eq!(field_b.scalar_at(2), 30i32.into());

// Verify null values using proper null scalar comparison
use vortex_dtype::{DType, Nullability, PType};
let null_i32_scalar = Scalar::null(DType::Primitive(PType::I32, Nullability::Nullable));
assert_eq!(field_a.scalar_at(1), null_i32_scalar);
assert_eq!(field_b.scalar_at(1), null_i32_scalar);

// Alternative: check if the scalar is null
assert!(!field_a.scalar_at(1).is_valid());
assert!(!field_b.scalar_at(1).is_valid());

// Verify pushed struct array values (preserve_struct_validity = false)
assert!(pushed.is_valid(0)); // Row 0 should be valid
assert!(pushed.is_valid(1)); // Row 1 should be valid (validity removed)
assert!(pushed.is_valid(2)); // Row 2 should be valid

// Row 0: {a: 1, b: 10} - should be valid struct with valid fields
let row0 = pushed.scalar_at(0);
assert!(row0.is_valid());

// Row 1: {a: null, b: null} - should be valid struct but with null fields
let row1 = pushed.scalar_at(1);
assert!(row1.is_valid()); // Struct is valid, but fields are null

// Row 2: {a: 3, b: 30} - should be valid struct with valid fields
let row2 = pushed.scalar_at(2);
assert!(row2.is_valid());

}

#[test]
fn test_push_validity_into_children_no_nulls() {
// Create struct without any nulls
let struct_array = StructArray::try_new(
["a", "b"].into(),
vec![
buffer![1i32, 2i32, 3i32].into_array(),
buffer![10i32, 20i32, 30i32].into_array(),
],
3,
Validity::AllValid,
)
.unwrap();


// Push validity into children (should be no-op when preserve=true)
let pushed_preserve = struct_array.push_validity_into_children(true).unwrap();
assert_eq!(pushed_preserve.validity_mask(), struct_array.validity_mask());

// Push validity into children (should change validity to AllValid when preserve=false)
let pushed_remove = struct_array.push_validity_into_children(false).unwrap();
assert!(pushed_remove.validity_mask().all_true());

// Fields should remain unchanged
for i in 0..struct_array.fields().len() {
assert_eq!(
pushed_preserve.fields()[i].scalar_at(0),
struct_array.fields()[i].scalar_at(0)
);
assert_eq!(
pushed_preserve.fields()[i].scalar_at(1),
struct_array.fields()[i].scalar_at(1)
);
assert_eq!(
pushed_preserve.fields()[i].scalar_at(2),
struct_array.fields()[i].scalar_at(2)
);
}

}