-
Notifications
You must be signed in to change notification settings - Fork 108
feat: Add push_validity_into_children methods to StructArray #5826
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -13,6 +13,7 @@ use vortex_error::VortexExpect; | |
| use vortex_error::VortexResult; | ||
| use vortex_error::vortex_bail; | ||
| use vortex_error::vortex_err; | ||
| use vortex_mask::Mask; | ||
|
|
||
| use crate::Array; | ||
| use crate::ArrayRef; | ||
|
|
@@ -451,4 +452,106 @@ impl StructArray { | |
|
|
||
| Self::try_new_with_dtype(children, new_fields, self.len, self.validity.clone()) | ||
| } | ||
|
|
||
| /// Push the struct-level validity into the children fields. | ||
| /// | ||
| /// This method takes the top-level validity of the struct array and applies it to each child field | ||
| /// using a mask operation. The resulting struct array will have the struct-level nulls propagated | ||
| /// down to the individual fields. | ||
| /// | ||
| /// # Parameters | ||
| /// | ||
| /// * `preserve_struct_validity` - If true, the new struct array retains the original struct-level | ||
| /// validity. If false, the new struct array has `Validity::AllValid` since all null information | ||
| /// is now contained within the individual fields. | ||
| /// | ||
| /// # Returns | ||
| /// | ||
| /// A new `StructArray` where each child field has been masked with the struct's validity. | ||
| /// | ||
| /// # Examples | ||
| /// | ||
| /// ``` | ||
| /// use vortex_array::arrays::StructArray; | ||
| /// use vortex_array::validity::Validity; | ||
| /// use vortex_array::IntoArray; | ||
| /// use vortex_buffer::buffer; | ||
| /// | ||
| /// // Create struct with top-level nulls | ||
| /// let struct_array = StructArray::try_new( | ||
| /// ["a", "b"].into(), | ||
| /// vec![ | ||
| /// buffer![1i32, 2i32, 3i32].into_array(), | ||
| /// buffer![10i32, 20i32, 30i32].into_array(), | ||
| /// ], | ||
| /// 3, | ||
| /// Validity::from_iter([true, false, true]), // row 1 is null | ||
| /// ).unwrap(); | ||
| /// | ||
| /// // Push validity into children, preserving struct validity | ||
| /// let pushed = struct_array.push_validity_into_children(true).unwrap(); | ||
| /// // pushed.fields()[0] now has nulls at position 1 | ||
| /// // pushed.fields()[1] now has nulls at position 1 | ||
| /// // pushed.validity still shows row 1 as null | ||
| /// | ||
| /// // Push validity into children, removing struct validity | ||
| /// let pushed_no_struct = struct_array.push_validity_into_children(false).unwrap(); | ||
| /// // pushed_no_struct.fields()[0] now has nulls at position 1 | ||
| /// // pushed_no_struct.fields()[1] now has nulls at position 1 | ||
| /// // pushed_no_struct.validity is AllValid | ||
| /// ``` | ||
|
Comment on lines
+480
to
+502
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it would be more helpful to comment what the struct array looks like instead of requiring the reader to parse what the created struct array becomes after |
||
| /// Push validity into children with default behavior (preserve_struct_validity = false). | ||
|
|
||
| pub fn push_validity_into_children(&self, preserve_struct_validity: bool) -> VortexResult<Self> { | ||
| use crate::compute::mask; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you move this import to the top?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I also prefer to see |
||
|
|
||
| // Get the struct-level validity mask. | ||
| let struct_validity_mask = self.validity_mask(); | ||
|
|
||
| // If the struct has no nulls, return a clone. | ||
| if struct_validity_mask.all_true() { | ||
| return if preserve_struct_validity { | ||
| Ok(self.clone()) | ||
| } else { | ||
| // Remove struct validity if requested. | ||
| Self::try_new( | ||
| self.names().clone(), | ||
| self.fields().clone(), | ||
| self.len(), | ||
| Validity::AllValid, | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmmm I wonder if we should make the validity of the top-level struct array |
||
| ) | ||
| }; | ||
| } | ||
|
|
||
| // Apply the struct validity mask to each child field. | ||
| // We want to set nulls where the struct is null (i.e., where struct_validity_mask is false). | ||
| // So we need to invert the mask: where struct is invalid, set child to invalid. | ||
| let null_mask = struct_validity_mask.iter_bools(|iter| { | ||
| Mask::from_iter(iter.map(|valid| !valid)) // invert: valid->invalid, invalid->valid | ||
| }); | ||
|
|
||
| let masked_fields: Vec<ArrayRef> = self | ||
| .fields() | ||
| .iter() | ||
| .map(|field| { | ||
| // Use the mask function to apply null positions to each field. | ||
| mask(field.as_ref(), &null_mask) | ||
| }) | ||
| .collect::<VortexResult<Vec<_>>>()?; | ||
|
Comment on lines
+529
to
+540
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This inverted logic makes me think more that we should just move this to the new world now instead of merging this and then getting rid of it immediately. @gatesn Any thoughts about this? |
||
|
|
||
| // Determine the new struct validity (default to false = remove struct validity). | ||
| let new_struct_validity = if preserve_struct_validity { | ||
| self.validity.clone() | ||
| } else { | ||
| Validity::AllValid | ||
| }; | ||
|
|
||
| // Construct the new struct array. | ||
| Self::try_new( | ||
| self.names().clone(), | ||
| masked_fields, | ||
| self.len(), | ||
| new_struct_validity, | ||
| ) | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It may not be immediately obvious to someone reading this that the child buffers are non-nullable