1616// under the License.
1717
1818use crate :: builder:: buffer_builder:: { Int32BufferBuilder , Int8BufferBuilder } ;
19- use crate :: builder:: BufferBuilder ;
20- use crate :: { make_array, ArrowPrimitiveType , UnionArray } ;
19+ use crate :: builder:: { ArrayBuilder , BufferBuilder } ;
20+ use crate :: { make_array, ArrayRef , ArrowPrimitiveType , UnionArray } ;
2121use arrow_buffer:: NullBufferBuilder ;
22- use arrow_buffer:: { ArrowNativeType , Buffer } ;
22+ use arrow_buffer:: { ArrowNativeType , Buffer , ScalarBuffer } ;
2323use arrow_data:: ArrayDataBuilder ;
2424use arrow_schema:: { ArrowError , DataType , Field } ;
2525use std:: any:: Any ;
@@ -42,12 +42,14 @@ struct FieldData {
4242}
4343
4444/// A type-erased [`BufferBuilder`] used by [`FieldData`]
45- trait FieldDataValues : std:: fmt:: Debug {
45+ trait FieldDataValues : std:: fmt:: Debug + Send + Sync {
4646 fn as_mut_any ( & mut self ) -> & mut dyn Any ;
4747
4848 fn append_null ( & mut self ) ;
4949
5050 fn finish ( & mut self ) -> Buffer ;
51+
52+ fn finish_cloned ( & self ) -> Buffer ;
5153}
5254
5355impl < T : ArrowNativeType > FieldDataValues for BufferBuilder < T > {
@@ -62,6 +64,10 @@ impl<T: ArrowNativeType> FieldDataValues for BufferBuilder<T> {
6264 fn finish ( & mut self ) -> Buffer {
6365 self . finish ( )
6466 }
67+
68+ fn finish_cloned ( & self ) -> Buffer {
69+ Buffer :: from_slice_ref ( self . as_slice ( ) )
70+ }
6571}
6672
6773impl FieldData {
@@ -138,7 +144,7 @@ impl FieldData {
138144/// assert_eq!(union.value_offset(1), 1);
139145/// assert_eq!(union.value_offset(2), 2);
140146/// ```
141- #[ derive( Debug ) ]
147+ #[ derive( Debug , Default ) ]
142148pub struct UnionBuilder {
143149 /// The current number of slots in the array
144150 len : usize ,
@@ -310,4 +316,172 @@ impl UnionBuilder {
310316 children,
311317 )
312318 }
319+
320+ /// Builds this builder creating a new `UnionArray` without consuming the builder.
321+ ///
322+ /// This is used for the `finish_cloned` implementation in `ArrayBuilder`.
323+ fn build_cloned ( & self ) -> Result < UnionArray , ArrowError > {
324+ let mut children = Vec :: with_capacity ( self . fields . len ( ) ) ;
325+ let union_fields: Vec < _ > = self
326+ . fields
327+ . iter ( )
328+ . map ( |( name, field_data) | {
329+ let FieldData {
330+ type_id,
331+ data_type,
332+ values_buffer,
333+ slots,
334+ null_buffer_builder,
335+ } = field_data;
336+
337+ let array_ref = make_array ( unsafe {
338+ ArrayDataBuilder :: new ( data_type. clone ( ) )
339+ . add_buffer ( values_buffer. finish_cloned ( ) )
340+ . len ( * slots)
341+ . nulls ( null_buffer_builder. finish_cloned ( ) )
342+ . build_unchecked ( )
343+ } ) ;
344+ children. push ( array_ref) ;
345+ (
346+ * type_id,
347+ Arc :: new ( Field :: new ( name. clone ( ) , data_type. clone ( ) , false ) ) ,
348+ )
349+ } )
350+ . collect ( ) ;
351+ UnionArray :: try_new (
352+ union_fields. into_iter ( ) . collect ( ) ,
353+ ScalarBuffer :: from ( self . type_id_builder . as_slice ( ) . to_vec ( ) ) ,
354+ self . value_offset_builder
355+ . as_ref ( )
356+ . map ( |builder| ScalarBuffer :: from ( builder. as_slice ( ) . to_vec ( ) ) ) ,
357+ children,
358+ )
359+ }
360+ }
361+
362+ impl ArrayBuilder for UnionBuilder {
363+ /// Returns the number of array slots in the builder
364+ fn len ( & self ) -> usize {
365+ self . len
366+ }
367+
368+ /// Builds the array
369+ fn finish ( & mut self ) -> ArrayRef {
370+ // Even simpler - just move the builder using mem::take and replace with default
371+ let builder = std:: mem:: take ( self ) ;
372+
373+ // Since UnionBuilder controls all invariants, this should never fail
374+ Arc :: new ( builder. build ( ) . unwrap ( ) )
375+ }
376+
377+ /// Builds the array without resetting the underlying builder
378+ fn finish_cloned ( & self ) -> ArrayRef {
379+ // We construct the UnionArray carefully to ensure try_new cannot fail.
380+ // Since UnionBuilder controls all the invariants, this should never panic.
381+ Arc :: new ( self . build_cloned ( ) . unwrap_or_else ( |err| {
382+ panic ! ( "UnionBuilder::build_cloned failed unexpectedly: {}" , err)
383+ } ) )
384+ }
385+
386+ /// Returns the builder as a non-mutable `Any` reference
387+ fn as_any ( & self ) -> & dyn Any {
388+ self
389+ }
390+
391+ /// Returns the builder as a mutable `Any` reference
392+ fn as_any_mut ( & mut self ) -> & mut dyn Any {
393+ self
394+ }
395+
396+ /// Returns the boxed builder as a box of `Any`
397+ fn into_box_any ( self : Box < Self > ) -> Box < dyn Any > {
398+ self
399+ }
400+ }
401+
#[cfg(test)]
mod tests {
    use super::*;
    use crate::array::Array;
    use crate::cast::AsArray;
    use crate::types::{Float64Type, Int32Type};

    /// Drives a dense `UnionBuilder` through the `ArrayBuilder` methods:
    /// `len`, the non-destructive `finish_cloned`, and the destructive `finish`.
    #[test]
    fn test_union_builder_array_builder_trait() {
        let mut builder = UnionBuilder::new_dense();

        // Seed the builder with three values across two fields.
        builder.append::<Int32Type>("a", 1).unwrap();
        builder.append::<Float64Type>("b", 3.0).unwrap();
        builder.append::<Int32Type>("a", 4).unwrap();
        assert_eq!(builder.len(), 3);

        // Snapshot without consuming the builder.
        let snapshot = builder.finish_cloned();
        assert_eq!(snapshot.len(), 3);

        let snapshot_union = snapshot.as_any().downcast_ref::<UnionArray>().unwrap();
        assert_eq!(snapshot_union.type_ids(), &[0, 1, 0]);
        assert_eq!(snapshot_union.offsets().unwrap().as_ref(), &[0, 0, 1]);

        let snapshot_ints = snapshot_union.child(0).as_primitive::<Int32Type>();
        assert_eq!(snapshot_ints.value(0), 1);
        assert_eq!(snapshot_ints.value(1), 4);
        let snapshot_floats = snapshot_union.child(1).as_primitive::<Float64Type>();
        assert_eq!(snapshot_floats.value(0), 3.0);

        // The snapshot must not have disturbed the builder.
        builder.append::<Float64Type>("b", 5.0).unwrap();
        assert_eq!(builder.len(), 4);

        // Now take the final array out of the builder.
        let finished = builder.finish();
        assert_eq!(finished.len(), 4);

        let finished_union = finished.as_any().downcast_ref::<UnionArray>().unwrap();
        assert_eq!(finished_union.type_ids(), &[0, 1, 0, 1]);
        assert_eq!(finished_union.offsets().unwrap().as_ref(), &[0, 0, 1, 1]);

        let finished_ints = finished_union.child(0).as_primitive::<Int32Type>();
        assert_eq!(finished_ints.value(0), 1);
        assert_eq!(finished_ints.value(1), 4);
        let finished_floats = finished_union.child(1).as_primitive::<Float64Type>();
        assert_eq!(finished_floats.value(0), 3.0);
        assert_eq!(finished_floats.value(1), 5.0);
    }

    /// Uses a sparse `UnionBuilder` behind `Box<dyn ArrayBuilder>` and checks
    /// the array produced through the type-erased interface.
    #[test]
    fn test_union_builder_type_erased() {
        let mut builders: Vec<Box<dyn ArrayBuilder>> = vec![Box::new(UnionBuilder::new_sparse())];

        // Recover the concrete builder to append typed values.
        let concrete = builders[0]
            .as_any_mut()
            .downcast_mut::<UnionBuilder>()
            .unwrap();
        concrete.append::<Int32Type>("x", 10).unwrap();
        concrete.append::<Float64Type>("y", 20.0).unwrap();

        assert_eq!(builders[0].len(), 2);

        let arrays = builders
            .into_iter()
            .map(|mut erased| erased.finish())
            .collect::<Vec<_>>();
        assert_eq!(arrays[0].len(), 2);

        let sparse = arrays[0].as_any().downcast_ref::<UnionArray>().unwrap();
        assert_eq!(sparse.type_ids(), &[0, 1]);
        // A sparse union carries no offsets buffer.
        assert!(sparse.offsets().is_none());

        // In the sparse layout every child spans all slots, with nulls where
        // another field holds the value.
        let ints = sparse.child(0).as_primitive::<Int32Type>();
        let floats = sparse.child(1).as_primitive::<Float64Type>();
        assert_eq!(ints.value(0), 10);
        assert!(ints.is_null(1));
        assert!(floats.is_null(0));
        assert_eq!(floats.value(1), 20.0);
    }
}
0 commit comments