@@ -74,12 +74,12 @@ pub struct VariantArrayBuilder {
7474 nulls : NullBufferBuilder ,
7575 /// buffer for all the metadata
7676 metadata_buffer : Vec < u8 > ,
77- /// ( offset, len) pairs for locations of metadata in the buffer
78- metadata_locations : Vec < ( usize , usize ) > ,
77+ /// ending offset for each serialized metadata dictionary in the buffer
78+ metadata_offsets : Vec < usize > ,
7979 /// buffer for values
8080 value_buffer : Vec < u8 > ,
81- /// ( offset, len) pairs for locations of values in the buffer
82- value_locations : Vec < ( usize , usize ) > ,
81+ /// ending offset for each serialized variant value in the buffer
82+ value_offsets : Vec < usize > ,
8383 /// The fields of the final `StructArray`
8484 ///
8585 /// TODO: 1) Add extension type metadata
@@ -96,9 +96,9 @@ impl VariantArrayBuilder {
9696 Self {
9797 nulls : NullBufferBuilder :: new ( row_capacity) ,
9898 metadata_buffer : Vec :: new ( ) , // todo allocation capacity
99- metadata_locations : Vec :: with_capacity ( row_capacity) ,
99+ metadata_offsets : Vec :: with_capacity ( row_capacity) ,
100100 value_buffer : Vec :: new ( ) ,
101- value_locations : Vec :: with_capacity ( row_capacity) ,
101+ value_offsets : Vec :: with_capacity ( row_capacity) ,
102102 fields : Fields :: from ( vec ! [ metadata_field, value_field] ) ,
103103 }
104104 }
@@ -108,15 +108,15 @@ impl VariantArrayBuilder {
108108 let Self {
109109 mut nulls,
110110 metadata_buffer,
111- metadata_locations ,
111+ metadata_offsets ,
112112 value_buffer,
113- value_locations ,
113+ value_offsets ,
114114 fields,
115115 } = self ;
116116
117- let metadata_array = binary_view_array_from_buffers ( metadata_buffer, metadata_locations ) ;
117+ let metadata_array = binary_view_array_from_buffers ( metadata_buffer, metadata_offsets ) ;
118118
119- let value_array = binary_view_array_from_buffers ( value_buffer, value_locations ) ;
119+ let value_array = binary_view_array_from_buffers ( value_buffer, value_offsets ) ;
120120
121121 // The build the final struct array
122122 let inner = StructArray :: new (
@@ -136,13 +136,8 @@ impl VariantArrayBuilder {
136136 pub fn append_null ( & mut self ) {
137137 self . nulls . append_null ( ) ;
138138 // The subfields are expected to be non-nullable according to the parquet variant spec.
139- let metadata_offset = self . metadata_buffer . len ( ) ;
140- let metadata_length = 0 ;
141- self . metadata_locations
142- . push ( ( metadata_offset, metadata_length) ) ;
143- let value_offset = self . value_buffer . len ( ) ;
144- let value_length = 0 ;
145- self . value_locations . push ( ( value_offset, value_length) ) ;
139+ self . metadata_offsets . push ( self . metadata_buffer . len ( ) ) ;
140+ self . value_offsets . push ( self . value_buffer . len ( ) ) ;
146141 }
147142
148143 /// Append the [`Variant`] to the builder as the next row
@@ -186,10 +181,7 @@ impl VariantArrayBuilder {
186181 /// assert!(variant_array.value(1).as_object().is_some());
187182 /// ```
188183 pub fn variant_builder ( & mut self ) -> VariantArrayVariantBuilder < ' _ > {
189- // append directly into the metadata and value buffers
190- let metadata_buffer = std:: mem:: take ( & mut self . metadata_buffer ) ;
191- let value_buffer = std:: mem:: take ( & mut self . value_buffer ) ;
192- VariantArrayVariantBuilder :: new ( self , metadata_buffer, value_buffer)
184+ VariantArrayVariantBuilder :: new ( self )
193185 }
194186}
195187
@@ -236,11 +228,10 @@ impl<'a> VariantArrayVariantBuilder<'a> {
236228 ///
237229 /// Note this is not public as this is a structure that is logically
238230 /// part of the [`VariantArrayBuilder`] and relies on its internal structure
239- fn new (
240- array_builder : & ' a mut VariantArrayBuilder ,
241- metadata_buffer : Vec < u8 > ,
242- value_buffer : Vec < u8 > ,
243- ) -> Self {
231+ fn new ( array_builder : & ' a mut VariantArrayBuilder ) -> Self {
232+ // append directly into the metadata and value buffers
233+ let metadata_buffer = std:: mem:: take ( & mut array_builder. metadata_buffer ) ;
234+ let value_buffer = std:: mem:: take ( & mut array_builder. value_buffer ) ;
244235 let metadata_offset = metadata_buffer. len ( ) ;
245236 let value_offset = value_buffer. len ( ) ;
246237 VariantArrayVariantBuilder {
@@ -276,27 +267,25 @@ impl<'a> VariantArrayVariantBuilder<'a> {
276267 let ( metadata_buffer, value_buffer) = std:: mem:: take ( & mut self . variant_builder ) . finish ( ) ;
277268
278269 // Sanity Check: if the buffers got smaller, something went wrong (previous data was lost)
279- let metadata_len = metadata_buffer
280- . len ( )
281- . checked_sub ( metadata_offset )
282- . expect ( "metadata length decreased unexpectedly" ) ;
283- let value_len = value_buffer
284- . len ( )
285- . checked_sub ( value_offset )
286- . expect ( "value length decreased unexpectedly" ) ;
270+ assert ! (
271+ metadata_offset <= metadata_buffer . len( ) ,
272+ "metadata length decreased unexpectedly"
273+ ) ;
274+ assert ! (
275+ value_offset <= value_buffer . len( ) ,
276+ "value length decreased unexpectedly"
277+ ) ;
287278
288279 // commit the changes by putting the
289- // offsets and lengths into the parent array builder.
290- self . array_builder
291- . metadata_locations
292- . push ( ( metadata_offset, metadata_len) ) ;
293- self . array_builder
294- . value_locations
295- . push ( ( value_offset, value_len) ) ;
296- self . array_builder . nulls . append_non_null ( ) ;
280+ // ending offsets into the parent array builder.
281+ let builder = & mut self . array_builder ;
282+ builder. metadata_offsets . push ( metadata_buffer. len ( ) ) ;
283+ builder. value_offsets . push ( value_buffer. len ( ) ) ;
284+ builder. nulls . append_non_null ( ) ;
285+
297286 // put the buffers back into the array builder
298- self . array_builder . metadata_buffer = metadata_buffer;
299- self . array_builder . value_buffer = value_buffer;
287+ builder . metadata_buffer = metadata_buffer;
288+ builder . value_buffer = value_buffer;
300289 }
301290}
302291
@@ -338,19 +327,21 @@ impl Drop for VariantArrayVariantBuilder<'_> {
338327 }
339328}
340329
341- fn binary_view_array_from_buffers (
342- buffer : Vec < u8 > ,
343- locations : Vec < ( usize , usize ) > ,
344- ) -> BinaryViewArray {
345- let mut builder = BinaryViewBuilder :: with_capacity ( locations. len ( ) ) ;
330+ fn binary_view_array_from_buffers ( buffer : Vec < u8 > , offsets : Vec < usize > ) -> BinaryViewArray {
331+ // All offsets are less than or equal to the buffer length, so we can safely cast all offsets
332+ // inside the loop below, as long as the buffer length fits in u32.
333+ u32:: try_from ( buffer. len ( ) ) . expect ( "buffer length should fit in u32" ) ;
334+
335+ let mut builder = BinaryViewBuilder :: with_capacity ( offsets. len ( ) ) ;
346336 let block = builder. append_block ( buffer. into ( ) ) ;
347337 // TODO this can be much faster if it creates the views directly during append
348- for ( offset , length ) in locations {
349- let offset = offset . try_into ( ) . expect ( "offset should fit in u32" ) ;
350- let length = length . try_into ( ) . expect ( "length should fit in u32" ) ;
338+ let mut start = 0 ;
339+ for end in offsets {
340+ let end = end as u32 ; // Safe cast: validated max offset fits in u32 above
351341 builder
352- . try_append_view ( block, offset , length )
342+ . try_append_view ( block, start , end - start )
353343 . expect ( "Failed to append view" ) ;
344+ start = end;
354345 }
355346 builder. finish ( )
356347}
0 commit comments