Skip to content

Commit be0ede7

Browse files
authored
[Variant] ParentState handles finish/rollback for builders (apache#8185)
# Which issue does this PR close? - Closes apache#8182 (directly) - Closes apache#8170 (as a side effect) - Closes apache#8180 (as a side effect) # Rationale for this change Make finish/rollback handling simpler, robust and uniform by pulling it inside `ParentState` (instead of each builder doing it manually) and making it fully eager (instead of a mix of eager and lazy). # What changes are included in this PR? See above. # Are these changes tested? Yes, existing unit tests cover it (including some that needed adjustment due to behavior changes), along with some new testing. # Are there any user-facing changes? `ObjectBuilder` methods `new_list` and `new_object` now panic if a duplicate field name is provided, and new fallible versions `try_new_list` and `try_new_object` are provided. `ObjectBuilder::finish` signature remains fallible, but it always returns `Result::Ok` * TODO: apache#8184 Existing `VariantBuilderExt` methods `new_list` and `new_object` now panic if they encounter a duplicate field name; new fallible versions `try_new_list` and `try_new_object` are provided.
1 parent 8875504 commit be0ede7

File tree

3 files changed

+304
-237
lines changed

3 files changed

+304
-237
lines changed

parquet-variant-compute/src/variant_array_builder.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
2020
use crate::VariantArray;
2121
use arrow::array::{ArrayRef, BinaryViewArray, BinaryViewBuilder, NullBufferBuilder, StructArray};
22-
use arrow_schema::{DataType, Field, Fields};
22+
use arrow_schema::{ArrowError, DataType, Field, Fields};
2323
use parquet_variant::{ListBuilder, ObjectBuilder, Variant, VariantBuilder, VariantBuilderExt};
2424
use std::sync::Arc;
2525

@@ -222,12 +222,12 @@ impl VariantBuilderExt for VariantArrayVariantBuilder<'_> {
222222
self.variant_builder.append_value(value);
223223
}
224224

225-
fn new_list(&mut self) -> ListBuilder<'_> {
226-
self.variant_builder.new_list()
225+
fn try_new_list(&mut self) -> Result<ListBuilder<'_>, ArrowError> {
226+
Ok(self.variant_builder.new_list())
227227
}
228228

229-
fn new_object(&mut self) -> ObjectBuilder<'_> {
230-
self.variant_builder.new_object()
229+
fn try_new_object(&mut self) -> Result<ObjectBuilder<'_>, ArrowError> {
230+
Ok(self.variant_builder.new_object())
231231
}
232232
}
233233

parquet-variant-json/src/from_json.rs

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -111,14 +111,14 @@ fn append_json(json: &Value, builder: &mut impl VariantBuilderExt) -> Result<(),
111111
}
112112
Value::String(s) => builder.append_value(s.as_str()),
113113
Value::Array(arr) => {
114-
let mut list_builder = builder.new_list();
114+
let mut list_builder = builder.try_new_list()?;
115115
for val in arr {
116116
append_json(val, &mut list_builder)?;
117117
}
118118
list_builder.finish();
119119
}
120120
Value::Object(obj) => {
121-
let mut obj_builder = builder.new_object();
121+
let mut obj_builder = builder.try_new_object()?;
122122
for (key, value) in obj.iter() {
123123
let mut field_builder = ObjectFieldBuilder {
124124
key,
@@ -142,12 +142,12 @@ impl VariantBuilderExt for ObjectFieldBuilder<'_, '_, '_> {
142142
self.builder.insert(self.key, value);
143143
}
144144

145-
fn new_list(&mut self) -> ListBuilder<'_> {
146-
self.builder.new_list(self.key)
145+
fn try_new_list(&mut self) -> Result<ListBuilder<'_>, ArrowError> {
146+
self.builder.try_new_list(self.key)
147147
}
148148

149-
fn new_object(&mut self) -> ObjectBuilder<'_> {
150-
self.builder.new_object(self.key)
149+
fn try_new_object(&mut self) -> Result<ObjectBuilder<'_>, ArrowError> {
150+
self.builder.try_new_object(self.key)
151151
}
152152
}
153153

@@ -627,10 +627,10 @@ mod test {
627627
// Verify metadata size = 1 + 2 + 2 * 497 + 3 * 496
628628
assert_eq!(metadata.len(), 2485);
629629
// Verify value size.
630-
// Size of innermost_list: 1 + 1 + 258 + 256 = 516
631-
// Size of inner object: 1 + 4 + 256 + 257 * 3 + 256 * 516 = 133128
632-
// Size of json: 1 + 4 + 512 + 1028 + 256 * 133128 = 34082313
633-
assert_eq!(value.len(), 34082313);
630+
// Size of innermost_list: 1 + 1 + 2*(128 + 1) + 2*128 = 516
631+
// Size of inner object: 1 + 4 + 2*256 + 3*(256 + 1) + 256 * 516 = 133384
632+
// Size of json: 1 + 4 + 2*256 + 4*(256 + 1) + 256 * 133384 = 34147849
633+
assert_eq!(value.len(), 34147849);
634634

635635
let mut variant_builder = VariantBuilder::new();
636636
let mut object_builder = variant_builder.new_object();

0 commit comments

Comments
 (0)