From 13f25cd346de526673b38349cbd90d76ba52467c Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Wed, 12 Nov 2025 19:26:20 +0100 Subject: [PATCH] feat: Don't drop additional statistics This is a behavioral change. In Iceberg-Rust we require upper/lower bounds to be part of the schema. But in some cases, this isn't the case, most obviously after schema evolution. In PyIceberg we expect these values in some tests: ``` FAILED tests/integration/test_inspect_table.py::test_inspect_files[2] - AssertionError: Difference in column lower_bounds: {} != {2147483546: b's3://warehouse/default/table_metadata_files/data/00000-0-8d621c18-079b-4217-afd8-559ce216e875.parquet', 2147483545: b'\x00\x00\x00\x00\x00\x00\x00\x00'} assert {} == {2147483545: ...e875.parquet'} Right contains 2 more items: {2147483545: b'\x00\x00\x00\x00\x00\x00\x00\x00', 2147483546: b's3://warehouse/default/table_metadata_files/data/00000-0-8d621c1' b'8-079b-4217-afd8-559ce216e875.parquet'} Full diff: { + , - 2147483545: b'\x00\x00\x00\x00\x00\x00\x00\x00', - 2147483546: b's3://warehouse/default/table_metadata_files/data/00000-0-8d621c1' - b'8-079b-4217-afd8-559ce216e875.parquet', } !!!!!!!!!!!!!!!!!!!!!!!!!! stopping after 1 failures !!!!!!!!!!!!!!!!!!!!!!!!!!! ==== 1 failed, 238 passed, 32 skipped, 3123 deselected in 61.56s (0:01:01) ===== ``` This is a positional delete where the field-IDs are constant, but never part of a schema (they are reserved). 
--- crates/iceberg/src/spec/manifest/_serde.rs | 58 ++++++++++++++++++++-- crates/iceberg/src/spec/manifest/mod.rs | 6 ++- 2 files changed, 58 insertions(+), 6 deletions(-) diff --git a/crates/iceberg/src/spec/manifest/_serde.rs b/crates/iceberg/src/spec/manifest/_serde.rs index 7738af46d4..140c722960 100644 --- a/crates/iceberg/src/spec/manifest/_serde.rs +++ b/crates/iceberg/src/spec/manifest/_serde.rs @@ -245,7 +245,7 @@ struct BytesEntry { fn parse_bytes_entry(v: Vec, schema: &Schema) -> Result, Error> { let mut m = HashMap::with_capacity(v.len()); for entry in v { - // We ignore the entry if the field is not found in the schema, due to schema evolution. + // Try to find the field in the schema to get proper type information if let Some(field) = schema.field_by_id(entry.key) { let data_type = field .field_type @@ -258,6 +258,10 @@ fn parse_bytes_entry(v: Vec, schema: &Schema) -> Result