Skip to content

Commit c6887ff

Browse files
authored
Fix arrow-avro type resolver register bug (apache#8046)
# Which issue does this PR close? - Closes apache#8045 # Rationale for this change Simple bug fix where the order of `name` and `namespace` is wrong in Resolver field registration method. # What changes are included in this PR? One-line bug fix and corresponding tests. # Are these changes tested? - Added a test in the same file to test the fix. Made sure it doesn't pass with the original code. # Are there any user-facing changes? No
1 parent 3e7c887 commit c6887ff

File tree

4 files changed

+403
-20
lines changed

4 files changed

+403
-20
lines changed

arrow-avro/src/codec.rs

Lines changed: 160 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -431,7 +431,7 @@ struct Resolver<'a> {
431431

432432
impl<'a> Resolver<'a> {
433433
fn register(&mut self, name: &'a str, namespace: Option<&'a str>, schema: AvroDataType) {
434-
self.map.insert((name, namespace.unwrap_or("")), schema);
434+
self.map.insert((namespace.unwrap_or(""), name), schema);
435435
}
436436

437437
fn resolve(&self, name: &str, namespace: Option<&'a str>) -> Result<AvroDataType, ArrowError> {
@@ -660,11 +660,8 @@ fn make_data_type<'a>(
660660
#[cfg(test)]
661661
mod tests {
662662
use super::*;
663-
use crate::schema::{
664-
Attributes, ComplexType, Fixed, PrimitiveType, Record, Schema, Type, TypeName,
665-
};
663+
use crate::schema::{Attributes, PrimitiveType, Schema, Type, TypeName};
666664
use serde_json;
667-
use std::collections::HashMap;
668665

669666
fn create_schema_with_logical_type(
670667
primitive_type: PrimitiveType,
@@ -681,21 +678,6 @@ mod tests {
681678
})
682679
}
683680

684-
fn create_fixed_schema(size: usize, logical_type: &'static str) -> Schema<'static> {
685-
let attributes = Attributes {
686-
logical_type: Some(logical_type),
687-
additional: Default::default(),
688-
};
689-
690-
Schema::Complex(ComplexType::Fixed(Fixed {
691-
name: "fixed_type",
692-
namespace: None,
693-
aliases: Vec::new(),
694-
size,
695-
attributes,
696-
}))
697-
}
698-
699681
#[test]
700682
fn test_date_logical_type() {
701683
let schema = create_schema_with_logical_type(PrimitiveType::Int, "date");
@@ -897,4 +879,162 @@ mod tests {
897879
_ => panic!("Expected SchemaError"),
898880
}
899881
}
882+
883+
#[test]
884+
fn test_nested_record_type_reuse_without_namespace() {
885+
let schema_str = r#"
886+
{
887+
"type": "record",
888+
"name": "Record",
889+
"fields": [
890+
{
891+
"name": "nested",
892+
"type": {
893+
"type": "record",
894+
"name": "Nested",
895+
"fields": [
896+
{ "name": "nested_int", "type": "int" }
897+
]
898+
}
899+
},
900+
{ "name": "nestedRecord", "type": "Nested" },
901+
{ "name": "nestedArray", "type": { "type": "array", "items": "Nested" } },
902+
{ "name": "nestedMap", "type": { "type": "map", "values": "Nested" } }
903+
]
904+
}
905+
"#;
906+
907+
let schema: Schema = serde_json::from_str(schema_str).unwrap();
908+
909+
let mut resolver = Resolver::default();
910+
let avro_data_type = make_data_type(&schema, None, &mut resolver, false, false).unwrap();
911+
912+
if let Codec::Struct(fields) = avro_data_type.codec() {
913+
assert_eq!(fields.len(), 4);
914+
915+
// nested
916+
assert_eq!(fields[0].name(), "nested");
917+
let nested_data_type = fields[0].data_type();
918+
if let Codec::Struct(nested_fields) = nested_data_type.codec() {
919+
assert_eq!(nested_fields.len(), 1);
920+
assert_eq!(nested_fields[0].name(), "nested_int");
921+
assert!(matches!(nested_fields[0].data_type().codec(), Codec::Int32));
922+
} else {
923+
panic!(
924+
"'nested' field is not a struct but {:?}",
925+
nested_data_type.codec()
926+
);
927+
}
928+
929+
// nestedRecord
930+
assert_eq!(fields[1].name(), "nestedRecord");
931+
let nested_record_data_type = fields[1].data_type();
932+
assert_eq!(
933+
nested_record_data_type.codec().data_type(),
934+
nested_data_type.codec().data_type()
935+
);
936+
937+
// nestedArray
938+
assert_eq!(fields[2].name(), "nestedArray");
939+
if let Codec::List(item_type) = fields[2].data_type().codec() {
940+
assert_eq!(
941+
item_type.codec().data_type(),
942+
nested_data_type.codec().data_type()
943+
);
944+
} else {
945+
panic!("'nestedArray' field is not a list");
946+
}
947+
948+
// nestedMap
949+
assert_eq!(fields[3].name(), "nestedMap");
950+
if let Codec::Map(value_type) = fields[3].data_type().codec() {
951+
assert_eq!(
952+
value_type.codec().data_type(),
953+
nested_data_type.codec().data_type()
954+
);
955+
} else {
956+
panic!("'nestedMap' field is not a map");
957+
}
958+
} else {
959+
panic!("Top-level schema is not a struct");
960+
}
961+
}
962+
963+
#[test]
964+
fn test_nested_enum_type_reuse_with_namespace() {
965+
let schema_str = r#"
966+
{
967+
"type": "record",
968+
"name": "Record",
969+
"namespace": "record_ns",
970+
"fields": [
971+
{
972+
"name": "status",
973+
"type": {
974+
"type": "enum",
975+
"name": "Status",
976+
"namespace": "enum_ns",
977+
"symbols": ["ACTIVE", "INACTIVE", "PENDING"]
978+
}
979+
},
980+
{ "name": "backupStatus", "type": "enum_ns.Status" },
981+
{ "name": "statusHistory", "type": { "type": "array", "items": "enum_ns.Status" } },
982+
{ "name": "statusMap", "type": { "type": "map", "values": "enum_ns.Status" } }
983+
]
984+
}
985+
"#;
986+
987+
let schema: Schema = serde_json::from_str(schema_str).unwrap();
988+
989+
let mut resolver = Resolver::default();
990+
let avro_data_type = make_data_type(&schema, None, &mut resolver, false, false).unwrap();
991+
992+
if let Codec::Struct(fields) = avro_data_type.codec() {
993+
assert_eq!(fields.len(), 4);
994+
995+
// status
996+
assert_eq!(fields[0].name(), "status");
997+
let status_data_type = fields[0].data_type();
998+
if let Codec::Enum(symbols) = status_data_type.codec() {
999+
assert_eq!(symbols.as_ref(), &["ACTIVE", "INACTIVE", "PENDING"]);
1000+
} else {
1001+
panic!(
1002+
"'status' field is not an enum but {:?}",
1003+
status_data_type.codec()
1004+
);
1005+
}
1006+
1007+
// backupStatus
1008+
assert_eq!(fields[1].name(), "backupStatus");
1009+
let backup_status_data_type = fields[1].data_type();
1010+
assert_eq!(
1011+
backup_status_data_type.codec().data_type(),
1012+
status_data_type.codec().data_type()
1013+
);
1014+
1015+
// statusHistory
1016+
assert_eq!(fields[2].name(), "statusHistory");
1017+
if let Codec::List(item_type) = fields[2].data_type().codec() {
1018+
assert_eq!(
1019+
item_type.codec().data_type(),
1020+
status_data_type.codec().data_type()
1021+
);
1022+
} else {
1023+
panic!("'statusHistory' field is not a list");
1024+
}
1025+
1026+
// statusMap
1027+
assert_eq!(fields[3].name(), "statusMap");
1028+
if let Codec::Map(value_type) = fields[3].data_type().codec() {
1029+
assert_eq!(
1030+
value_type.codec().data_type(),
1031+
status_data_type.codec().data_type()
1032+
);
1033+
} else {
1034+
panic!("'statusMap' field is not a map");
1035+
}
1036+
} else {
1037+
panic!("Top-level schema is not a struct");
1038+
}
1039+
}
9001040
}

0 commit comments

Comments
 (0)