From a4263168e2d0c1508f538fbe2304fbdd7d5da3f5 Mon Sep 17 00:00:00 2001 From: Neil Conway Date: Sat, 18 Apr 2026 15:53:02 -0400 Subject: [PATCH 1/3] perf: Optimize `qualified_name` --- datafusion/common/src/dfschema.rs | 55 ++++++++++++++++++++++++++++--- 1 file changed, 51 insertions(+), 4 deletions(-) diff --git a/datafusion/common/src/dfschema.rs b/datafusion/common/src/dfschema.rs index de0aacf9e8bcd..8462cb33f6452 100644 --- a/datafusion/common/src/dfschema.rs +++ b/datafusion/common/src/dfschema.rs @@ -1338,11 +1338,37 @@ impl SchemaExt for Schema { } } +/// Build a fully-qualified field name string. This is equivalent to +/// `format!("{q}.{name}")` when `qualifier` is `Some`, or just `name` when +/// `None`. We avoid going through the `fmt` machinery for performance reasons. pub fn qualified_name(qualifier: Option<&TableReference>, name: &str) -> String { - match qualifier { - Some(q) => format!("{q}.{name}"), - None => name.to_string(), - } + let (a, b, c) = match qualifier { + None => return name.to_string(), + Some(TableReference::Bare { table }) => (table.as_ref(), "", ""), + Some(TableReference::Partial { schema, table }) => { + (schema.as_ref(), table.as_ref(), "") + } + Some(TableReference::Full { + catalog, + schema, + table, + }) => (catalog.as_ref(), schema.as_ref(), table.as_ref()), + }; + + // Reserve capacity for at most 3 separators; this might slighty over-allocate. + let mut s = String::with_capacity(a.len() + b.len() + c.len() + 3 + name.len()); + s.push_str(a); + if !b.is_empty() { + s.push('.'); + s.push_str(b); + } + if !c.is_empty() { + s.push('.'); + s.push_str(c); + } + s.push('.'); + s.push_str(name); + s } #[cfg(test)] @@ -1351,6 +1377,27 @@ mod tests { use super::*; + /// `qualified_name` doesn't use `TableReference::Display` for performance + /// reasons, but check that the output is consistent. + #[test] + fn qualified_name_agrees_with_display() { + let cases: &[(Option, &str)] = &[ + (None, "col"), + (Some(TableReference::bare("t")), "c0"), + (Some(TableReference::partial("s", "t")), "c0"), + (Some(TableReference::full("c", "s", "t")), "c0"), + (Some(TableReference::bare("mytable")), "some_column_name"), + ]; + for (qualifier, name) in cases { + let actual = qualified_name(qualifier.as_ref(), name); + let expected = match qualifier { + Some(q) => format!("{q}.{name}"), + None => name.to_string(), + }; + assert_eq!(actual, expected, "qualifier={qualifier:?} name={name}"); + } + } + #[test] fn qualifier_in_name() -> Result<()> { let col = Column::from_name("t1.c0"); From ec144c1313535429a0d18a161685c5115865d6d4 Mon Sep 17 00:00:00 2001 From: Neil Conway Date: Sat, 18 Apr 2026 16:02:17 -0400 Subject: [PATCH 2/3] Fix typo in comment --- datafusion/common/src/dfschema.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/common/src/dfschema.rs b/datafusion/common/src/dfschema.rs index 8462cb33f6452..d845a29d72483 100644 --- a/datafusion/common/src/dfschema.rs +++ b/datafusion/common/src/dfschema.rs @@ -1355,7 +1355,7 @@ pub fn qualified_name(qualifier: Option<&TableReference>, name: &str) -> String }) => (catalog.as_ref(), schema.as_ref(), table.as_ref()), }; - // Reserve capacity for at most 3 separators; this might slighty over-allocate. + // Reserve capacity for at most 3 separators; this might slightly over-allocate. let mut s = String::with_capacity(a.len() + b.len() + c.len() + 3 + name.len()); s.push_str(a); if !b.is_empty() { From ae66678aea9003fcf2442a2342d83a68775de353 Mon Sep 17 00:00:00 2001 From: Neil Conway Date: Sat, 18 Apr 2026 16:51:58 -0400 Subject: [PATCH 3/3] Bug fix for empty segments --- datafusion/common/src/dfschema.rs | 36 ++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/datafusion/common/src/dfschema.rs b/datafusion/common/src/dfschema.rs index d845a29d72483..50b632e8211de 100644 --- a/datafusion/common/src/dfschema.rs +++ b/datafusion/common/src/dfschema.rs @@ -1342,27 +1342,34 @@ impl SchemaExt for Schema { /// `format!("{q}.{name}")` when `qualifier` is `Some`, or just `name` when /// `None`. We avoid going through the `fmt` machinery for performance reasons. pub fn qualified_name(qualifier: Option<&TableReference>, name: &str) -> String { - let (a, b, c) = match qualifier { + let qualifier = match qualifier { None => return name.to_string(), - Some(TableReference::Bare { table }) => (table.as_ref(), "", ""), - Some(TableReference::Partial { schema, table }) => { - (schema.as_ref(), table.as_ref(), "") + Some(q) => q, + }; + let (a, b, c) = match qualifier { + TableReference::Bare { table } => (table.as_ref(), None, None), + TableReference::Partial { schema, table } => { + (schema.as_ref(), Some(table.as_ref()), None) } - Some(TableReference::Full { + TableReference::Full { catalog, schema, table, - }) => (catalog.as_ref(), schema.as_ref(), table.as_ref()), + } => ( + catalog.as_ref(), + Some(schema.as_ref()), + Some(table.as_ref()), + ), }; - // Reserve capacity for at most 3 separators; this might slightly over-allocate. - let mut s = String::with_capacity(a.len() + b.len() + c.len() + 3 + name.len()); + let extra = b.unwrap_or("").len() + c.unwrap_or("").len(); + let mut s = String::with_capacity(a.len() + extra + 3 + name.len()); s.push_str(a); - if !b.is_empty() { + if let Some(b) = b { s.push('.'); s.push_str(b); } - if !c.is_empty() { + if let Some(c) = c { s.push('.'); s.push_str(c); } @@ -1387,6 +1394,15 @@ mod tests { (Some(TableReference::partial("s", "t")), "c0"), (Some(TableReference::full("c", "s", "t")), "c0"), (Some(TableReference::bare("mytable")), "some_column_name"), + // Empty segments must be preserved so that distinct qualified + // fields don't collide in `DFSchema::field_names()`. + (Some(TableReference::bare("")), "col"), + (Some(TableReference::partial("s", "")), "col"), + (Some(TableReference::partial("", "t")), "col"), + (Some(TableReference::full("c", "", "t")), "col"), + (Some(TableReference::full("", "s", "t")), "col"), + (Some(TableReference::full("c", "s", "")), "col"), + (Some(TableReference::full("", "", "")), "col"), ]; for (qualifier, name) in cases { let actual = qualified_name(qualifier.as_ref(), name);