From 7d98e8b4028520985f8ab8d04e6dbe9f68466f69 Mon Sep 17 00:00:00 2001 From: Martin Hilton Date: Tue, 3 Mar 2026 16:13:13 +0000 Subject: [PATCH 1/3] feat: support nanosecond date_part Support using 'nanosecond' as a part in the date_part function. If nanosecond is requested then the date_part will return the seconds, and smaller units, scaled to nanoseconds. This is consistent with the behaviour of 'millisecond' and 'microsecond'. In order to accommodate the required range of results, a request for 'nanosecond' will return a 64-bit integer, rather than a 32-bit integer as is returned for everything else (except 'epoch'). --- .../functions/src/datetime/date_part.rs | 44 ++++++++++++- .../test_files/datetime/date_part.slt | 65 ++++++++++++++----- 2 files changed, 92 insertions(+), 17 deletions(-) diff --git a/datafusion/functions/src/datetime/date_part.rs b/datafusion/functions/src/datetime/date_part.rs index e3080c9d1a007..86e35d3b0ad26 100644 --- a/datafusion/functions/src/datetime/date_part.rs +++ b/datafusion/functions/src/datetime/date_part.rs @@ -20,7 +20,7 @@ use std::str::FromStr; use std::sync::Arc; use arrow::array::timezone::Tz; -use arrow::array::{Array, ArrayRef, Float64Array, Int32Array}; +use arrow::array::{Array, ArrayRef, Float64Array, Int32Array, Int64Array}; use arrow::compute::kernels::cast_utils::IntervalUnit; use arrow::compute::{DatePart, binary, date_part}; use arrow::datatypes::DataType::{ @@ -167,6 +167,8 @@ impl ScalarUDFImpl for DatePartFunc { .map(|part| { if is_epoch(part) { Field::new(self.name(), DataType::Float64, nullable) + } else if is_nanosecond(part) { + Field::new(self.name(), DataType::Int64, nullable) } else { Field::new(self.name(), DataType::Int32, nullable) } @@ -218,7 +220,7 @@ impl ScalarUDFImpl for DatePartFunc { IntervalUnit::Second => seconds_as_i32(array.as_ref(), Second)?, IntervalUnit::Millisecond => seconds_as_i32(array.as_ref(), Millisecond)?, IntervalUnit::Microsecond => seconds_as_i32(array.as_ref(), 
Microsecond)?, - IntervalUnit::Nanosecond => seconds_as_i32(array.as_ref(), Nanosecond)?, + IntervalUnit::Nanosecond => seconds_ns(array.as_ref())?, // century and decade are not supported by `DatePart`, although they are supported in postgres _ => return exec_err!("Date part '{part}' not supported"), } @@ -321,6 +323,12 @@ fn is_epoch(part: &str) -> bool { matches!(part.to_lowercase().as_str(), "epoch") } +fn is_nanosecond(part: &str) -> bool { + IntervalUnit::from_str(part_normalization(part)) + .map(|p| matches!(p, IntervalUnit::Nanosecond)) + .unwrap_or(false) +} + fn date_to_scalar(date: NaiveDate, target_type: &DataType) -> Option { Some(match target_type { Date32 => ScalarValue::Date32(Some(Date32Type::from_naive_date(date))), @@ -517,3 +525,35 @@ fn epoch(array: &dyn Array) -> Result { }; Ok(Arc::new(f)) } + +/// Invoke [`date_part`] on an `array` (e.g. Timestamp) and convert the +/// result to a total number of nanoseconds as an Int64 array. +fn seconds_ns(array: &dyn Array) -> Result { + let secs = date_part(array, DatePart::Second)?; + // This assumes array is primitive and not a dictionary + let secs = as_int32_array(secs.as_ref())?; + let subsecs = date_part(array, DatePart::Nanosecond)?; + let subsecs = as_int32_array(subsecs.as_ref())?; + + // Special case where there are no nulls. + if subsecs.null_count() == 0 { + let r: Int64Array = binary(secs, subsecs, |secs, subsecs| { + (secs as i64) * 1_000_000_000 + (subsecs as i64) + })?; + Ok(Arc::new(r)) + } else { + // Nulls in secs are preserved, nulls in subsecs are treated as zero to account for the case + // where the number of nanoseconds overflows. 
+ let r: Int64Array = secs + .iter() + .zip(subsecs) + .map(|(secs, subsecs)| { + secs.map(|secs| { + let subsecs = subsecs.unwrap_or(0); + (secs as i64) * 1_000_000_000 + (subsecs as i64) + }) + }) + .collect(); + Ok(Arc::new(r)) + } +} diff --git a/datafusion/sqllogictest/test_files/datetime/date_part.slt b/datafusion/sqllogictest/test_files/datetime/date_part.slt index 79d6d8ac05098..4c73ce6bbbf07 100644 --- a/datafusion/sqllogictest/test_files/datetime/date_part.slt +++ b/datafusion/sqllogictest/test_files/datetime/date_part.slt @@ -212,6 +212,22 @@ SELECT date_part('microsecond', ts_nano_no_tz), date_part('microsecond', ts_nano 123456 123456 123456 123000 123000 123000 123456 123456 123456 123000 123000 123000 +# date_part (nanosecond) +query IIIIII +SELECT date_part('nanosecond', ts_nano_no_tz), date_part('nanosecond', ts_nano_utc), date_part('nanosecond', ts_nano_eastern), date_part('nanosecond', ts_milli_no_tz), date_part('nanosecond', ts_milli_utc), date_part('nanosecond', ts_milli_eastern) FROM source_ts; +---- +0 0 0 0 0 0 +0 0 0 0 0 0 +0 0 0 0 0 0 +0 0 0 0 0 0 +0 0 0 0 0 0 +0 0 0 0 0 0 +0 0 0 0 0 0 +30000000000 30000000000 30000000000 30000000000 30000000000 30000000000 +123000000 123000000 123000000 123000000 123000000 123000000 +123456000 123456000 123456000 123000000 123000000 123000000 +123456789 123456789 123456789 123000000 123000000 123000000 + ### Cleanup statement ok drop table source_ts; @@ -448,8 +464,10 @@ SELECT EXTRACT(microsecond FROM timestamp '2020-09-08T12:00:12.12345678+00:00') ---- 12123456 -query error DataFusion error: This feature is not implemented: Date part Nanosecond not supported +query I SELECT EXTRACT(nanosecond FROM timestamp '2020-09-08T12:00:12.12345678+00:00') +---- +12123456780 query I SELECT EXTRACT("second" FROM timestamp '2020-09-08T12:00:12.12345678+00:00') @@ -466,8 +484,10 @@ SELECT EXTRACT("microsecond" FROM timestamp '2020-09-08T12:00:12.12345678+00:00' ---- 12123456 -query error DataFusion error: This feature 
is not implemented: Date part Nanosecond not supported +query I SELECT EXTRACT("nanosecond" FROM timestamp '2020-09-08T12:00:12.12345678+00:00') +---- +12123456780 query I SELECT EXTRACT('second' FROM timestamp '2020-09-08T12:00:12.12345678+00:00') @@ -484,9 +504,10 @@ SELECT EXTRACT('microsecond' FROM timestamp '2020-09-08T12:00:12.12345678+00:00' ---- 12123456 -query error DataFusion error: This feature is not implemented: Date part Nanosecond not supported +query I SELECT EXTRACT('nanosecond' FROM timestamp '2020-09-08T12:00:12.12345678+00:00') - +---- +12123456780 # Keep precision when coercing Utf8 to Timestamp query I @@ -504,9 +525,10 @@ SELECT date_part('microsecond', timestamp '2020-09-08T12:00:12.12345678+00:00') ---- 12123456 -query error DataFusion error: This feature is not implemented: Date part Nanosecond not supported +query I SELECT date_part('nanosecond', timestamp '2020-09-08T12:00:12.12345678+00:00') - +---- +12123456780 query I SELECT date_part('second', '2020-09-08T12:00:12.12345678+00:00') @@ -523,8 +545,10 @@ SELECT date_part('microsecond', '2020-09-08T12:00:12.12345678+00:00') ---- 12123456 -query error DataFusion error: This feature is not implemented: Date part Nanosecond not supported +query I SELECT date_part('nanosecond', '2020-09-08T12:00:12.12345678+00:00') +---- +12123456780 # test_date_part_time @@ -579,8 +603,10 @@ SELECT extract(microsecond from arrow_cast('23:32:50'::time, 'Time32(Second)')) ---- 50000000 -query error DataFusion error: This feature is not implemented: Date part Nanosecond not supported +query I SELECT extract(nanosecond from arrow_cast('23:32:50'::time, 'Time32(Second)')) +---- +50000000000 query R SELECT date_part('epoch', arrow_cast('23:32:50'::time, 'Time32(Second)')) @@ -643,8 +669,10 @@ SELECT extract(microsecond from arrow_cast('23:32:50.123'::time, 'Time32(Millise ---- 50123000 -query error DataFusion error: This feature is not implemented: Date part Nanosecond not supported +query I SELECT 
extract(nanosecond from arrow_cast('23:32:50.123'::time, 'Time32(Millisecond)')) +---- +50123000000 query R SELECT date_part('epoch', arrow_cast('23:32:50.123'::time, 'Time32(Millisecond)')) @@ -707,8 +735,10 @@ SELECT extract(microsecond from arrow_cast('23:32:50.123456'::time, 'Time64(Micr ---- 50123456 -query error DataFusion error: This feature is not implemented: Date part Nanosecond not supported +query I SELECT extract(nanosecond from arrow_cast('23:32:50.123456'::time, 'Time64(Microsecond)')) +---- +50123456000 query R SELECT date_part('epoch', arrow_cast('23:32:50.123456'::time, 'Time64(Microsecond)')) @@ -797,8 +827,10 @@ SELECT extract(us from arrow_cast('23:32:50.123456789'::time, 'Time64(Nanosecond ---- 50123456 -query error DataFusion error: This feature is not implemented: Date part Nanosecond not supported +query I SELECT date_part('nanosecond', arrow_cast('23:32:50.123456789'::time, 'Time64(Nanosecond)')) +---- +50123456789 query R SELECT date_part('epoch', arrow_cast('23:32:50.123456789'::time, 'Time64(Nanosecond)')) @@ -1172,8 +1204,11 @@ SELECT (date_part('microsecond', now()) = EXTRACT(microsecond FROM now())) ---- true -query error DataFusion error: This feature is not implemented: Date part Nanosecond not supported +query B SELECT (date_part('nanosecond', now()) = EXTRACT(nanosecond FROM now())) +---- +true + query I SELECT date_part('ISODOW', CAST('2000-01-01' AS DATE)) @@ -1211,7 +1246,7 @@ query D select c from t1 where extract(year from c) <> 2024; ---- 1990-01-01 -2030-01-01 +2030-01-01 query D select c from t1 where extract(year from c) > 2024; @@ -1495,7 +1530,7 @@ query TT explain select c from t1 where extract (nanosecond from c) = 2024 ---- logical_plan -01)Filter: date_part(Utf8("NANOSECOND"), t1.c) = Int32(2024) +01)Filter: date_part(Utf8("NANOSECOND"), t1.c) = Int64(2024) 02)--TableScan: t1 projection=[c] physical_plan 01)FilterExec: date_part(NANOSECOND, c@0) = 2024 @@ -1582,7 +1617,7 @@ query D select c1_date32 from t2 where 
extract(year from c1_date32) <> 2024; ---- 1990-05-20 -2030-12-31 +2030-12-31 query P select c2_ts_sec from t2 where extract(year from c2_ts_sec) > 2024; From 8d02135ad38ca74e1ccda747422039f6e7fb4a5f Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 4 Mar 2026 15:53:31 -0500 Subject: [PATCH 2/3] Add comments --- datafusion/functions/src/datetime/date_part.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/datafusion/functions/src/datetime/date_part.rs b/datafusion/functions/src/datetime/date_part.rs index 86e35d3b0ad26..f3d676f11d54e 100644 --- a/datafusion/functions/src/datetime/date_part.rs +++ b/datafusion/functions/src/datetime/date_part.rs @@ -168,6 +168,7 @@ impl ScalarUDFImpl for DatePartFunc { if is_epoch(part) { Field::new(self.name(), DataType::Float64, nullable) } else if is_nanosecond(part) { + // See notes on [seconds_ns] for rationale Field::new(self.name(), DataType::Int64, nullable) } else { Field::new(self.name(), DataType::Int32, nullable) @@ -528,6 +529,10 @@ fn epoch(array: &dyn Array) -> Result { /// Invoke [`date_part`] on an `array` (e.g. Timestamp) and convert the /// result to a total number of nanoseconds as an Int64 array. +/// +/// This returns an Int64 rather than Int32 because there are 1 billion +/// `nanosecond`s in each second, so representing up to 60 seconds as +/// nanoseconds can be values up to 60 billion, which does not fit in Int32. 
fn seconds_ns(array: &dyn Array) -> Result { let secs = date_part(array, DatePart::Second)?; // This assumes array is primitive and not a dictionary From ffcb8ce4cdc67e5c5dab239f1ed3589bb96602d0 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 4 Mar 2026 15:53:39 -0500 Subject: [PATCH 3/3] Add tests for nulls/comments --- .../test_files/datetime/date_part.slt | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/datafusion/sqllogictest/test_files/datetime/date_part.slt b/datafusion/sqllogictest/test_files/datetime/date_part.slt index 4c73ce6bbbf07..07dc1302b9ece 100644 --- a/datafusion/sqllogictest/test_files/datetime/date_part.slt +++ b/datafusion/sqllogictest/test_files/datetime/date_part.slt @@ -449,6 +449,12 @@ SELECT arrow_typeof(date_part('minute', to_timestamp('2020-09-08T12:12:00+00:00' ---- Int32 +# nanosecond can exceed Int32 and returns Int64 +query T +SELECT arrow_typeof(date_part('nanosecond', to_timestamp('2020-09-08T12:12:00+00:00'))) +---- +Int64 + query I SELECT EXTRACT(second FROM timestamp '2020-09-08T12:00:12.12345678+00:00') ---- @@ -464,6 +470,7 @@ SELECT EXTRACT(microsecond FROM timestamp '2020-09-08T12:00:12.12345678+00:00') ---- 12123456 +# note the output is more than Int32 can store query I SELECT EXTRACT(nanosecond FROM timestamp '2020-09-08T12:00:12.12345678+00:00') ---- @@ -550,6 +557,26 @@ SELECT date_part('nanosecond', '2020-09-08T12:00:12.12345678+00:00') ---- 12123456780 +query I +SELECT EXTRACT(nanosecond FROM ts) +FROM (VALUES + (timestamp '2020-09-08T12:00:12.12345678+00:00'), + (NULL::timestamp) +) AS t(ts) +---- +12123456780 +NULL + +query I +SELECT date_part('nanosecond', ts) +FROM (VALUES + (timestamp '2020-09-08T12:00:12.12345678+00:00'), + (NULL::timestamp) +) AS t(ts) +---- +12123456780 +NULL + # test_date_part_time ## time32 seconds