Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 47 additions & 2 deletions datafusion/functions/src/datetime/date_part.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ use std::str::FromStr;
use std::sync::Arc;

use arrow::array::timezone::Tz;
use arrow::array::{Array, ArrayRef, Float64Array, Int32Array};
use arrow::array::{Array, ArrayRef, Float64Array, Int32Array, Int64Array};
use arrow::compute::kernels::cast_utils::IntervalUnit;
use arrow::compute::{DatePart, binary, date_part};
use arrow::datatypes::DataType::{
Expand Down Expand Up @@ -167,6 +167,9 @@ impl ScalarUDFImpl for DatePartFunc {
.map(|part| {
if is_epoch(part) {
Field::new(self.name(), DataType::Float64, nullable)
} else if is_nanosecond(part) {
// See notes on [seconds_ns] for rationale
Field::new(self.name(), DataType::Int64, nullable)
} else {
Field::new(self.name(), DataType::Int32, nullable)
}
Expand Down Expand Up @@ -218,7 +221,7 @@ impl ScalarUDFImpl for DatePartFunc {
IntervalUnit::Second => seconds_as_i32(array.as_ref(), Second)?,
IntervalUnit::Millisecond => seconds_as_i32(array.as_ref(), Millisecond)?,
IntervalUnit::Microsecond => seconds_as_i32(array.as_ref(), Microsecond)?,
IntervalUnit::Nanosecond => seconds_as_i32(array.as_ref(), Nanosecond)?,
IntervalUnit::Nanosecond => seconds_ns(array.as_ref())?,
// century and decade are not supported by `DatePart`, although they are supported in postgres
_ => return exec_err!("Date part '{part}' not supported"),
}
Expand Down Expand Up @@ -321,6 +324,12 @@ fn is_epoch(part: &str) -> bool {
matches!(part.to_lowercase().as_str(), "epoch")
}

fn is_nanosecond(part: &str) -> bool {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it worth adding a note that this helper is needed because nanosecond has a different return type (Int64 rather than Int32)?

IntervalUnit::from_str(part_normalization(part))
.map(|p| matches!(p, IntervalUnit::Nanosecond))
.unwrap_or(false)
}

fn date_to_scalar(date: NaiveDate, target_type: &DataType) -> Option<ScalarValue> {
Some(match target_type {
Date32 => ScalarValue::Date32(Some(Date32Type::from_naive_date(date))),
Expand Down Expand Up @@ -517,3 +526,39 @@ fn epoch(array: &dyn Array) -> Result<ArrayRef> {
};
Ok(Arc::new(f))
}

/// Invoke [`date_part`] on an `array` (e.g. Timestamp) and convert the
/// result to a total number of nanoseconds as an Int64 array.
///
/// This returns an Int64 rather than Int32 because there are 1 billion
/// `nanosecond`s in each second, so representing up to 60 seconds as
/// nanoseconds can be values up to 60 billion, which does not fit in Int32.
fn seconds_ns(array: &dyn Array) -> Result<ArrayRef> {
let secs = date_part(array, DatePart::Second)?;
// This assumes array is primitive and not a dictionary
let secs = as_int32_array(secs.as_ref())?;
let subsecs = date_part(array, DatePart::Nanosecond)?;
let subsecs = as_int32_array(subsecs.as_ref())?;

// Special case where there are no nulls.
if subsecs.null_count() == 0 {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it be safer to check the nulls in secs too? It does feel like any nulls in subsecs would also be in secs, and vice versa.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I re-reviewed the code, and since they both come from calling arrow `date_part`, I don't think this is necessary.

let r: Int64Array = binary(secs, subsecs, |secs, subsecs| {
(secs as i64) * 1_000_000_000 + (subsecs as i64)
})?;
Ok(Arc::new(r))
} else {
// Nulls in secs are preserved, nulls in subsecs are treated as zero to account for the case
// where the number of nanoseconds overflows.
let r: Int64Array = secs
.iter()
.zip(subsecs)
.map(|(secs, subsecs)| {
secs.map(|secs| {
let subsecs = subsecs.unwrap_or(0);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't see any test coverage of nulls 🤔 -- maybe we can add some

(secs as i64) * 1_000_000_000 + (subsecs as i64)
})
})
.collect();
Ok(Arc::new(r))
}
}
92 changes: 77 additions & 15 deletions datafusion/sqllogictest/test_files/datetime/date_part.slt
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,22 @@ SELECT date_part('microsecond', ts_nano_no_tz), date_part('microsecond', ts_nano
123456 123456 123456 123000 123000 123000
123456 123456 123456 123000 123000 123000

# date_part (nanosecond)
query IIIIII
SELECT date_part('nanosecond', ts_nano_no_tz), date_part('nanosecond', ts_nano_utc), date_part('nanosecond', ts_nano_eastern), date_part('nanosecond', ts_milli_no_tz), date_part('nanosecond', ts_milli_utc), date_part('nanosecond', ts_milli_eastern) FROM source_ts;
----
0 0 0 0 0 0
0 0 0 0 0 0
0 0 0 0 0 0
0 0 0 0 0 0
0 0 0 0 0 0
0 0 0 0 0 0
0 0 0 0 0 0
30000000000 30000000000 30000000000 30000000000 30000000000 30000000000
123000000 123000000 123000000 123000000 123000000 123000000
123456000 123456000 123456000 123000000 123000000 123000000
123456789 123456789 123456789 123000000 123000000 123000000

### Cleanup
statement ok
drop table source_ts;
Expand Down Expand Up @@ -433,6 +449,12 @@ SELECT arrow_typeof(date_part('minute', to_timestamp('2020-09-08T12:12:00+00:00'
----
Int32

# nanosecond can exceed Int32 and returns Int64
query T
SELECT arrow_typeof(date_part('nanosecond', to_timestamp('2020-09-08T12:12:00+00:00')))
----
Int64

query I
SELECT EXTRACT(second FROM timestamp '2020-09-08T12:00:12.12345678+00:00')
----
Expand All @@ -448,8 +470,11 @@ SELECT EXTRACT(microsecond FROM timestamp '2020-09-08T12:00:12.12345678+00:00')
----
12123456

query error DataFusion error: This feature is not implemented: Date part Nanosecond not supported
# note the output is more than Int32 can store
query I
SELECT EXTRACT(nanosecond FROM timestamp '2020-09-08T12:00:12.12345678+00:00')
----
12123456780

query I
SELECT EXTRACT("second" FROM timestamp '2020-09-08T12:00:12.12345678+00:00')
Expand All @@ -466,8 +491,10 @@ SELECT EXTRACT("microsecond" FROM timestamp '2020-09-08T12:00:12.12345678+00:00'
----
12123456

query error DataFusion error: This feature is not implemented: Date part Nanosecond not supported
query I
SELECT EXTRACT("nanosecond" FROM timestamp '2020-09-08T12:00:12.12345678+00:00')
----
12123456780

query I
SELECT EXTRACT('second' FROM timestamp '2020-09-08T12:00:12.12345678+00:00')
Expand All @@ -484,9 +511,10 @@ SELECT EXTRACT('microsecond' FROM timestamp '2020-09-08T12:00:12.12345678+00:00'
----
12123456

query error DataFusion error: This feature is not implemented: Date part Nanosecond not supported
query I
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nice

SELECT EXTRACT('nanosecond' FROM timestamp '2020-09-08T12:00:12.12345678+00:00')

----
12123456780

# Keep precision when coercing Utf8 to Timestamp
query I
Expand All @@ -504,9 +532,10 @@ SELECT date_part('microsecond', timestamp '2020-09-08T12:00:12.12345678+00:00')
----
12123456

query error DataFusion error: This feature is not implemented: Date part Nanosecond not supported
query I
SELECT date_part('nanosecond', timestamp '2020-09-08T12:00:12.12345678+00:00')

----
12123456780

query I
SELECT date_part('second', '2020-09-08T12:00:12.12345678+00:00')
Expand All @@ -523,8 +552,30 @@ SELECT date_part('microsecond', '2020-09-08T12:00:12.12345678+00:00')
----
12123456

query error DataFusion error: This feature is not implemented: Date part Nanosecond not supported
query I
SELECT date_part('nanosecond', '2020-09-08T12:00:12.12345678+00:00')
----
12123456780

query I
SELECT EXTRACT(nanosecond FROM ts)
FROM (VALUES
(timestamp '2020-09-08T12:00:12.12345678+00:00'),
(NULL::timestamp)
) AS t(ts)
----
12123456780
NULL

query I
SELECT date_part('nanosecond', ts)
FROM (VALUES
(timestamp '2020-09-08T12:00:12.12345678+00:00'),
(NULL::timestamp)
) AS t(ts)
----
12123456780
NULL

# test_date_part_time

Expand Down Expand Up @@ -579,8 +630,10 @@ SELECT extract(microsecond from arrow_cast('23:32:50'::time, 'Time32(Second)'))
----
50000000

query error DataFusion error: This feature is not implemented: Date part Nanosecond not supported
query I
SELECT extract(nanosecond from arrow_cast('23:32:50'::time, 'Time32(Second)'))
----
50000000000

query R
SELECT date_part('epoch', arrow_cast('23:32:50'::time, 'Time32(Second)'))
Expand Down Expand Up @@ -643,8 +696,10 @@ SELECT extract(microsecond from arrow_cast('23:32:50.123'::time, 'Time32(Millise
----
50123000

query error DataFusion error: This feature is not implemented: Date part Nanosecond not supported
query I
SELECT extract(nanosecond from arrow_cast('23:32:50.123'::time, 'Time32(Millisecond)'))
----
50123000000

query R
SELECT date_part('epoch', arrow_cast('23:32:50.123'::time, 'Time32(Millisecond)'))
Expand Down Expand Up @@ -707,8 +762,10 @@ SELECT extract(microsecond from arrow_cast('23:32:50.123456'::time, 'Time64(Micr
----
50123456

query error DataFusion error: This feature is not implemented: Date part Nanosecond not supported
query I
SELECT extract(nanosecond from arrow_cast('23:32:50.123456'::time, 'Time64(Microsecond)'))
----
50123456000

query R
SELECT date_part('epoch', arrow_cast('23:32:50.123456'::time, 'Time64(Microsecond)'))
Expand Down Expand Up @@ -797,8 +854,10 @@ SELECT extract(us from arrow_cast('23:32:50.123456789'::time, 'Time64(Nanosecond
----
50123456

query error DataFusion error: This feature is not implemented: Date part Nanosecond not supported
query I
SELECT date_part('nanosecond', arrow_cast('23:32:50.123456789'::time, 'Time64(Nanosecond)'))
----
50123456789

query R
SELECT date_part('epoch', arrow_cast('23:32:50.123456789'::time, 'Time64(Nanosecond)'))
Expand Down Expand Up @@ -1172,8 +1231,11 @@ SELECT (date_part('microsecond', now()) = EXTRACT(microsecond FROM now()))
----
true

query error DataFusion error: This feature is not implemented: Date part Nanosecond not supported
query B
SELECT (date_part('nanosecond', now()) = EXTRACT(nanosecond FROM now()))
----
true


query I
SELECT date_part('ISODOW', CAST('2000-01-01' AS DATE))
Expand Down Expand Up @@ -1211,7 +1273,7 @@ query D
select c from t1 where extract(year from c) <> 2024;
----
1990-01-01
2030-01-01
2030-01-01

query D
select c from t1 where extract(year from c) > 2024;
Expand Down Expand Up @@ -1495,7 +1557,7 @@ query TT
explain select c from t1 where extract (nanosecond from c) = 2024
----
logical_plan
01)Filter: date_part(Utf8("NANOSECOND"), t1.c) = Int32(2024)
01)Filter: date_part(Utf8("NANOSECOND"), t1.c) = Int64(2024)
02)--TableScan: t1 projection=[c]
physical_plan
01)FilterExec: date_part(NANOSECOND, c@0) = 2024
Expand Down Expand Up @@ -1582,7 +1644,7 @@ query D
select c1_date32 from t2 where extract(year from c1_date32) <> 2024;
----
1990-05-20
2030-12-31
2030-12-31

query P
select c2_ts_sec from t2 where extract(year from c2_ts_sec) > 2024;
Expand Down