Skip to content

Commit cd0db28

Browse files
authored
refactor(query): decrease call tzdb parse (#19004)
* optimize: decrease call tzdb parse
* feat(query): cache LUTs for fast datetime math
  - add the databend-common-timezone crate and bump jiff to 0.2.16 so the workspace shares a single LUT-backed converter
  - use the new helpers inside IO parsing, expression utilities, timestamp scalars, and age() to preserve offsets/DST behaviour even for pre-1970 dates
  - expand SQLLogic/unit coverage plus add a datetime_fast_path bench
1 parent 783fef7 commit cd0db28

File tree

16 files changed

+1275
-110
lines changed

16 files changed

+1275
-110
lines changed

Cargo.lock

Lines changed: 17 additions & 6 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ members = [
2727
"src/common/tracing",
2828
"src/common/storage",
2929
"src/common/vector",
30+
"src/common/timezone",
3031
"src/common/license",
3132
"src/common/version",
3233
"src/query/ast",
@@ -175,6 +176,7 @@ databend-common-storages-stage = { path = "src/query/storages/stage" }
175176
databend-common-storages-stream = { path = "src/query/storages/stream" }
176177
databend-common-storages-system = { path = "src/query/storages/system" }
177178
databend-common-telemetry = { path = "src/common/telemetry" }
179+
databend-common-timezone = { path = "src/common/timezone" }
178180
databend-common-tracing = { path = "src/common/tracing" }
179181
databend-common-users = { path = "src/query/users" }
180182
databend-common-vector = { path = "src/common/vector" }
@@ -352,7 +354,7 @@ jaq-core = "1.5.1"
352354
jaq-interpret = "1.5.0"
353355
jaq-parse = "1.0.3"
354356
jaq-std = "1.6.0"
355-
jiff = { version = "0.2.10", features = ["serde", "tzdb-bundle-always"] }
357+
jiff = { version = "0.2.16", features = ["serde", "tzdb-bundle-always"] }
356358
jsonb = "0.5.5"
357359
jwt-simple = { version = "0.12.10", default-features = false, features = ["pure-rust"] }
358360
lenient_semver = "0.4.2"

src/common/io/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ chrono = { workspace = true }
1414
chrono-tz = { workspace = true }
1515
databend-common-base = { workspace = true }
1616
databend-common-exception = { workspace = true }
17+
databend-common-timezone = { workspace = true }
1718
enquote = { workspace = true }
1819
enumflags2 = { workspace = true }
1920
ethnum = { workspace = true }

src/common/io/src/cursor_ext/cursor_read_datetime_ext.rs

Lines changed: 50 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ use chrono_tz::Tz;
2626
use databend_common_exception::ErrorCode;
2727
use databend_common_exception::Result;
2828
use databend_common_exception::ToErrorCode;
29-
use jiff::civil::date;
29+
use databend_common_timezone::fast_utc_from_local;
3030
use jiff::civil::Date;
3131
use jiff::tz::Offset;
3232
use jiff::tz::TimeZone;
@@ -45,7 +45,6 @@ pub trait BufferReadDateTimeExt {
4545
fn read_timestamp_text(&mut self, tz: &TimeZone) -> Result<DateTimeResType>;
4646
fn parse_time_offset(
4747
&mut self,
48-
tz: &TimeZone,
4948
buf: &mut Vec<u8>,
5049
dt: &Zoned,
5150
west_tz: bool,
@@ -87,14 +86,12 @@ where T: AsRef<[u8]>
8786
// Only support HH:mm format
8887
fn parse_time_offset(
8988
&mut self,
90-
tz: &TimeZone,
9189
buf: &mut Vec<u8>,
9290
dt: &Zoned,
9391
west_tz: bool,
9492
calc_offset: impl Fn(i64, i64, &Zoned) -> Result<Zoned>,
9593
) -> Result<Zoned> {
9694
fn get_hour_minute_offset(
97-
tz: &TimeZone,
9895
dt: &Zoned,
9996
west_tz: bool,
10097
calc_offset: &impl Fn(i64, i64, &Zoned) -> Result<Zoned>,
@@ -104,24 +101,14 @@ where T: AsRef<[u8]>
104101
if (hour_offset == 14 && minute_offset == 0)
105102
|| ((0..60).contains(&minute_offset) && hour_offset < 14)
106103
{
107-
if dt.year() < 1970 {
108-
Ok(date(1970, 1, 1)
109-
.at(0, 0, 0, 0)
110-
.to_zoned(tz.clone())
111-
.map_err_to_code(ErrorCode::BadBytes, || format!("dt parse error"))?)
112-
} else {
113-
let current_tz_sec = dt.offset().seconds();
114-
let mut val_tz_sec =
115-
Offset::from_seconds(hour_offset * 3600 + minute_offset * 60)
116-
.map_err_to_code(ErrorCode::BadBytes, || {
117-
"calc offset failed.".to_string()
118-
})?
119-
.seconds();
120-
if west_tz {
121-
val_tz_sec = -val_tz_sec;
122-
}
123-
calc_offset(current_tz_sec.into(), val_tz_sec.into(), dt)
104+
let current_tz_sec = dt.offset().seconds();
105+
let mut val_tz_sec = Offset::from_seconds(hour_offset * 3600 + minute_offset * 60)
106+
.map_err_to_code(ErrorCode::BadBytes, || "calc offset failed.".to_string())?
107+
.seconds();
108+
if west_tz {
109+
val_tz_sec = -val_tz_sec;
124110
}
111+
calc_offset(current_tz_sec.into(), val_tz_sec.into(), dt)
125112
} else {
126113
Err(ErrorCode::BadBytes(format!(
127114
"Invalid Timezone Offset: The minute offset '{}' is outside the valid range. Expected range is [00-59] within a timezone gap of [-14:00, +14:00]",
@@ -146,16 +133,9 @@ where T: AsRef<[u8]>
146133
let minute_offset: i32 =
147134
lexical_core::FromLexical::from_lexical(buf.as_slice()).map_err_to_code(ErrorCode::BadBytes, || "minute offset parse error".to_string())?;
148135
// max utc: 14:00, min utc: 00:00
149-
get_hour_minute_offset(
150-
tz,
151-
dt,
152-
west_tz,
153-
&calc_offset,
154-
hour_offset,
155-
minute_offset,
156-
)
136+
get_hour_minute_offset(dt, west_tz, &calc_offset, hour_offset, minute_offset)
157137
} else {
158-
get_hour_minute_offset(tz, dt, west_tz, &calc_offset, hour_offset, 0)
138+
get_hour_minute_offset(dt, west_tz, &calc_offset, hour_offset, 0)
159139
}
160140
} else {
161141
Err(ErrorCode::BadBytes(format!(
@@ -174,14 +154,7 @@ where T: AsRef<[u8]>
174154
buf.clear();
175155
// max utc: 14:00, min utc: 00:00
176156
if (0..15).contains(&hour_offset) {
177-
get_hour_minute_offset(
178-
tz,
179-
dt,
180-
west_tz,
181-
&calc_offset,
182-
hour_offset,
183-
minute_offset,
184-
)
157+
get_hour_minute_offset(dt, west_tz, &calc_offset, hour_offset, minute_offset)
185158
} else {
186159
Err(ErrorCode::BadBytes(format!(
187160
"Invalid Timezone Offset: The hour offset '{}' is outside the valid range. Expected range is [00-14] within a timezone gap of [-14:00, +14:00]",
@@ -279,13 +252,9 @@ where T: AsRef<[u8]>
279252
buf.clear();
280253
let calc_offset = |current_tz_sec: i64, val_tz_sec: i64, dt: &Zoned| {
281254
let offset = (current_tz_sec - val_tz_sec) * 1000 * 1000;
282-
let mut ts = dt.timestamp().as_microsecond();
283-
ts += offset;
284-
let (mut secs, mut micros) = (ts / 1_000_000, ts % 1_000_000);
285-
if ts < 0 {
286-
secs -= 1;
287-
micros += 1_000_000;
288-
}
255+
let ts = dt.timestamp().as_microsecond() + offset;
256+
let secs = ts.div_euclid(1_000_000);
257+
let micros = ts.rem_euclid(1_000_000);
289258
Ok(Timestamp::new(secs, (micros as i32) * 1000)
290259
.map_err_to_code(ErrorCode::BadBytes, || {
291260
format!("Datetime {} add offset {} with error", dt, offset)
@@ -302,15 +271,13 @@ where T: AsRef<[u8]>
302271
)?))
303272
} else if self.ignore_byte(b'+') {
304273
Ok(DateTimeResType::Datetime(self.parse_time_offset(
305-
tz,
306274
&mut buf,
307275
&dt,
308276
false,
309277
calc_offset,
310278
)?))
311279
} else if self.ignore_byte(b'-') {
312280
Ok(DateTimeResType::Datetime(self.parse_time_offset(
313-
tz,
314281
&mut buf,
315282
&dt,
316283
true,
@@ -324,6 +291,8 @@ where T: AsRef<[u8]>
324291
// only date part
325292
if need_date {
326293
Ok(DateTimeResType::Date(d))
294+
} else if let Some(zoned) = fast_local_to_zoned(tz, &d, 0, 0, 0, 0) {
295+
Ok(DateTimeResType::Datetime(zoned))
327296
} else {
328297
Ok(DateTimeResType::Datetime(
329298
d.to_zoned(tz.clone())
@@ -336,15 +305,41 @@ where T: AsRef<[u8]>
336305
}
337306
}
338307

339-
// Can not directly unwrap, because of DST.
340-
// e.g.
341-
// set timezone='Europe/London';
342-
// -- if unwrap() will cause session panic.
343-
// -- https://github.com/chronotope/chrono/blob/v0.4.24/src/offset/mod.rs#L186
344-
// select to_date(to_timestamp('2021-03-28 01:00:00'));
345-
// Now add a setting enable_dst_hour_fix to control this behavior. If true, try to add a hour.
308+
/// Convert a local civil time into a `Zoned` instant using the LUT-based
309+
/// `fast_utc_from_local`. Returns `None` when the LUT cannot represent the
310+
/// request (e.g. outside 1900–2299 or in a DST gap) or when the resulting
311+
/// microsecond value is not a valid `Timestamp`; callers then fall back to Jiff's
312+
/// slower but fully general conversion. The behavior mirrors ClickHouse/Jiff: gaps return `None`, folds prefer the later instant.
313+
fn fast_local_to_zoned(
314+
tz: &TimeZone,
315+
date: &Date,
316+
hour: u8,
317+
minute: u8,
318+
second: u8,
319+
micro: u32,
320+
) -> Option<Zoned> {
321+
let micros = fast_utc_from_local(
322+
tz,
323+
i32::from(date.year()),
324+
date.month() as u8,
325+
date.day() as u8,
326+
hour,
327+
minute,
328+
second,
329+
micro,
330+
)?;
331+
let ts = Timestamp::from_microsecond(micros).ok()?;
332+
Some(ts.to_zoned(tz.clone()))
333+
}
334+
346335
fn get_local_time(tz: &TimeZone, d: &Date, times: &mut Vec<u32>) -> Result<Zoned> {
347-
d.at(times[0] as i8, times[1] as i8, times[2] as i8, 0)
336+
let hour = times[0] as u8;
337+
let minute = times[1] as u8;
338+
let second = times[2] as u8;
339+
if let Some(zoned) = fast_local_to_zoned(tz, d, hour, minute, second, 0) {
340+
return Ok(zoned);
341+
}
342+
d.at(hour as i8, minute as i8, second as i8, 0)
348343
.to_zoned(tz.clone())
349344
.map_err_to_code(ErrorCode::BadBytes, || {
350345
format!("Invalid time provided in times: {:?}", times)

src/common/io/tests/it/cursor_ext/read_datetime_ext.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ fn test_read_timestamp_text() -> Result<()> {
5353
"2020-01-01T11:11:11.123+00:00[UTC]",
5454
"2055-02-03T02:00:20.234+00:00[UTC]",
5555
"2055-02-03T18:00:20.234+00:00[UTC]",
56-
"1970-01-01T00:00:00+00:00[UTC]",
56+
"1022-05-15T19:25:02+00:00[UTC]",
5757
];
5858
let mut res = vec![];
5959
for _ in 0..expected.len() {
@@ -123,7 +123,7 @@ fn test_read_date_text() -> Result<()> {
123123
"2020-01-01",
124124
"2055-02-03",
125125
"2055-02-03",
126-
"1970-01-01",
126+
"1022-05-15",
127127
"2055-01-01",
128128
];
129129

src/common/timezone/Cargo.toml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
[package]
2+
name = "databend-common-timezone"
3+
version = { workspace = true }
4+
authors = { workspace = true }
5+
license = { workspace = true }
6+
publish = { workspace = true }
7+
edition = { workspace = true }
8+
9+
[dependencies]
10+
jiff = { workspace = true }
11+
12+
[dev-dependencies]
13+
rand = { workspace = true }
14+
15+
[lints]
16+
workspace = true

0 commit comments

Comments
 (0)