Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,7 @@ object QueryPlanSerde extends Logging with CometExprShim with CometTypeShim {
classOf[FromUnixTime] -> CometFromUnixTime,
classOf[FromUTCTimestamp] -> CometFromUTCTimestamp,
classOf[ToUTCTimestamp] -> CometToUTCTimestamp,
classOf[GetTimestamp] -> CometGetTimestamp,
classOf[LastDay] -> CometLastDay,
classOf[Hour] -> CometHour,
classOf[MakeDate] -> CometMakeDate,
Expand Down
4 changes: 3 additions & 1 deletion spark/src/main/scala/org/apache/comet/serde/datetime.scala
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ package org.apache.comet.serde

import java.util.Locale

import org.apache.spark.sql.catalyst.expressions.{AddMonths, Attribute, ConvertTimezone, DateAdd, DateDiff, DateFormatClass, DateFromUnixDate, DateSub, DayOfMonth, DayOfWeek, DayOfYear, Days, FromUTCTimestamp, GetDateField, Hour, Hours, LastDay, Literal, MakeDate, MakeTimestamp, MicrosToTimestamp, MillisToTimestamp, Minute, Month, MonthsBetween, NextDay, Quarter, Second, SecondsToTimestamp, ToUnixTimestamp, ToUTCTimestamp, TruncDate, TruncTimestamp, UnixDate, UnixMicros, UnixMillis, UnixSeconds, UnixTimestamp, WeekDay, WeekOfYear, Year}
import org.apache.spark.sql.catalyst.expressions.{AddMonths, Attribute, ConvertTimezone, DateAdd, DateDiff, DateFormatClass, DateFromUnixDate, DateSub, DayOfMonth, DayOfWeek, DayOfYear, Days, FromUTCTimestamp, GetDateField, GetTimestamp, Hour, Hours, LastDay, Literal, MakeDate, MakeTimestamp, MicrosToTimestamp, MillisToTimestamp, Minute, Month, MonthsBetween, NextDay, Quarter, Second, SecondsToTimestamp, ToUnixTimestamp, ToUTCTimestamp, TruncDate, TruncTimestamp, UnixDate, UnixMicros, UnixMillis, UnixSeconds, UnixTimestamp, WeekDay, WeekOfYear, Year}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types.{DateType, DoubleType, FloatType, IntegerType, LongType, StringType, TimestampNTZType, TimestampType}
import org.apache.spark.unsafe.types.UTF8String
Expand Down Expand Up @@ -789,3 +789,5 @@ object CometUnixMillis extends CometCodegenDispatch[UnixMillis]
// Serde entry for Spark's UnixMicros expression. All behavior comes from
// CometCodegenDispatch, which is defined elsewhere in the project.
object CometUnixMicros extends CometCodegenDispatch[UnixMicros]

// Serde entry for Spark's ToUnixTimestamp expression, handled the same way.
object CometToUnixTimestamp extends CometCodegenDispatch[ToUnixTimestamp]

// Serde entry for Spark's GetTimestamp expression. QueryPlanSerde's expression map
// registers it as `classOf[GetTimestamp] -> CometGetTimestamp`.
object CometGetTimestamp extends CometCodegenDispatch[GetTimestamp]
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
-- Licensed to the Apache Software Foundation (ASF) under one
-- or more contributor license agreements. See the NOTICE file
-- distributed with this work for additional information
-- regarding copyright ownership. The ASF licenses this file
-- to you under the Apache License, Version 2.0 (the
-- "License"); you may not use this file except in compliance
-- with the License. You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing,
-- software distributed under the License is distributed on an
-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-- KIND, either express or implied. See the License for the
-- specific language governing permissions and limitations
-- under the License.

-- Routes GetTimestamp through the codegen dispatcher.
-- GetTimestamp is generated by to_timestamp(string, format), to_date(string, format),
-- try_to_timestamp(string, format) and to_timestamp_ntz(string, format).
-- ANSI mode is pinned off: the fixture below contains unparseable strings, and with
-- ANSI enabled (the Spark 4.x default) to_timestamp/to_date would throw on them
-- instead of returning NULL. The throwing path is covered by the separate ANSI test file.
-- Config: spark.sql.session.timeZone=UTC
-- Config: spark.sql.ansi.enabled=false
-- Config: spark.comet.exec.scalaUDF.codegen.enabled=true

-- Shared fixture: one string column holding a valid timestamp, the epoch, a pre-epoch
-- instant, an out-of-range month, non-date text, the empty string and NULL.
statement
CREATE TABLE test_get_timestamp(s string) USING parquet

statement
INSERT INTO test_get_timestamp VALUES
('2024-06-15 10:30:45'),
('1970-01-01 00:00:00'),
('1969-12-31 23:59:59'),
('2024-13-01 00:00:00'),
('garbage'),
(''),
(NULL)

-- to_timestamp(string, format) -> GetTimestamp with TimestampType output.
-- Runs over every row of test_get_timestamp, so it covers valid values, values that do
-- not match the pattern (month 13, 'garbage', empty string) and NULL input.
query
SELECT to_timestamp(s, 'yyyy-MM-dd HH:mm:ss') FROM test_get_timestamp

-- to_date(string, format) -> Cast(GetTimestamp(...), DateType)
-- Same rows as above, with the result cast to a date.
query
SELECT to_date(s, 'yyyy-MM-dd HH:mm:ss') FROM test_get_timestamp

-- try_to_timestamp(string, format) -> GetTimestamp with failOnError=false
-- Same rows again through the non-throwing variant.
query
SELECT try_to_timestamp(s, 'yyyy-MM-dd HH:mm:ss') FROM test_get_timestamp

-- literal arguments: no FROM clause, both the input and the format are constants
query
SELECT to_timestamp('2024-06-15 10:30:45', 'yyyy-MM-dd HH:mm:ss')

-- date-only literal through to_date
query
SELECT to_date('2024-06-15', 'yyyy-MM-dd')

-- literal that does not match the pattern, through the non-throwing try_ variant
query
SELECT try_to_timestamp('foo', 'yyyy-MM-dd')

-- NULL literal as the input string
query
SELECT to_timestamp(NULL, 'yyyy-MM-dd HH:mm:ss')

-- date-only format
-- Separate fixture whose strings carry no time-of-day part: two dates and a NULL.
statement
CREATE TABLE test_get_timestamp_dates(s string) USING parquet

statement
INSERT INTO test_get_timestamp_dates VALUES
('2024-06-15'),
('1970-01-01'),
(NULL)

-- date-only pattern producing a timestamp
query
SELECT to_timestamp(s, 'yyyy-MM-dd') FROM test_get_timestamp_dates

-- date-only pattern producing a date
query
SELECT to_date(s, 'yyyy-MM-dd') FROM test_get_timestamp_dates

-- column-as-format (non-literal format)
-- Each row supplies its own pattern: three different patterns, plus one row with a
-- NULL input string and one row with a NULL format.
statement
CREATE TABLE test_get_timestamp_fmt(s string, fmt string) USING parquet

statement
INSERT INTO test_get_timestamp_fmt VALUES
('2024-06-15 10:30:45', 'yyyy-MM-dd HH:mm:ss'),
('2024-06-15', 'yyyy-MM-dd'),
('06/15/2024', 'MM/dd/yyyy'),
(NULL, 'yyyy-MM-dd'),
('2024-06-15', NULL)

-- the format argument is read from the fmt column rather than given as a literal
query
SELECT to_timestamp(s, fmt) FROM test_get_timestamp_fmt

-- to_timestamp_ntz(string, format) -> GetTimestamp with TimestampNTZType output
-- Reuses the test_get_timestamp fixture, so valid, non-matching and NULL inputs are
-- all exercised for the timestamp-without-time-zone result type.
query
SELECT to_timestamp_ntz(s, 'yyyy-MM-dd HH:mm:ss') FROM test_get_timestamp
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
-- Licensed to the Apache Software Foundation (ASF) under one
-- or more contributor license agreements. See the NOTICE file
-- distributed with this work for additional information
-- regarding copyright ownership. The ASF licenses this file
-- to you under the Apache License, Version 2.0 (the
-- "License"); you may not use this file except in compliance
-- with the License. You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing,
-- software distributed under the License is distributed on an
-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-- KIND, either express or implied. See the License for the
-- specific language governing permissions and limitations
-- under the License.

-- ANSI mode: GetTimestamp throws on parse failure. The codegen dispatcher inherits
-- the throw from Spark's own GetTimestamp.doGenCode. The time parser policy is pinned
-- to CORRECTED so the JDK java.time formatter (and the CANNOT_PARSE_TIMESTAMP error class)
-- is exercised regardless of the runtime default.
-- Config: spark.sql.session.timeZone=UTC
-- Config: spark.sql.ansi.enabled=true
-- Config: spark.sql.legacy.timeParserPolicy=CORRECTED
-- Config: spark.comet.exec.scalaUDF.codegen.enabled=true
-- The CANNOT_PARSE_TIMESTAMP error class was standardized in Spark 3.5.
-- MinSparkVersion: 3.5

-- Input that is not a date at all.
query expect_error(CANNOT_PARSE_TIMESTAMP)
SELECT to_timestamp('not a date', 'yyyy-MM-dd')

-- Input shaped like the pattern but with month 13 and day 99.
query expect_error(CANNOT_PARSE_TIMESTAMP)
SELECT to_timestamp('2024-13-99', 'yyyy-MM-dd')

-- Same unparseable input through to_date; the same error class is expected.
query expect_error(CANNOT_PARSE_TIMESTAMP)
SELECT to_date('not a date', 'yyyy-MM-dd')

-- try_to_timestamp does NOT throw under ANSI mode (failOnError=false)
-- Plain `query` with no expect_error: the same bad input used above must not raise here.
query
SELECT try_to_timestamp('not a date', 'yyyy-MM-dd')

-- Sentinel: confirms Comet ran the expression natively. If the dispatcher silently rejects
-- GetTimestamp, the error queries above pass vacuously via Spark fallback. This valid
-- query uses checkSparkAnswerAndOperator and fails if Comet did not execute it natively.
query
SELECT to_timestamp('2024-06-15 10:30:45', 'yyyy-MM-dd HH:mm:ss')
Loading