From b714781148b2005d20c38a7d6547f43a860c8a36 Mon Sep 17 00:00:00 2001
From: Andy Grove <agrove@apache.org>
Date: Wed, 27 May 2026 08:46:19 -0600
Subject: [PATCH 1/4] docs: audit date/time expressions across Spark 3.4.3,
 3.5.8, 4.0.1

Add per-version audit sub-bullets to every implemented date/time
expression in spark_expressions_support.md using the
audit-comet-expression skill. Covers 38 SQL function names across the
33 backing Comet serde objects (some serdes back multiple SQL names,
e.g. day/dayofmonth, date_add/dateadd, date_diff/datediff).

For each function, the sub-bullets record:
- Whether the Spark class is identical across 3.4.3, 3.5.8, 4.0.1
- Spark 4.0 changes (universally the NullIntolerant trait /
  nullIntolerant: Boolean refactor, plus StringTypeWithCollation
  widening on string inputs and some error-helper renames)
- Known divergences between Comet and Spark, with tracking-issue links

The audit was driven by 8 parallel agents, each handling a related
group of expressions (codegen-dispatched, date field extractors,
Hour/Minute/Second, scalar function wrappers, timezone/unix,
truncation, format, Iceberg transforms).

Out of scope: current_timezone, date_part, datepart, extract, and
localtimestamp are rewritten by the Spark optimizer or evaluate to
constants, so they have no dedicated Comet serdes; days and hours
are V2 partition transforms with no SQL function name and so do not
appear in this section.
---
 .../spark_expressions_support.md              | 128 ++++++++++++++++++
 1 file changed, 128 insertions(+)

diff --git a/docs/source/contributor-guide/spark_expressions_support.md b/docs/source/contributor-guide/spark_expressions_support.md
index ccb816c668..c6337c0e41 100644
--- a/docs/source/contributor-guide/spark_expressions_support.md
+++ b/docs/source/contributor-guide/spark_expressions_support.md
@@ -215,86 +215,214 @@
 ### datetime_funcs
 
 - [x] add_months
+  - Spark 3.4.3 (audited 2026-05-27): identical to 3.5.8.
+  - Spark 3.5.8 (audited 2026-05-27): baseline. `inputTypes = Seq(DateType, IntegerType)`; returns `DateType`; codegen delegates to `DateTimeUtils.dateAddMonths`.
+  - Spark 4.0.1 (audited 2026-05-27): `NullIntolerant` trait replaced by `override def nullIntolerant: Boolean = true` on `AddMonthsBase`; behaviour and codegen unchanged.
 - [x] convert_timezone
+  - Spark 3.4.3 (audited 2026-05-27): identical to 3.5.8.
+  - Spark 3.5.8 (audited 2026-05-27): baseline. Ternary `(sourceTz, targetTz, sourceTs)`; `inputTypes = Seq(StringType, StringType, TimestampNTZType)`; delegates to `DateTimeUtils.convertTimestampNtzToAnotherTz`.
+  - Spark 4.0.1 (audited 2026-05-27): timezone `inputTypes` widened to `StringTypeWithCollation(supportsTrimCollation = true)`; behaviour unchanged for ASCII timezone strings.
+  - Known divergence: Comet composes `to_utc_timestamp` then `from_utc_timestamp` and its native timezone parser only accepts IANA zone IDs and `+HH:MM` offsets, so legacy forms like `GMT+1`, `UTC+1`, or three-letter abbreviations throw a native parse error at execution (https://github.com/apache/datafusion-comet/issues/2013).
 - [ ] curdate
 - [ ] current_date
 - [ ] current_time
 - [ ] current_timestamp
 - [x] current_timezone
 - [x] date_add
+  - Spark 3.4.3 (audited 2026-05-27): baseline. `(DateType, IntegerType|ShortType|ByteType) -> DateType`; `nullSafeEval` returns `startDays + d.intValue()` with Java int wrap-around; no ANSI branch.
+  - Spark 3.5.8 (audited 2026-05-27): identical to 3.4.3.
+  - Spark 4.0.1 (audited 2026-05-27): semantics unchanged; `NullIntolerant` trait replaced by `override def nullIntolerant: Boolean = true`.
 - [x] date_diff
+  - Spark 3.4.3 (audited 2026-05-27): baseline. `(DateType, DateType) -> IntegerType`; `nullSafeEval` is `endDays - startDays` with Java int wrap-around.
+  - Spark 3.5.8 (audited 2026-05-27): identical to 3.4.3.
+  - Spark 4.0.1 (audited 2026-05-27): semantics unchanged; `NullIntolerant` trait replaced by `override def nullIntolerant: Boolean = true`.
+  - Known divergence: the native impl uses non-wrapping `i32 -`, which would panic in debug builds on extreme inputs (Spark wraps); practically unreachable for date inputs.
 - [x] date_format
+  - Spark 3.4.3 (audited 2026-05-27): identical to 3.5.8.
+  - Spark 3.5.8 (audited 2026-05-27): baseline. `(TimestampType, StringType) -> StringType`; format string is parsed via `TimestampFormatter` (`DateTimeFormatter` under `CORRECTED` policy, `SimpleDateFormat` under `LEGACY` policy).
+  - Spark 4.0.1 (audited 2026-05-27): trait set updated to use `DefaultStringProducingExpression`; `nullIntolerant` becomes a field; format `inputTypes` widened to `StringTypeWithCollation(supportsTrimCollation = true)`. Behaviour unchanged for ASCII format strings.
+  - Known divergence: only a curated allow-list of `SimpleDateFormat` patterns runs natively (via DataFusion `to_char`). Non-UTC session timezones with an allow-listed format require `spark.comet.expr.dateFormat.allowIncompatible=true`. Non-literal formats, formats outside the allow-list, and the default disabled-codegen path route through Spark's `DateFormatClass.doGenCode` only when `spark.comet.exec.scalaUDF.codegen.enabled=true`; otherwise the operator falls back to Spark. `spark.sql.legacy.timeParserPolicy=LEGACY` is honoured only on the codegen-dispatch / Spark-fallback paths; the native allow-list assumes corrected semantics.
 - [x] date_from_unix_date
+  - Spark 3.4.3 (audited 2026-05-27): baseline. `IntegerType -> DateType`; `nullSafeEval` is the identity on days-since-epoch.
+  - Spark 3.5.8 (audited 2026-05-27): identical to 3.4.3.
+  - Spark 4.0.1 (audited 2026-05-27): semantics unchanged; `NullIntolerant` trait replaced by `override def nullIntolerant: Boolean = true`.
 - [x] date_part
 - [x] date_sub
+  - Spark 3.4.3 (audited 2026-05-27): baseline. Mirror of `DateAdd` (`startDays - d.intValue()`); same input types and wrap-around behaviour; no ANSI branch.
+  - Spark 3.5.8 (audited 2026-05-27): identical to 3.4.3.
+  - Spark 4.0.1 (audited 2026-05-27): semantics unchanged; `NullIntolerant` trait replaced by `override def nullIntolerant: Boolean = true`.
 - [x] date_trunc
+  - Spark 3.4.3 (audited 2026-05-27): identical to 3.5.8.
+  - Spark 3.5.8 (audited 2026-05-27): baseline. `inputTypes = Seq(StringType, TimestampType)`; format parsed by `parseTruncLevel` (case-insensitive) and supports `YEAR`/`YYYY`/`YY`, `QUARTER`, `MONTH`/`MM`/`MON`, `WEEK`, `DAY`/`DD`, `HOUR`, `MINUTE`, `SECOND`, `MILLISECOND`, `MICROSECOND`. Unknown levels return NULL. Truncation is `TimeZoneAware` and uses `zoneId` for day-and-coarser units.
+  - Spark 4.0.1 (audited 2026-05-27): format `inputTypes` widened to `StringTypeWithCollation(supportsTrimCollation = true)`; truncation semantics unchanged for ASCII format strings.
+  - Known divergence: Comet returns incorrect results in non-UTC session timezones for day-and-coarser units (https://github.com/apache/datafusion-comet/issues/2649); marked `Incompatible` when the resolved zone is not `UTC` / `Etc/UTC`. Non-literal and unsupported format strings raise a native execution error instead of returning NULL.
 - [x] dateadd
+  - Spark 3.4.3 (audited 2026-05-27): SQL alias for `date_add`; see that entry.
+  - Spark 3.5.8 (audited 2026-05-27): SQL alias for `date_add`; see that entry.
+  - Spark 4.0.1 (audited 2026-05-27): SQL alias for `date_add`; see that entry.
 - [x] datediff
+  - Spark 3.4.3 (audited 2026-05-27): SQL alias for `date_diff`; see that entry.
+  - Spark 3.5.8 (audited 2026-05-27): SQL alias for `date_diff`; see that entry.
+  - Spark 4.0.1 (audited 2026-05-27): SQL alias for `date_diff`; see that entry.
 - [x] datepart
 - [x] day
+  - Spark 3.4.3 (audited 2026-05-27): identical to 3.5.8.
+  - Spark 3.5.8 (audited 2026-05-27): baseline. SQL alias for `DayOfMonth`; delegates to `DateTimeUtils.getDayOfMonth` via `LocalDate.getDayOfMonth` (1..31).
+  - Spark 4.0.1 (audited 2026-05-27): identical semantics; `GetDateField` drops the `NullIntolerant` mixin in favour of `nullIntolerant: Boolean = true`.
 - [ ] dayname
 - [x] dayofmonth
+  - Spark 3.4.3 (audited 2026-05-27): identical to 3.5.8.
+  - Spark 3.5.8 (audited 2026-05-27): baseline. `DayOfMonth extends GetDateField`; delegates to `DateTimeUtils.getDayOfMonth` (1..31).
+  - Spark 4.0.1 (audited 2026-05-27): identical semantics; `GetDateField` drops the `NullIntolerant` mixin in favour of `nullIntolerant: Boolean = true`.
 - [x] dayofweek
+  - Spark 3.4.3 (audited 2026-05-27): identical to 3.5.8.
+  - Spark 3.5.8 (audited 2026-05-27): baseline. `DayOfWeek extends GetDateField`; returns 1..7 with Sunday=1 via `LocalDate.getDayOfWeek.plus(1).getValue`.
+  - Spark 4.0.1 (audited 2026-05-27): identical semantics; `GetDateField` drops the `NullIntolerant` mixin in favour of `nullIntolerant: Boolean = true`.
 - [x] dayofyear
+  - Spark 3.4.3 (audited 2026-05-27): identical to 3.5.8.
+  - Spark 3.5.8 (audited 2026-05-27): baseline. `DayOfYear extends GetDateField`; returns 1..366 via `LocalDate.getDayOfYear`.
+  - Spark 4.0.1 (audited 2026-05-27): identical semantics; `GetDateField` drops the `NullIntolerant` mixin in favour of `nullIntolerant: Boolean = true`.
 - [x] extract
 - [x] from_unixtime
+  - Spark 3.4.3 (audited 2026-05-27): identical to 3.5.8.
+  - Spark 3.5.8 (audited 2026-05-27): baseline. `BinaryExpression` with `inputTypes = Seq(LongType, StringType)`; returns `StringType`; uses session `zoneId` to format the resulting timestamp.
+  - Spark 4.0.1 (audited 2026-05-27): now `DefaultStringProducingExpression`; format `inputTypes` widened to `StringTypeWithCollation(supportsTrimCollation = true)`; `nullIntolerant` set via override instead of the `NullIntolerant` trait.
+  - Known divergence: Comet only honours the default format pattern `yyyy-MM-dd HH:mm:ss`; any other format falls back to Spark. Implemented by composing DataFusion's `from_unixtime` and `to_char`, so DataFusion's valid timestamp range differs from Spark (https://github.com/apache/datafusion/issues/16594) and Spark datetime patterns are not honoured even when supplied (https://github.com/apache/datafusion/issues/16577).
 - [x] from_utc_timestamp
   - Spark 3.4.3 (audited 2026-05-12): identical to 3.5.8.
   - Spark 3.5.8 (audited 2026-05-12): baseline.
   - Spark 4.0.1 (audited 2026-05-12): `inputTypes` widened to `StringTypeWithCollation`; behaviour unchanged for ASCII timezone strings.
   - Known divergence: Comet's native timezone parser does not accept Spark's legacy zone forms (`GMT+1`, `UTC+1`, three-letter abbreviations like `PST`). Such timezones throw a native parse error at execution.
 - [x] hour
+  - Spark 3.4.3 (audited 2026-05-27): identical to 3.5.8.
+  - Spark 3.5.8 (audited 2026-05-27): baseline. `case class Hour` extends `GetTimeField`.
+  - Spark 4.0.1 (audited 2026-05-27): `case class Hour` is unchanged; parent `GetTimeField` trait refactored to override `nullIntolerant: Boolean = true` instead of mixing in `NullIntolerant` (no behavioural change).
+  - Known divergence: for `TimestampNTZType` inputs Comet's native path applies session-timezone conversion (Spark treats `TIMESTAMP_NTZ` as wall-clock and ignores session timezone), so the returned hour can differ. Marked `Incompatible` and gated by `spark.comet.expr.allowIncompatible` (https://github.com/apache/datafusion-comet/issues/3180).
 - [x] last_day
+  - Spark 3.4.3 (audited 2026-05-27): baseline. `DateType -> DateType`; computes `DateTimeUtils.getLastDayOfMonth`; no ANSI branch.
+  - Spark 3.5.8 (audited 2026-05-27): identical to 3.4.3.
+  - Spark 4.0.1 (audited 2026-05-27): semantics unchanged; `NullIntolerant` trait replaced by `override def nullIntolerant: Boolean = true`.
 - [x] localtimestamp
 - [x] make_date
+  - Spark 3.4.3 (audited 2026-05-27): baseline. `(IntegerType, IntegerType, IntegerType) -> DateType`; under `spark.sql.ansi.enabled=true` invalid `(year, month, day)` throws `ansiDateTimeError`, else returns NULL. Documented valid year range is 1 to 9999.
+  - Spark 3.5.8 (audited 2026-05-27): identical to 3.4.3.
+  - Spark 4.0.1 (audited 2026-05-27): error helper renamed to `ansiDateTimeArgumentOutOfRange`; behaviour otherwise unchanged.
+  - Known divergence: `SparkMakeDate` in `native/spark-expr/src/datetime_funcs/make_date.rs` always returns NULL on invalid input and never raises, so Comet diverges from Spark when `spark.sql.ansi.enabled=true`. Year 0 and negative years (accepted via chrono's proleptic calendar) were also examined, but verification against Spark's own implementation showed matching behaviour, so they are not a divergence.
 - [ ] make_dt_interval
 - [ ] make_interval
 - [ ] make_time
 - [x] make_timestamp
+  - Spark 3.4.3 (audited 2026-05-27): identical to 3.5.8.
+  - Spark 3.5.8 (audited 2026-05-27): baseline. Septenary `(year, month, day, hour, min, sec[, timezone])` with `sec: DecimalType(16,6)`; honours `spark.sql.ansi.enabled` (throws on invalid input, else NULL); timezone input is `StringType`; result type follows `spark.sql.timestampType`.
+  - Spark 4.0.1 (audited 2026-05-27): timezone input widened to `StringTypeWithCollation(supportsTrimCollation = true)`; ANSI error helpers renamed (`ansiDateTimeArgumentOutOfRange`, `invalidFractionOfSecondError(value)`); `NullIntolerant` trait replaced by `nullIntolerant: Boolean = true`; new sibling `TryMakeTimestamp` added but routed through a separate expression.
 - [ ] make_timestamp_ltz
 - [ ] make_timestamp_ntz
 - [ ] make_ym_interval
 - [x] minute
+  - Spark 3.4.3 (audited 2026-05-27): identical to 3.5.8.
+  - Spark 3.5.8 (audited 2026-05-27): baseline. `case class Minute` extends `GetTimeField`.
+  - Spark 4.0.1 (audited 2026-05-27): `case class Minute` is unchanged; parent `GetTimeField` trait refactored to override `nullIntolerant: Boolean = true` instead of mixing in `NullIntolerant` (no behavioural change).
+  - Known divergence: for `TimestampNTZType` inputs Comet's native path applies session-timezone conversion (Spark treats `TIMESTAMP_NTZ` as wall-clock and ignores session timezone), so the returned minute can differ. Marked `Incompatible` and gated by `spark.comet.expr.allowIncompatible` (https://github.com/apache/datafusion-comet/issues/3180).
 - [x] month
+  - Spark 3.4.3 (audited 2026-05-27): identical to 3.5.8.
+  - Spark 3.5.8 (audited 2026-05-27): baseline. `Month extends GetDateField`; delegates to `DateTimeUtils.getMonth` via `LocalDate.getMonthValue` (1..12).
+  - Spark 4.0.1 (audited 2026-05-27): identical semantics; `GetDateField` drops the `NullIntolerant` mixin in favour of `nullIntolerant: Boolean = true`.
 - [ ] monthname
 - [x] months_between
+  - Spark 3.4.3 (audited 2026-05-27): identical to 3.5.8.
+  - Spark 3.5.8 (audited 2026-05-27): baseline. Ternary `(date1: Timestamp, date2: Timestamp, roundOff: Boolean)` returning `DoubleType`; `TimeZoneAwareExpression`; codegen delegates to `DateTimeUtils.monthsBetween`.
+  - Spark 4.0.1 (audited 2026-05-27): `NullIntolerant` trait dropped in favour of `nullIntolerant: Boolean = true` override; signature and runtime behaviour unchanged.
 - [x] next_day
+  - Spark 3.4.3 (audited 2026-05-27): baseline. `(DateType, StringType) -> DateType`; under `spark.sql.ansi.enabled=true` an unrecognised `dayOfWeek` throws `ansiIllegalArgumentError`, else returns NULL. Allowed tokens come from `DateTimeUtils.getDayOfWeekFromString` (`SU/SUN/SUNDAY`, `MO/MON/MONDAY`, ...), case-insensitive via `Locale.ROOT`, no trimming.
+  - Spark 3.5.8 (audited 2026-05-27): identical to 3.4.3.
+  - Spark 4.0.1 (audited 2026-05-27): error type changed to `SparkIllegalArgumentException`; `inputTypes` now uses `StringTypeWithCollation(supportsTrimCollation = true)`.
+  - Known divergence: `datafusion-spark::SparkNextDay` returns NULL for malformed `dayOfWeek` regardless of `spark.sql.ansi.enabled`, so ANSI mode does not throw. It also `trim()`s the day-of-week argument before matching, so `' MO '` succeeds natively while Spark would treat it as invalid.
 - [ ] now
 - [x] quarter
+  - Spark 3.4.3 (audited 2026-05-27): identical to 3.5.8.
+  - Spark 3.5.8 (audited 2026-05-27): baseline. `Quarter extends GetDateField`; returns 1..4 via `IsoFields.QUARTER_OF_YEAR`.
+  - Spark 4.0.1 (audited 2026-05-27): identical semantics; `GetDateField` drops the `NullIntolerant` mixin in favour of `nullIntolerant: Boolean = true`.
 - [x] second
+  - Spark 3.4.3 (audited 2026-05-27): identical to 3.5.8.
+  - Spark 3.5.8 (audited 2026-05-27): baseline. `case class Second` extends `GetTimeField`.
+  - Spark 4.0.1 (audited 2026-05-27): `case class Second` is unchanged; parent `GetTimeField` trait refactored to override `nullIntolerant: Boolean = true` instead of mixing in `NullIntolerant` (no behavioural change).
+  - Known divergence: for `TimestampNTZType` inputs Comet's native path applies session-timezone conversion (Spark treats `TIMESTAMP_NTZ` as wall-clock and ignores session timezone), so the returned second can differ. Marked `Incompatible` and gated by `spark.comet.expr.allowIncompatible` (https://github.com/apache/datafusion-comet/issues/3180).
 - [ ] session_window
 - [ ] time_diff
 - [ ] time_trunc
 - [x] timestamp_micros
+  - Spark 3.4.3 (audited 2026-05-27): identical to 3.5.8.
+  - Spark 3.5.8 (audited 2026-05-27): baseline. `MicrosToTimestamp` extends `IntegralToTimestampBase` with `upScaleFactor = 1`; accepts `IntegralType`, returns `TimestampType`; codegen is identity.
+  - Spark 4.0.1 (audited 2026-05-27): `IntegralToTimestampBase` drops the `NullIntolerant` trait in favour of `nullIntolerant: Boolean = true`; behaviour unchanged.
 - [x] timestamp_millis
+  - Spark 3.4.3 (audited 2026-05-27): identical to 3.5.8.
+  - Spark 3.5.8 (audited 2026-05-27): baseline. `MillisToTimestamp` extends `IntegralToTimestampBase` with `upScaleFactor = MICROS_PER_MILLIS (1000)`; multiply overflow throws via `Math.multiplyExact`.
+  - Spark 4.0.1 (audited 2026-05-27): same as 3.5.8 modulo the `NullIntolerant` trait/method refactor in `IntegralToTimestampBase`.
 - [x] timestamp_seconds
+  - Spark 3.4.3 (audited 2026-05-27): identical to 3.5.8.
+  - Spark 3.5.8 (audited 2026-05-27): baseline. `inputTypes = Seq(NumericType)` accepting integral, decimal, float, and double; integral values use `Math.multiplyExact` (overflow throws); float and double return NULL on NaN or Infinity.
+  - Spark 4.0.1 (audited 2026-05-27): `nullIntolerant` set via override instead of the `NullIntolerant` trait; otherwise identical to 3.5.8.
+  - Known divergence: Comet's Rust impl supports only Int32, Int64, Float32, and Float64. `DecimalType`, `ByteType`, and `ShortType` fall back to Spark. Int64 overflow returns a `ComputeError` matching Spark's `ArithmeticException`. NaN and Infinity map to NULL on the float and double paths.
 - [ ] to_date
 - [ ] to_time
 - [ ] to_timestamp
 - [ ] to_timestamp_ltz
 - [ ] to_timestamp_ntz
 - [x] to_unix_timestamp
+  - Spark 3.4.3 (audited 2026-05-27): identical to 3.5.8.
+  - Spark 3.5.8 (audited 2026-05-27): baseline. `inputTypes = Seq(TypeCollection(StringType, DateType, TimestampType, TimestampNTZType), StringType)`; returns `LongType` seconds; honours `spark.sql.ansi.enabled` (throws on parse error, else NULL); `TimeZoneAwareExpression`.
+  - Spark 4.0.1 (audited 2026-05-27): both the value and the format argument become `StringTypeWithCollation(supportsTrimCollation = true)`; a new `suggestedFuncOnFail = "try_to_timestamp"` field is added on `ToTimestamp` (advisory).
+  - Known divergence: routed through the JVM codegen dispatcher rather than a native kernel; Comet executes the expression (with results bit-identical to Spark) only when `spark.comet.exec.scalaUDF.codegen.enabled=true`, and when the flag is off the operator falls back to Spark.
 - [x] to_utc_timestamp
   - Spark 3.4.3 (audited 2026-05-12): identical to 3.5.8.
   - Spark 3.5.8 (audited 2026-05-12): baseline.
   - Spark 4.0.1 (audited 2026-05-12): `inputTypes` widened to `StringTypeWithCollation`; behaviour unchanged for ASCII timezone strings.
   - Known divergence: Comet's native timezone parser does not accept Spark's legacy zone forms (`GMT+1`, `UTC+1`, three-letter abbreviations like `PST`). Such timezones throw a native parse error at execution.
 - [x] trunc
+  - Spark 3.4.3 (audited 2026-05-27): identical to 3.5.8.
+  - Spark 3.5.8 (audited 2026-05-27): baseline. `inputTypes = Seq(DateType, StringType)`; `parseTruncLevel` is case-insensitive and accepts `YEAR`/`YYYY`/`YY`, `QUARTER`, `MONTH`/`MM`/`MON`, `WEEK`. Unknown or sub-week levels (`DAY`, `HOUR`, ...) return NULL because `MIN_LEVEL_OF_DATE_TRUNC` is `TRUNC_TO_WEEK`.
+  - Spark 4.0.1 (audited 2026-05-27): format `inputTypes` widened to `StringTypeWithCollation(supportsTrimCollation = true)`; truncation semantics unchanged for ASCII format strings.
+  - Known divergence: Comet's native kernel raises an execution error for unknown format strings instead of returning NULL, so non-literal formats are flagged `Incompatible`. Sub-week formats such as `DAY`/`DD` are rejected with `Unsupported` (Spark would return NULL) and fall back to Spark.
 - [ ] try_make_interval
 - [ ] try_make_timestamp
 - [ ] try_to_date
 - [ ] try_to_time
 - [ ] try_to_timestamp
 - [x] unix_date
+  - Spark 3.4.3 (audited 2026-05-27): identical to 3.5.8.
+  - Spark 3.5.8 (audited 2026-05-27): baseline. `UnixDate(child)`: `inputTypes = Seq(DateType)`, `dataType = IntegerType`; `nullSafeEval` returns the underlying days-since-epoch int unchanged.
+  - Spark 4.0.1 (audited 2026-05-27): semantics unchanged; `NullIntolerant` trait is replaced by `nullIntolerant: Boolean = true`.
 - [x] unix_micros
+  - Spark 3.4.3 (audited 2026-05-27): identical to 3.5.8.
+  - Spark 3.5.8 (audited 2026-05-27): baseline. `UnixMicros` extends `TimestampToLongBase` with `scaleFactor = 1`; codegen reduces to identity on the underlying micros.
+  - Spark 4.0.1 (audited 2026-05-27): same as 3.5.8 modulo the `NullIntolerant` trait/method refactor.
 - [x] unix_millis
+  - Spark 3.4.3 (audited 2026-05-27): identical to 3.5.8.
+  - Spark 3.5.8 (audited 2026-05-27): baseline. `UnixMillis` extends `TimestampToLongBase` with `scaleFactor = MICROS_PER_MILLIS`; floor-divides timestamp micros by 1000.
+  - Spark 4.0.1 (audited 2026-05-27): same as 3.5.8 modulo the `NullIntolerant` trait/method refactor.
 - [x] unix_seconds
+  - Spark 3.4.3 (audited 2026-05-27): identical to 3.5.8.
+  - Spark 3.5.8 (audited 2026-05-27): baseline. `UnixSeconds` extends `TimestampToLongBase` with `scaleFactor = MICROS_PER_SECOND`; accepts `TimestampType` only; returns `LongType`; floor-divides micros by the scale factor.
+  - Spark 4.0.1 (audited 2026-05-27): `TimestampToLongBase` swaps the `NullIntolerant` trait for `nullIntolerant: Boolean = true`; numerics unchanged.
 - [x] unix_timestamp
+  - Spark 3.4.3 (audited 2026-05-27): identical to 3.5.8.
+  - Spark 3.5.8 (audited 2026-05-27): baseline. Inherits from `UnixTime` / `ToTimestamp`; `inputTypes = Seq(TypeCollection(StringType, DateType, TimestampType, TimestampNTZType), StringType)`; result is `LongType`; honours `failOnError` for ANSI parse errors on the string path.
+  - Spark 4.0.1 (audited 2026-05-27): `ToTimestamp.inputTypes` widens the string slot to `StringTypeWithCollation(supportsTrimCollation = true)`; behaviour unchanged for non-collated strings.
+  - Known divergence: Comet's native path only accepts `TimestampType`, `DateType`, and `TimestampNTZType` (string inputs fall back to Spark). For `TimestampType` and `DateType` the session timezone is applied via `array_with_timezone`; for `TimestampNTZType` the microsecond value is divided directly without timezone adjustment.
 - [x] weekday
+  - Spark 3.4.3 (audited 2026-05-27): identical to 3.5.8.
+  - Spark 3.5.8 (audited 2026-05-27): baseline. `WeekDay extends GetDateField`; returns 0..6 with Monday=0 via `LocalDate.getDayOfWeek.ordinal()`.
+  - Spark 4.0.1 (audited 2026-05-27): identical semantics; `GetDateField` drops the `NullIntolerant` mixin in favour of `nullIntolerant: Boolean = true`.
 - [x] weekofyear
+  - Spark 3.4.3 (audited 2026-05-27): identical to 3.5.8.
+  - Spark 3.5.8 (audited 2026-05-27): baseline. `WeekOfYear extends GetDateField`; returns the ISO-8601 week-of-week-based-year via `IsoFields.WEEK_OF_WEEK_BASED_YEAR` (Monday start, week 1 has more than 3 days). Comet maps this to DataFusion's `datepart('week', ...)` which uses Arrow's `iso_week().week()`, matching Spark.
+  - Spark 4.0.1 (audited 2026-05-27): identical semantics; `GetDateField` drops the `NullIntolerant` mixin in favour of `nullIntolerant: Boolean = true`.
 - [ ] window
 - [ ] window_time
 - [x] year
+  - Spark 3.4.3 (audited 2026-05-27): identical to 3.5.8.
+  - Spark 3.5.8 (audited 2026-05-27): baseline. `Year extends GetDateField`; delegates to `DateTimeUtils.getYear` via `LocalDate.getYear`.
+  - Spark 4.0.1 (audited 2026-05-27): identical semantics; `GetDateField` drops the `NullIntolerant` mixin in favour of `nullIntolerant: Boolean = true`.
 
 ### generator_funcs
 

From 5cc97d47cd005f721515aa6fb8216269c50ec10c Mon Sep 17 00:00:00 2001
From: Andy Grove <agrove@apache.org>
Date: Wed, 27 May 2026 08:46:44 -0600
Subject: [PATCH 2/4] test: capture datetime audit correctness findings as
 ignored SQL tests

Captured tests for the three correctness divergences found during the
datetime audit. Each test is in query ignore(<issue-url>) mode and
will activate when the corresponding upstream fix lands.

- next_day.sql gains a divergence test for whitespace trimming
  (Comet trims ' MO '; Spark does not). ignore(#4450).
- next_day_ansi.sql is new and asserts that next_day throws under
  spark.sql.ansi.enabled=true for malformed dayOfWeek. Comet
  currently returns NULL. ignore(#4449).
- make_date_ansi.sql is new and asserts that make_date throws under
  spark.sql.ansi.enabled=true for invalid (year, month, day). Comet
  currently returns NULL. ignore(#4451).

A fourth audit finding (make_date year 0 / negative years) was
verified against Spark's own implementation and turned out to be a
non-divergence; the issue was closed and no test added.

None of the three remaining bugs are trivial to fix here: both
SparkNextDay and SparkMakeDate live upstream in the datafusion-spark
crate, so the fixes need to flow through that project. The captured
tests will switch from ignore(...) to their intended assertion mode
when the upstream changes land.
---
 .../expressions/datetime/make_date_ansi.sql   | 36 +++++++++++++++++++
 .../expressions/datetime/next_day.sql         |  5 +++
 .../expressions/datetime/next_day_ansi.sql    | 30 ++++++++++++++++
 3 files changed, 71 insertions(+)
 create mode 100644 spark/src/test/resources/sql-tests/expressions/datetime/make_date_ansi.sql
 create mode 100644 spark/src/test/resources/sql-tests/expressions/datetime/next_day_ansi.sql

diff --git a/spark/src/test/resources/sql-tests/expressions/datetime/make_date_ansi.sql b/spark/src/test/resources/sql-tests/expressions/datetime/make_date_ansi.sql
new file mode 100644
index 0000000000..04a28e3e1c
--- /dev/null
+++ b/spark/src/test/resources/sql-tests/expressions/datetime/make_date_ansi.sql
@@ -0,0 +1,36 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements.  See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership.  The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License.  You may obtain a copy of the License at
+--
+--   http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing,
+-- software distributed under the License is distributed on an
+-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+-- KIND, either express or implied.  See the License for the
+-- specific language governing permissions and limitations
+-- under the License.
+
+-- ANSI mode: Spark's MakeDate wraps the java.time.DateTimeException from LocalDate.of in
+-- ansiDateTimeArgumentOutOfRange (4.0) / ansiDateTimeError (3.4/3.5) when
+-- spark.sql.ansi.enabled=true. Comet's native SparkMakeDate always returns NULL on
+-- invalid input and never raises, so it does not throw under ANSI. The ignored queries
+-- below capture the divergence; switch them from ignore(...) to an error assertion when
+-- https://github.com/apache/datafusion-comet/issues/4451 is fixed.
+-- Config: spark.sql.ansi.enabled=true
+
+-- February 30 is not a valid date.
+query ignore(https://github.com/apache/datafusion-comet/issues/4451)
+SELECT make_date(2024, 2, 30)
+
+-- Month 13 is out of range.
+query ignore(https://github.com/apache/datafusion-comet/issues/4451)
+SELECT make_date(2024, 13, 1)
+
+-- Day 0 is out of range.
+query ignore(https://github.com/apache/datafusion-comet/issues/4451)
+SELECT make_date(2024, 6, 0)
diff --git a/spark/src/test/resources/sql-tests/expressions/datetime/next_day.sql b/spark/src/test/resources/sql-tests/expressions/datetime/next_day.sql
index 057c9daea6..65cccb9356 100644
--- a/spark/src/test/resources/sql-tests/expressions/datetime/next_day.sql
+++ b/spark/src/test/resources/sql-tests/expressions/datetime/next_day.sql
@@ -72,3 +72,8 @@ SELECT next_day(date('2023-01-01'), 'Monday'), next_day(date('2023-01-01'), 'Sun
 -- null handling
 query
 SELECT next_day(NULL, 'Monday'), next_day(date('2023-01-01'), NULL)
+
+-- Comet's native impl trims whitespace before matching the day name; Spark does not, so
+-- ' MO ' is invalid in Spark (NULL) but matches Monday in Comet.
+query ignore(https://github.com/apache/datafusion-comet/issues/4450)
+SELECT next_day(date('2024-01-01'), ' MO '), next_day(date('2024-01-01'), 'MO ')
diff --git a/spark/src/test/resources/sql-tests/expressions/datetime/next_day_ansi.sql b/spark/src/test/resources/sql-tests/expressions/datetime/next_day_ansi.sql
new file mode 100644
index 0000000000..9f8f8e435f
--- /dev/null
+++ b/spark/src/test/resources/sql-tests/expressions/datetime/next_day_ansi.sql
@@ -0,0 +1,30 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements.  See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership.  The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License.  You may obtain a copy of the License at
+--
+--   http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing,
+-- software distributed under the License is distributed on an
+-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+-- KIND, either express or implied.  See the License for the
+-- specific language governing permissions and limitations
+-- under the License.
+
+-- ANSI mode: Spark's NextDay throws SparkIllegalArgumentException on a malformed
+-- dayOfWeek. Comet's native impl (datafusion-spark::SparkNextDay) always returns NULL,
+-- so it does not throw under ANSI. Switch the queries below from ignore(...) to an
+-- expect_error(...) assertion once https://github.com/apache/datafusion-comet/issues/4449
+-- is fixed; until then they are ignored so the suite stays green.
+-- Config: spark.sql.ansi.enabled=true
+
+-- Comet returns NULL where Spark throws.
+query ignore(https://github.com/apache/datafusion-comet/issues/4449)
+SELECT next_day(date('2024-01-01'), 'NOT_A_DAY')
+
+query ignore(https://github.com/apache/datafusion-comet/issues/4449)
+SELECT next_day(date('2024-01-01'), '')

From 8fb79052d561178d84c8466d43bde7ac77449e3e Mon Sep 17 00:00:00 2001
From: Andy Grove <agrove@apache.org>
Date: Wed, 27 May 2026 08:47:01 -0600
Subject: [PATCH 3/4] fix: apply support-level consistency fixes surfaced by
 datetime audit

Mechanical fixes for the support-level / reason alignment issues found
during the datetime audit. No behavioural changes; the only observable
effect is that EXPLAIN-time fallback messages now include the specific
reason instead of a generic "not fully compatible with Spark".

- TimeFieldSerde companion (new) hoists the shared TimestampNTZ reason
  string used by CometHour, CometMinute, and CometSecond, mirroring
  the existing UTCTimestampSerde pattern. The three serdes now share
  one reason and one support-level helper.
- CometTruncDate extracts the duplicated reason strings into private
  vals and corrects the wording drift (the inline reason said "Invalid"
  while the docs reason said "Non-literal"; they now match).
- CometTruncTimestamp adds the missing non-literal-format reason to
  getIncompatibleReasons, adds the missing getUnsupportedReasons
  override for unsupported format literals, and extracts both reasons
  into private vals.
- CometSecondsToTimestamp adds the missing getUnsupportedReasons
  override so the Compatibility Guide reflects which input types are
  supported.
- CometHours and CometDays add getSupportLevel and getUnsupportedReasons
  overrides so the unsupported-input-type fallback surfaces in EXPLAIN
  output and the Compatibility Guide; the dispatcher now handles the
  fallback uniformly and the withInfo call in convert is no longer
  needed for those branches.
- CometFromUnixTime moves the format-pattern check out of convert into
  getSupportLevel (returning Unsupported for non-default patterns and
  Incompatible for the DataFusion timestamp-range issue on default
  patterns). Reasons are shared via private vals; getUnsupportedReasons
  and getIncompatibleReasons both populated. As a side effect the
  fallback message for non-default formats now includes the specific
  reason ("Only the default datetime format pattern...") rather than
  the generic "not fully compatible with Spark"; updated the existing
  from_unix_time.sql expect_fallback assertions accordingly.

The CometDateFormat and CometUnixTimestamp findings need deeper
semantics analysis and are left for follow-up.
---
 .../org/apache/comet/serde/datetime.scala     | 163 ++++++++++--------
 .../org/apache/comet/serde/unixtime.scala     |  29 +++-
 .../expressions/datetime/from_unix_time.sql   |   4 +-
 3 files changed, 110 insertions(+), 86 deletions(-)

diff --git a/spark/src/main/scala/org/apache/comet/serde/datetime.scala b/spark/src/main/scala/org/apache/comet/serde/datetime.scala
index e2995274ad..01018ff712 100644
--- a/spark/src/main/scala/org/apache/comet/serde/datetime.scala
+++ b/spark/src/main/scala/org/apache/comet/serde/datetime.scala
@@ -23,7 +23,7 @@ import java.util.Locale
 
 import org.apache.spark.sql.catalyst.expressions.{AddMonths, Attribute, ConvertTimezone, DateAdd, DateDiff, DateFormatClass, DateFromUnixDate, DateSub, DayOfMonth, DayOfWeek, DayOfYear, Days, FromUTCTimestamp, GetDateField, Hour, Hours, LastDay, Literal, MakeDate, MakeTimestamp, MicrosToTimestamp, MillisToTimestamp, Minute, Month, MonthsBetween, NextDay, Quarter, Second, SecondsToTimestamp, ToUnixTimestamp, ToUTCTimestamp, TruncDate, TruncTimestamp, UnixDate, UnixMicros, UnixMillis, UnixSeconds, UnixTimestamp, WeekDay, WeekOfYear, Year}
 import org.apache.spark.sql.internal.SQLConf
-import org.apache.spark.sql.types.{DateType, DoubleType, FloatType, IntegerType, LongType, StringType, TimestampNTZType, TimestampType}
+import org.apache.spark.sql.types.{DataType, DateType, DoubleType, FloatType, IntegerType, LongType, StringType, TimestampNTZType, TimestampType}
 import org.apache.spark.unsafe.types.UTF8String
 
 import org.apache.comet.CometConf
@@ -179,23 +179,24 @@ object CometQuarter extends CometExpressionSerde[Quarter] with CometExprGetDateF
   }
 }
 
-object CometHour extends CometExpressionSerde[Hour] {
+private object TimeFieldSerde {
+  val timestampNtzIncompatReason: String =
+    "Incorrectly applies timezone conversion to TimestampNTZ inputs" +
+      " (https://github.com/apache/datafusion-comet/issues/3180)"
 
-  val incompatReason: String = "Incorrectly applies timezone conversion to TimestampNTZ inputs" +
-    " (https://github.com/apache/datafusion-comet/issues/3180)"
+  def supportLevelForChild(childType: DataType): SupportLevel = childType match {
+    case TimestampNTZType => Incompatible(Some(timestampNtzIncompatReason))
+    case _ => Compatible()
+  }
+}
 
-  override def getIncompatibleReasons(): Seq[String] = Seq(incompatReason)
+object CometHour extends CometExpressionSerde[Hour] {
 
-  override def getSupportLevel(expr: Hour): SupportLevel = {
-    if (expr.child.dataType == TimestampNTZType) {
-      Incompatible(
-        Some(
-          "Incorrectly applies timezone conversion to TimestampNTZ inputs" +
-            " (https://github.com/apache/datafusion-comet/issues/3180)"))
-    } else {
-      Compatible()
-    }
-  }
+  override def getIncompatibleReasons(): Seq[String] =
+    Seq(TimeFieldSerde.timestampNtzIncompatReason)
+
+  override def getSupportLevel(expr: Hour): SupportLevel =
+    TimeFieldSerde.supportLevelForChild(expr.child.dataType)
 
   override def convert(
       expr: Hour,
@@ -224,20 +225,11 @@ object CometHour extends CometExpressionSerde[Hour] {
 
 object CometMinute extends CometExpressionSerde[Minute] {
 
-  override def getIncompatibleReasons(): Seq[String] = Seq(
-    "Incorrectly applies timezone conversion to TimestampNTZ inputs" +
-      " (https://github.com/apache/datafusion-comet/issues/3180)")
-
-  override def getSupportLevel(expr: Minute): SupportLevel = {
-    if (expr.child.dataType == TimestampNTZType) {
-      Incompatible(
-        Some(
-          "Incorrectly applies timezone conversion to TimestampNTZ inputs" +
-            " (https://github.com/apache/datafusion-comet/issues/3180)"))
-    } else {
-      Compatible()
-    }
-  }
+  override def getIncompatibleReasons(): Seq[String] =
+    Seq(TimeFieldSerde.timestampNtzIncompatReason)
+
+  override def getSupportLevel(expr: Minute): SupportLevel =
+    TimeFieldSerde.supportLevelForChild(expr.child.dataType)
 
   override def convert(
       expr: Minute,
@@ -266,20 +258,11 @@ object CometMinute extends CometExpressionSerde[Minute] {
 
 object CometSecond extends CometExpressionSerde[Second] {
 
-  override def getIncompatibleReasons(): Seq[String] = Seq(
-    "Incorrectly applies timezone conversion to TimestampNTZ inputs" +
-      " (https://github.com/apache/datafusion-comet/issues/3180)")
-
-  override def getSupportLevel(expr: Second): SupportLevel = {
-    if (expr.child.dataType == TimestampNTZType) {
-      Incompatible(
-        Some(
-          "Incorrectly applies timezone conversion to TimestampNTZ inputs" +
-            " (https://github.com/apache/datafusion-comet/issues/3180)"))
-    } else {
-      Compatible()
-    }
-  }
+  override def getIncompatibleReasons(): Seq[String] =
+    Seq(TimeFieldSerde.timestampNtzIncompatReason)
+
+  override def getSupportLevel(expr: Second): SupportLevel =
+    TimeFieldSerde.supportLevelForChild(expr.child.dataType)
 
   override def convert(
       expr: Second,
@@ -437,6 +420,11 @@ object CometMakeDate extends CometScalarFunction[MakeDate]("make_date")
 
 object CometSecondsToTimestamp
     extends CometScalarFunction[SecondsToTimestamp]("seconds_to_timestamp") {
+
+  override def getUnsupportedReasons(): Seq[String] = Seq(
+    "Only `IntegerType`, `LongType`, `FloatType`, and `DoubleType` inputs are supported." +
+      " `DecimalType`, `ByteType`, and `ShortType` fall back to Spark.")
+
   override def getSupportLevel(expr: SecondsToTimestamp): SupportLevel =
     expr.child.dataType match {
       case IntegerType | LongType | FloatType | DoubleType => Compatible()
@@ -482,8 +470,14 @@ object CometTruncDate extends CometExpressionSerde[TruncDate] {
   val supportedFormats: Seq[String] =
     Seq("year", "yyyy", "yy", "quarter", "mon", "month", "mm", "week")
 
-  override def getIncompatibleReasons(): Seq[String] = Seq(
-    "Non-literal format strings will throw an exception instead of returning NULL")
+  private val nonLiteralFormatIncompatReason: String =
+    "Non-literal format strings will throw an exception instead of returning NULL"
+
+  private def unsupportedFormatReason(fmt: Any): String =
+    s"Format $fmt is not supported. Only the following formats are supported: " +
+      supportedFormats.mkString(", ")
+
+  override def getIncompatibleReasons(): Seq[String] = Seq(nonLiteralFormatIncompatReason)
 
   override def getUnsupportedReasons(): Seq[String] = Seq(
     "Only the following formats are supported: " + supportedFormats.mkString(", "))
@@ -494,11 +488,10 @@ object CometTruncDate extends CometExpressionSerde[TruncDate] {
         if (supportedFormats.contains(fmt.toString.toLowerCase(Locale.ROOT))) {
           Compatible()
         } else {
-          Unsupported(Some(s"Format $fmt is not supported"))
+          Unsupported(Some(unsupportedFormatReason(fmt)))
         }
       case _ =>
-        Incompatible(
-          Some("Invalid format strings will throw an exception instead of returning NULL"))
+        Incompatible(Some(nonLiteralFormatIncompatReason))
     }
   }
 
@@ -521,10 +514,6 @@ object CometTruncDate extends CometExpressionSerde[TruncDate] {
 
 object CometTruncTimestamp extends CometExpressionSerde[TruncTimestamp] {
 
-  override def getIncompatibleReasons(): Seq[String] = Seq(
-    "Produces incorrect results when used with non-UTC timezones. Compatible when timezone is" +
-      " UTC. (https://github.com/apache/datafusion-comet/issues/2649)")
-
   val supportedFormats: Seq[String] =
     Seq(
       "year",
@@ -543,6 +532,23 @@ object CometTruncTimestamp extends CometExpressionSerde[TruncTimestamp] {
       "millisecond",
       "microsecond")
 
+  private val nonUtcIncompatReason: String =
+    "Produces incorrect results when used with non-UTC timezones. Compatible when timezone is" +
+      " UTC. (https://github.com/apache/datafusion-comet/issues/2649)"
+
+  private val nonLiteralFormatIncompatReason: String =
+    "Non-literal format strings will throw an exception instead of returning NULL"
+
+  private def unsupportedFormatReason(fmt: Any): String =
+    s"Format $fmt is not supported. Only the following formats are supported: " +
+      supportedFormats.mkString(", ")
+
+  override def getIncompatibleReasons(): Seq[String] =
+    Seq(nonUtcIncompatReason, nonLiteralFormatIncompatReason)
+
+  override def getUnsupportedReasons(): Seq[String] = Seq(
+    "Only the following formats are supported: " + supportedFormats.mkString(", "))
+
   override def getSupportLevel(expr: TruncTimestamp): SupportLevel = {
     val timezone = expr.timeZoneId.getOrElse("UTC")
     val isUtc = timezone == "UTC" || timezone == "Etc/UTC"
@@ -552,17 +558,13 @@ object CometTruncTimestamp extends CometExpressionSerde[TruncTimestamp] {
           if (isUtc) {
             Compatible()
           } else {
-            Incompatible(
-              Some(
-                s"Incorrect results in non-UTC timezone '$timezone'" +
-                  " (https://github.com/apache/datafusion-comet/issues/2649)"))
+            Incompatible(Some(nonUtcIncompatReason))
           }
         } else {
-          Unsupported(Some(s"Format $fmt is not supported"))
+          Unsupported(Some(unsupportedFormatReason(fmt)))
         }
       case _ =>
-        Incompatible(
-          Some("Invalid format strings will throw an exception instead of returning NULL"))
+        Incompatible(Some(nonLiteralFormatIncompatReason))
     }
   }
 
@@ -700,24 +702,27 @@ object CometDateFormat extends CometExpressionSerde[DateFormatClass] {
  * without applying any session timezone offset.
  */
 object CometHours extends CometExpressionSerde[Hours] {
+
+  override def getUnsupportedReasons(): Seq[String] = Seq(
+    "Only `TimestampType` and `TimestampNTZType` inputs are supported.")
+
+  override def getSupportLevel(expr: Hours): SupportLevel = expr.child.dataType match {
+    case TimestampType | TimestampNTZType => Compatible()
+    case other => Unsupported(Some(s"Hours does not support input type: $other"))
+  }
+
   override def convert(
       expr: Hours,
       inputs: Seq[Attribute],
       binding: Boolean): Option[ExprOuterClass.Expr] = {
-    val optExpr = expr.child.dataType match {
-      case TimestampType | TimestampNTZType =>
-        exprToProtoInternal(expr.child, inputs, binding).map { childExpr =>
-          val builder = ExprOuterClass.HoursTransform.newBuilder()
-          builder.setChild(childExpr)
+    val optExpr = exprToProtoInternal(expr.child, inputs, binding).map { childExpr =>
+      val builder = ExprOuterClass.HoursTransform.newBuilder()
+      builder.setChild(childExpr)
 
-          ExprOuterClass.Expr
-            .newBuilder()
-            .setHoursTransform(builder)
-            .build()
-        }
-      case other =>
-        withInfo(expr, s"Hours does not support input type: $other")
-        None
+      ExprOuterClass.Expr
+        .newBuilder()
+        .setHoursTransform(builder)
+        .build()
     }
     optExprWithInfo(optExpr, expr, expr.child)
   }
@@ -734,6 +739,16 @@ object CometHours extends CometExpressionSerde[Hours] {
  * The first cast respects the session timezone to correctly determine the date boundary.
  */
 object CometDays extends CometExpressionSerde[Days] {
+
+  override def getUnsupportedReasons(): Seq[String] = Seq(
+    "Only `DateType` and `TimestampType` inputs are supported." +
+      " `TimestampNTZType` is not supported.")
+
+  override def getSupportLevel(expr: Days): SupportLevel = expr.child.dataType match {
+    case DateType | TimestampType => Compatible()
+    case other => Unsupported(Some(s"Days does not support input type: $other"))
+  }
+
   override def convert(
       expr: Days,
       inputs: Seq[Attribute],
@@ -748,9 +763,7 @@ object CometDays extends CometExpressionSerde[Days] {
         childExpr.flatMap { child =>
           CometCast.castToProto(expr, Some(timezone), DateType, child, CometEvalMode.LEGACY)
         }
-      case other =>
-        withInfo(expr, s"Days does not support input type: $other")
-        None
+      case _ => None
     }
 
     // Convert DateType to IntegerType (days since epoch)
diff --git a/spark/src/main/scala/org/apache/comet/serde/unixtime.scala b/spark/src/main/scala/org/apache/comet/serde/unixtime.scala
index e5eeb5b848..fe7bacbf49 100644
--- a/spark/src/main/scala/org/apache/comet/serde/unixtime.scala
+++ b/spark/src/main/scala/org/apache/comet/serde/unixtime.scala
@@ -29,12 +29,26 @@ import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithIn
 // https://github.com/apache/datafusion/issues/16594
 object CometFromUnixTime extends CometExpressionSerde[FromUnixTime] {
 
-  override def getIncompatibleReasons(): Seq[String] = Seq(
-    "Only supports the default datetime format pattern `yyyy-MM-dd HH:mm:ss`." +
-      " DataFusion's valid timestamp range differs from Spark" +
-      " (https://github.com/apache/datafusion/issues/16594)")
+  private val incompatReason: String =
+    "DataFusion's valid timestamp range differs from Spark" +
+      " (https://github.com/apache/datafusion/issues/16594)"
 
-  override def getSupportLevel(expr: FromUnixTime): SupportLevel = Incompatible(None)
+  private val unsupportedFormatReason: String =
+    "Only the default datetime format pattern `yyyy-MM-dd HH:mm:ss` is supported;" +
+      " other patterns fall back to Spark" +
+      " (https://github.com/apache/datafusion/issues/16577)"
+
+  override def getIncompatibleReasons(): Seq[String] = Seq(incompatReason)
+
+  override def getUnsupportedReasons(): Seq[String] = Seq(unsupportedFormatReason)
+
+  override def getSupportLevel(expr: FromUnixTime): SupportLevel = {
+    if (expr.format != Literal(TimestampFormatter.defaultPattern)) {
+      Unsupported(Some(unsupportedFormatReason))
+    } else {
+      Incompatible(Some(incompatReason))
+    }
+  }
 
   override def convert(
       expr: FromUnixTime,
@@ -48,10 +62,7 @@ object CometFromUnixTime extends CometExpressionSerde[FromUnixTime] {
     val formatExpr = exprToProtoInternal(Literal("%Y-%m-%d %H:%M:%S"), inputs, binding)
     val timeZone = exprToProtoInternal(Literal(expr.timeZoneId.orNull), inputs, binding)
 
-    if (expr.format != Literal(TimestampFormatter.defaultPattern)) {
-      withInfo(expr, "Datetime pattern format is unsupported")
-      None
-    } else if (secExpr.isDefined && formatExpr.isDefined) {
+    if (secExpr.isDefined && formatExpr.isDefined) {
       val timestampExpr =
         scalarFunctionExprToProto("from_unixtime", Seq(secExpr, timeZone): _*)
       val optExpr = scalarFunctionExprToProto("to_char", Seq(timestampExpr, formatExpr): _*)
diff --git a/spark/src/test/resources/sql-tests/expressions/datetime/from_unix_time.sql b/spark/src/test/resources/sql-tests/expressions/datetime/from_unix_time.sql
index a7b0960570..0a2206f0c0 100644
--- a/spark/src/test/resources/sql-tests/expressions/datetime/from_unix_time.sql
+++ b/spark/src/test/resources/sql-tests/expressions/datetime/from_unix_time.sql
@@ -24,12 +24,12 @@ INSERT INTO test_from_unix_time VALUES (0), (1718451045), (-1), (NULL), (2147483
 query expect_fallback(not fully compatible with Spark)
 SELECT from_unixtime(t) FROM test_from_unix_time
 
-query expect_fallback(not fully compatible with Spark)
+query expect_fallback(Only the default datetime format pattern)
 SELECT from_unixtime(t, 'yyyy-MM-dd') FROM test_from_unix_time
 
 -- literal arguments
 query expect_fallback(not fully compatible with Spark)
 SELECT from_unixtime(0)
 
-query expect_fallback(not fully compatible with Spark)
+query expect_fallback(Only the default datetime format pattern)
 SELECT from_unixtime(1718451045, 'yyyy-MM-dd')

From 55b997c18b9eea75b32e4e5fff55bdd7fc0baee2 Mon Sep 17 00:00:00 2001
From: Andy Grove <agrove@apache.org>
Date: Wed, 27 May 2026 09:46:45 -0600
Subject: [PATCH 4/4] test: align CometTemporalExpressionSuite assertions with
 new TruncDate/TruncTimestamp wording

The audit found that the TruncDate / TruncTimestamp non-literal-format
reason was using two different wordings ("Invalid" in the inline
support-level branch, "Non-literal" in getIncompatibleReasons). The
preceding commit picked "Non-literal" as the canonical wording.

CometTemporalExpressionSuite was asserting against the old "Invalid"
wording in three tests; update those assertions to match.
---
 .../org/apache/comet/CometTemporalExpressionSuite.scala     | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/spark/src/test/scala/org/apache/comet/CometTemporalExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometTemporalExpressionSuite.scala
index 20ad90a91c..0700713b4a 100644
--- a/spark/src/test/scala/org/apache/comet/CometTemporalExpressionSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/CometTemporalExpressionSuite.scala
@@ -65,7 +65,7 @@ class CometTemporalExpressionSuite extends CometTestBase with AdaptiveSparkPlanH
     // Comet should fall back to Spark if format is not a literal
     checkSparkAnswerAndFallbackReason(
       "SELECT c0, trunc(c0, c1) from tbl order by c0, c1",
-      "Invalid format strings will throw an exception instead of returning NULL")
+      "Non-literal format strings will throw an exception instead of returning NULL")
   }
 
   test("date_trunc (TruncTimestamp) - reading from DataFrame") {
@@ -89,7 +89,7 @@ class CometTemporalExpressionSuite extends CometTestBase with AdaptiveSparkPlanH
       // Comet should fall back to Spark if format is not a literal
       checkSparkAnswerAndFallbackReason(
         "SELECT c0, date_trunc(fmt, c0) from tbl order by c0, fmt",
-        "Invalid format strings will throw an exception instead of returning NULL")
+        "Non-literal format strings will throw an exception instead of returning NULL")
     }
   }
 
@@ -117,7 +117,7 @@ class CometTemporalExpressionSuite extends CometTestBase with AdaptiveSparkPlanH
         // Comet should fall back to Spark if format is not a literal
         checkSparkAnswerAndFallbackReason(
           "SELECT c0, date_trunc(fmt, c0) from tbl order by c0, fmt",
-          "Invalid format strings will throw an exception instead of returning NULL")
+          "Non-literal format strings will throw an exception instead of returning NULL")
       }
     }
   }