Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions docs/source/user-guide/latest/compatibility.md
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,15 @@ Cast operations in Comet fall into three levels of support:
Spark.
- **N/A**: Spark does not support this cast.

### String to Timestamp

Comet's native `CAST(string AS TIMESTAMP)` implementation supports all timestamp formats accepted
by Apache Spark, including ISO 8601 date-time strings, date-only strings, time-only strings
(`HH:MM:SS`), embedded timezone offsets (e.g. `+07:30`, `GMT-01:00`, `UTC`), named timezone
suffixes (e.g. `Europe/Moscow`), and the full Spark timestamp year range
(-290308 to 294247). By contrast, `CAST(string AS DATE)` is only compatible with Spark for years
between 262143 BC and 262142 AD, because of a limitation in an underlying library.

### Legacy Mode

<!--BEGIN:CAST_LEGACY_TABLE-->
Expand Down
7 changes: 6 additions & 1 deletion native/core/src/execution/planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -406,7 +406,12 @@ impl PhysicalPlanner {
Ok(Arc::new(Cast::new(
child,
datatype,
SparkCastOptions::new(eval_mode, &expr.timezone, expr.allow_incompat),
{
let mut opts =
SparkCastOptions::new(eval_mode, &expr.timezone, expr.allow_incompat);
opts.is_spark4_plus = expr.is_spark4_plus;
opts
},
spark_expr.expr_id,
query_context,
)))
Expand Down
3 changes: 3 additions & 0 deletions native/proto/src/proto/expr.proto
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,9 @@ message Cast {
string timezone = 3;
EvalMode eval_mode = 4;
bool allow_incompat = 5;
// True when running against Spark 4.0+. Controls version-specific cast behaviour
// such as the handling of leading whitespace before T-prefixed time-only strings.
bool is_spark4_plus = 6;
}

message BinaryExpr {
Expand Down
15 changes: 12 additions & 3 deletions native/spark-expr/src/conversion_funcs/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,9 @@ pub struct SparkCastOptions {
pub timezone: String,
/// Allow casts that are supported but not guaranteed to be 100% compatible
pub allow_incompat: bool,
/// True when running against Spark 4.0+. Enables version-specific cast behaviour
/// such as the handling of leading whitespace before T-prefixed time-only strings.
pub is_spark4_plus: bool,
/// Support casting unsigned ints to signed ints (used by Parquet SchemaAdapter)
pub allow_cast_unsigned_ints: bool,
/// We also use the cast logic for adapting Parquet schemas, so this flag is used
Expand All @@ -148,6 +151,7 @@ impl SparkCastOptions {
eval_mode,
timezone: timezone.to_string(),
allow_incompat,
is_spark4_plus: false,
allow_cast_unsigned_ints: false,
is_adapting_schema: false,
null_string: "null".to_string(),
Expand All @@ -160,6 +164,7 @@ impl SparkCastOptions {
eval_mode,
timezone: "".to_string(),
allow_incompat,
is_spark4_plus: false,
allow_cast_unsigned_ints: false,
is_adapting_schema: false,
null_string: "null".to_string(),
Expand Down Expand Up @@ -296,9 +301,13 @@ pub(crate) fn cast_array(
let cast_result = match (&from_type, to_type) {
(Utf8, Boolean) => spark_cast_utf8_to_boolean::<i32>(&array, eval_mode),
(LargeUtf8, Boolean) => spark_cast_utf8_to_boolean::<i64>(&array, eval_mode),
(Utf8, Timestamp(_, _)) => {
cast_string_to_timestamp(&array, to_type, eval_mode, &cast_options.timezone)
}
(Utf8, Timestamp(_, _)) => cast_string_to_timestamp(
&array,
to_type,
eval_mode,
&cast_options.timezone,
cast_options.is_spark4_plus,
),
(Utf8, Date32) => cast_string_to_date(&array, to_type, eval_mode),
(Date32, Int32) => {
// Date32 is stored as days since epoch (i32), so this is a simple reinterpret cast
Expand Down
Loading
Loading