Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
768b3e9
impl map_from_entries
Dec 14, 2025
c68c342
Revert "impl map_from_entries"
Dec 16, 2025
d887555
Merge branch 'apache:main' into main
kazantsev-maksim Dec 16, 2025
231aa90
Merge branch 'apache:main' into main
kazantsev-maksim Dec 17, 2025
9500bbb
Merge branch 'apache:main' into main
kazantsev-maksim Dec 24, 2025
9577481
Merge branch 'apache:main' into main
kazantsev-maksim Dec 28, 2025
3791557
Merge branch 'apache:main' into main
kazantsev-maksim Jan 2, 2026
7c2f082
Merge branch 'apache:main' into main
kazantsev-maksim Jan 3, 2026
609a605
Merge branch 'apache:main' into main
kazantsev-maksim Jan 6, 2026
a151b2c
Merge branch 'apache:main' into main
kazantsev-maksim Jan 7, 2026
ad3e7f5
Merge branch 'apache:main' into main
kazantsev-maksim Jan 10, 2026
ea92e4b
Merge branch 'apache:main' into main
kazantsev-maksim Jan 14, 2026
8dfeca3
Merge branch 'apache:main' into main
kazantsev-maksim Jan 17, 2026
559741e
Merge branch 'apache:main' into main
kazantsev-maksim Jan 20, 2026
ebda14e
Merge branch 'apache:main' into main
kazantsev-maksim Jan 21, 2026
408152e
Merge branch 'apache:main' into main
kazantsev-maksim Jan 23, 2026
d7857b2
Merge branch 'apache:main' into main
kazantsev-maksim Jan 24, 2026
aef41be
Merge branch 'apache:main' into main
kazantsev-maksim Jan 29, 2026
5ac1c58
Merge branch 'apache:main' into main
kazantsev-maksim Jan 30, 2026
9ae8e23
Merge branch 'apache:main' into main
kazantsev-maksim Feb 1, 2026
5ca3888
Merge branch 'apache:main' into main
kazantsev-maksim Feb 4, 2026
160a817
Merge branch 'apache:main' into main
kazantsev-maksim Feb 5, 2026
88fc313
Merge branch 'apache:main' into main
kazantsev-maksim Feb 7, 2026
e14c180
Merge branch 'apache:main' into main
kazantsev-maksim Feb 13, 2026
610a885
Merge branch 'apache:main' into main
kazantsev-maksim Feb 20, 2026
f8acb2c
Merge branch 'apache:main' into main
kazantsev-maksim Feb 21, 2026
ec94897
Merge branch 'apache:main' into main
kazantsev-maksim Feb 26, 2026
43405e4
Merge branch 'apache:main' into main
kazantsev-maksim Feb 27, 2026
47b4915
Merge branch 'apache:main' into main
kazantsev-maksim Mar 1, 2026
26e2682
Merge branch 'apache:main' into main
kazantsev-maksim Mar 3, 2026
6cb5f07
Merge branch 'apache:main' into main
kazantsev-maksim Mar 4, 2026
ec194fb
Merge branch 'apache:main' into main
kazantsev-maksim Mar 31, 2026
e0a02bf
Feat: support Infinity/-Infinity Nan values for numeric types
Apr 1, 2026
c322014
Fix PR issues
Apr 2, 2026
2dce727
Fix tests
Apr 3, 2026
c7bf49f
Fix tests
Apr 3, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion native/spark-expr/src/json_funcs/to_json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ fn struct_to_json(array: &StructArray, timezone: &str) -> Result<ArrayRef> {
json.push_str("\":");
// value
let string_value = string_arrays[col_index].value(row_index);
if is_string[col_index] {
if is_string[col_index] || is_infinity(string_value) || is_nan(string_value) {
json.push('"');
json.push_str(&escape_string(string_value));
json.push('"');
Expand All @@ -252,6 +252,14 @@ fn struct_to_json(array: &StructArray, timezone: &str) -> Result<ArrayRef> {
Ok(Arc::new(builder.finish()))
}

fn is_infinity(input: &str) -> bool {
input == "Infinity" || input == "-Infinity"
}

fn is_nan(input: &str) -> bool {
input == "NaN"
}

#[cfg(test)]
mod test {
use crate::json_funcs::to_json::struct_to_json;
Expand Down
66 changes: 25 additions & 41 deletions spark/src/main/scala/org/apache/comet/serde/structs.scala
Original file line number Diff line number Diff line change
Expand Up @@ -105,53 +105,37 @@ object CometGetArrayStructFields extends CometExpressionSerde[GetArrayStructFiel

object CometStructsToJson extends CometExpressionSerde[StructsToJson] {

override def getSupportLevel(expr: StructsToJson): SupportLevel =
Incompatible(
Some(
"Does not support Infinity/-Infinity for numeric types" +
" (https://github.com/apache/datafusion-comet/issues/3016)"))
override def getSupportLevel(expr: StructsToJson): SupportLevel = {
if (expr.options.nonEmpty) {
return Unsupported(Some("StructsToJson with options is not supported"))
}
val dataType = expr.child.dataType
if (!isSupportedType(dataType)) {
return Unsupported(Some(s"Struct type: $dataType contains unsupported types"))
}
Compatible()
}

override def convert(
expr: StructsToJson,
inputs: Seq[Attribute],
binding: Boolean): Option[ExprOuterClass.Expr] = {
if (expr.options.nonEmpty) {
withInfo(expr, "StructsToJson with options is not supported")
None
} else {
val isSupported = expr.child.dataType match {
case s: StructType =>
s.fields.forall(f => isSupportedType(f.dataType))
case _: MapType | _: ArrayType =>
// Spark supports map and array in StructsToJson but this is not yet
// implemented in Comet
false
case _ =>
false
}

if (isSupported) {
exprToProtoInternal(expr.child, inputs, binding) match {
case Some(p) =>
val toJson = ExprOuterClass.ToJson
.newBuilder()
.setChild(p)
.setTimezone(expr.timeZoneId.getOrElse("UTC"))
.setIgnoreNullFields(true)
.build()
Some(
ExprOuterClass.Expr
.newBuilder()
.setToJson(toJson)
.build())
case _ =>
withInfo(expr, expr.child)
None
}
} else {
withInfo(expr, "Unsupported data type", expr.child)
exprToProtoInternal(expr.child, inputs, binding) match {
case Some(p) =>
val toJson = ExprOuterClass.ToJson
.newBuilder()
.setChild(p)
.setTimezone(expr.timeZoneId.getOrElse("UTC"))
.setIgnoreNullFields(true)
.build()
Some(
ExprOuterClass.Expr
.newBuilder()
.setToJson(toJson)
.build())
case _ =>
withInfo(expr, expr.child)
None
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,11 @@ SELECT to_json(named_struct('a', a, 'b', b)) FROM test_to_json
-- literal arguments
query spark_answer_only
SELECT to_json(named_struct('a', 1, 'b', 'hello'))

-- query expect_fallback(StructsToJson with options is not supported)
query ignore("Need support Spark 4.0.0")
SELECT to_json(named_struct('a', a, 'b', b), map('timestampFormat', 'dd/MM/yyyy'))

-- query expect_fallback(Struct type: StructType(StructField(a,ArrayType(IntegerType,false),false)) contains unsupported types)
query ignore("Need support Spark 4.0.0")
SELECT to_json(named_struct(a, array(b)))
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ class CometJsonExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelpe
filename,
100,
SchemaGenOptions(generateArray = false, generateStruct = false, generateMap = false),
DataGenOptions(generateNaN = false, generateInfinity = false))
DataGenOptions(generateNaN = true, generateInfinity = true))
}
val table = spark.read.parquet(filename)
val fieldsNames = table.schema.fields
Expand All @@ -72,6 +72,20 @@ class CometJsonExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelpe
}
}

test("to_json - fallback reasons") {
assume(!isSpark40Plus)
withTable("t") {
sql("CREATE TABLE t(a INT, b STRING) USING parquet")
sql("INSERT INTO t VALUES (1, 'hello')")
checkSparkAnswerAndFallbackReason(
"SELECT to_json(named_struct('a', a, 'b', b), map('timestampFormat', 'dd/MM/yyyy')) FROM t",
"StructsToJson with options is not supported")
checkSparkAnswerAndFallbackReason(
"SELECT to_json(named_struct('b', array(b))) FROM t",
"Struct type: StructType(StructField(b,ArrayType(StringType,true),false)) contains unsupported types")
}
}

test("from_json - basic primitives") {
Seq(true, false).foreach { dictionaryEnabled =>
withParquetTable(
Expand Down
Loading