Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions .claude/skills/review-comet-pr/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -149,8 +149,6 @@ Categories include: `aggregate/`, `array/`, `string/`, `math/`, `struct/`, `map/
**SQL file structure:**

```sql
-- ConfigMatrix: parquet.enable.dictionary=false,true

-- Create test data
statement
CREATE TABLE test_crc32(col string, a int, b float) USING parquet
Expand Down
2 changes: 0 additions & 2 deletions docs/source/contributor-guide/adding_a_new_expression.md
Original file line number Diff line number Diff line change
Expand Up @@ -217,8 +217,6 @@ It is important to verify that the new expression is correctly recognized by the
Create a `.sql` file under the appropriate subdirectory in `spark/src/test/resources/sql-tests/expressions/` (e.g., `string/`, `math/`, `array/`). The file should create a table with test data, then run queries that exercise the expression. Here is an example for the `unhex` expression:

```sql
-- ConfigMatrix: parquet.enable.dictionary=false,true

statement
CREATE TABLE test_unhex(col string) USING parquet

Expand Down
20 changes: 9 additions & 11 deletions docs/source/contributor-guide/sql-file-tests.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,6 @@ A test file consists of SQL comments, directives, statements, and queries separa
lines. Here is a minimal example:

```sql
-- ConfigMatrix: parquet.enable.dictionary=false,true

statement
CREATE TABLE test_abs(v double) USING parquet

Expand Down Expand Up @@ -106,16 +104,19 @@ Runs the entire file once per combination of values. Multiple `ConfigMatrix` lin
cross product of all combinations.

```sql
-- ConfigMatrix: parquet.enable.dictionary=false,true
-- ConfigMatrix: spark.sql.optimizer.inSetConversionThreshold=100,0
```

This generates two test cases:

```
sql-file: expressions/cast/cast.sql [parquet.enable.dictionary=false]
sql-file: expressions/cast/cast.sql [parquet.enable.dictionary=true]
sql-file: expressions/conditional/in_set.sql [spark.sql.optimizer.inSetConversionThreshold=100]
sql-file: expressions/conditional/in_set.sql [spark.sql.optimizer.inSetConversionThreshold=0]
```

Only add a `ConfigMatrix` directive when there is a real reason to run the test under
multiple configurations. Do not add `ConfigMatrix` directives speculatively.

#### `MinSparkVersion`

Skips the file when running on a Spark version older than the specified version.
Expand Down Expand Up @@ -223,12 +224,9 @@ SELECT array(1, 2, 3)[10]

2. Add the Apache license header as a SQL comment.

3. Add a `ConfigMatrix` directive if the test should run with multiple Parquet configurations.
Most expression tests use:

```sql
-- ConfigMatrix: parquet.enable.dictionary=false,true
```
3. Add a `ConfigMatrix` directive only if the test needs to run under multiple configurations
(e.g., testing behavior that varies with a specific Spark config). Do not add `ConfigMatrix`
directives speculatively.

4. Create tables and insert test data using `statement` blocks. Include edge cases such as
`NULL`, boundary values, and negative numbers.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,4 +83,3 @@ SELECT AVG(d) FILTER (WHERE flag = true) FROM test_agg_filter

query spark_answer_only
SELECT grp, AVG(d) FILTER (WHERE flag = true) FROM test_agg_filter GROUP BY grp ORDER BY grp

Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
-- specific language governing permissions and limitations
-- under the License.

-- ConfigMatrix: parquet.enable.dictionary=false,true

statement
CREATE TABLE test_avg(i int, l long, f float, d double, grp string) USING parquet

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
-- specific language governing permissions and limitations
-- under the License.

-- ConfigMatrix: parquet.enable.dictionary=false,true

statement
CREATE TABLE test_bit_agg(i int, grp string) USING parquet

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
-- under the License.

-- Config: spark.comet.expression.Corr.allowIncompatible=true
-- ConfigMatrix: parquet.enable.dictionary=false,true

statement
CREATE TABLE test_corr(x double, y double, grp string) USING parquet
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
-- specific language governing permissions and limitations
-- under the License.

-- ConfigMatrix: parquet.enable.dictionary=false,true

statement
CREATE TABLE test_count(i int, s string, grp string) USING parquet

Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@

-- Licensed to the Apache Software Foundation (ASF) under one
-- or more contributor license agreements. See the NOTICE file
-- distributed with this work for additional information
Expand All @@ -15,8 +16,6 @@
-- specific language governing permissions and limitations
-- under the License.

-- ConfigMatrix: parquet.enable.dictionary=false,true

statement
CREATE TABLE test_covar(x double, y double, grp string) USING parquet

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
-- specific language governing permissions and limitations
-- under the License.

-- ConfigMatrix: parquet.enable.dictionary=false,true

statement
CREATE TABLE test_first_last(i int, grp string) USING parquet

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
-- specific language governing permissions and limitations
-- under the License.

-- ConfigMatrix: parquet.enable.dictionary=false,true

statement
CREATE TABLE test_min_max(i int, d double, s string, grp string) USING parquet

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
-- specific language governing permissions and limitations
-- under the License.

-- ConfigMatrix: parquet.enable.dictionary=false,true

statement
CREATE TABLE test_stddev(d double, grp string) USING parquet

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
-- specific language governing permissions and limitations
-- under the License.

-- ConfigMatrix: parquet.enable.dictionary=false,true

statement
CREATE TABLE test_sum(i int, l long, f float, d double, grp string) USING parquet

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
-- specific language governing permissions and limitations
-- under the License.

-- ConfigMatrix: parquet.enable.dictionary=false,true

statement
CREATE TABLE test_variance(d double, grp string) USING parquet

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
-- On Spark 4.0, array_append is a RuntimeReplaceable that rewrites to array_insert(-1),
-- so we need to allow the incompatible array_insert to run natively there.
-- Config: spark.comet.expression.ArrayInsert.allowIncompatible=true
-- ConfigMatrix: parquet.enable.dictionary=false,true

statement
CREATE TABLE test_array_append(arr array<int>, val int) USING parquet
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
-- specific language governing permissions and limitations
-- under the License.

-- ConfigMatrix: parquet.enable.dictionary=false,true

statement
CREATE TABLE test_array_compact(arr array<int>) USING parquet

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
-- specific language governing permissions and limitations
-- under the License.

-- ConfigMatrix: parquet.enable.dictionary=false,true

-- migrated from CometExpressionSuite "test concat function - arrays"
-- https://github.com/apache/datafusion-comet/issues/2647

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
-- specific language governing permissions and limitations
-- under the License.

-- ConfigMatrix: parquet.enable.dictionary=false,true

statement
CREATE TABLE test_array_contains(arr array<int>, val int) USING parquet

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
-- specific language governing permissions and limitations
-- under the License.

-- ConfigMatrix: parquet.enable.dictionary=false,true

statement
CREATE TABLE test_array_distinct(arr array<int>) USING parquet

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
-- KIND, either express or implied. See the License for the
-- specific language governing permissions and limitations
-- under the License.

-- Config: spark.comet.expression.ArrayExcept.allowIncompatible=true
-- ConfigMatrix: parquet.enable.dictionary=false,true

statement
CREATE TABLE test_array_except(a array<int>, b array<int>) USING parquet
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
-- specific language governing permissions and limitations
-- under the License.

-- ConfigMatrix: parquet.enable.dictionary=false,true

statement
CREATE TABLE test_array_filter(arr array<int>) USING parquet

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
-- specific language governing permissions and limitations
-- under the License.

-- ConfigMatrix: parquet.enable.dictionary=false,true

statement
CREATE TABLE test_array_insert(arr array<int>, pos int, val int) USING parquet

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
-- under the License.

-- Config: spark.comet.expression.ArrayIntersect.allowIncompatible=true
-- ConfigMatrix: parquet.enable.dictionary=false,true

statement
CREATE TABLE test_array_intersect(a array<int>, b array<int>) USING parquet
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
-- specific language governing permissions and limitations
-- under the License.

-- ConfigMatrix: parquet.enable.dictionary=false,true

statement
CREATE TABLE test_array_join(arr array<string>) USING parquet

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
-- specific language governing permissions and limitations
-- under the License.

-- ConfigMatrix: parquet.enable.dictionary=false,true

statement
CREATE TABLE test_array_max(arr array<int>) USING parquet

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
-- specific language governing permissions and limitations
-- under the License.

-- ConfigMatrix: parquet.enable.dictionary=false,true

statement
CREATE TABLE test_array_min(arr array<int>) USING parquet

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
-- under the License.

-- Config: spark.comet.expression.ArrayRemove.allowIncompatible=true
-- ConfigMatrix: parquet.enable.dictionary=false,true

statement
CREATE TABLE test_array_remove(arr array<int>, val int) USING parquet
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
-- specific language governing permissions and limitations
-- under the License.

-- ConfigMatrix: parquet.enable.dictionary=false,true

statement
CREATE TABLE test_array_repeat(val int, cnt int) USING parquet

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
-- under the License.

-- Config: spark.comet.expression.ArrayUnion.allowIncompatible=true
-- ConfigMatrix: parquet.enable.dictionary=false,true

statement
CREATE TABLE test_array_union(a array<int>, b array<int>) USING parquet
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
-- under the License.

-- Config: spark.comet.expression.ArraysOverlap.allowIncompatible=true
-- ConfigMatrix: parquet.enable.dictionary=false,true

statement
CREATE TABLE test_arrays_overlap(a array<int>, b array<int>) USING parquet
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
-- specific language governing permissions and limitations
-- under the License.

-- ConfigMatrix: parquet.enable.dictionary=false,true

statement
CREATE TABLE test_create_array(a int, b int, c int) USING parquet

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
-- specific language governing permissions and limitations
-- under the License.

-- ConfigMatrix: parquet.enable.dictionary=false,true

statement
CREATE TABLE test_element_at(arr array<int>) USING parquet

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
-- specific language governing permissions and limitations
-- under the License.

-- ConfigMatrix: parquet.enable.dictionary=false,true

statement
CREATE TABLE test_flatten(arr array<array<int>>) USING parquet

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
-- specific language governing permissions and limitations
-- under the License.

-- ConfigMatrix: parquet.enable.dictionary=false,true

statement
CREATE TABLE test_get_array_item(arr array<int>, idx int) USING parquet

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
-- specific language governing permissions and limitations
-- under the License.

-- ConfigMatrix: parquet.enable.dictionary=false,true

statement
CREATE TABLE test_arr_struct(arr array<struct<name: string, value: int>>) USING parquet

Expand Down
2 changes: 0 additions & 2 deletions spark/src/test/resources/sql-tests/expressions/array/size.sql
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
-- specific language governing permissions and limitations
-- under the License.

-- ConfigMatrix: parquet.enable.dictionary=false,true

statement
CREATE TABLE test_size(arr array<int>, m map<string, int>) USING parquet

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
-- specific language governing permissions and limitations
-- under the License.

-- ConfigMatrix: parquet.enable.dictionary=false,true

-- Setup
statement
CREATE TABLE test(col1 int, col2 int) USING parquet
Expand Down
2 changes: 0 additions & 2 deletions spark/src/test/resources/sql-tests/expressions/cast/cast.sql
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
-- specific language governing permissions and limitations
-- under the License.

-- ConfigMatrix: parquet.enable.dictionary=false,true

statement
CREATE TABLE test_cast(i int, l long, f float, d double, s string, b boolean) USING parquet

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
-- specific language governing permissions and limitations
-- under the License.

-- ConfigMatrix: parquet.enable.dictionary=false,true

statement
CREATE TABLE test_double_to_string(d double, id int) USING parquet

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
-- specific language governing permissions and limitations
-- under the License.

-- ConfigMatrix: parquet.enable.dictionary=false,true

-- compare true/false to negative zero
statement
CREATE TABLE test(col1 boolean, col2 float) USING parquet
Expand Down
Loading
Loading