diff --git a/packages/bigframes/bigframes/core/array_value.py b/packages/bigframes/bigframes/core/array_value.py index 2c02fa46e86a..5a238e39b3d6 100644 --- a/packages/bigframes/bigframes/core/array_value.py +++ b/packages/bigframes/bigframes/core/array_value.py @@ -541,6 +541,7 @@ def relational_join( for l_col, r_col in conditions ), type=type, + nulls_equal=True, # pandas semantics propogate_order=propogate_order or self.session._strictly_ordered, ) return ArrayValue(join_node), (l_mapping, r_mapping) diff --git a/packages/bigframes/bigframes/core/compile/ibis_compiler/ibis_compiler.py b/packages/bigframes/bigframes/core/compile/ibis_compiler/ibis_compiler.py index b08e6da504b5..1f29a253d550 100644 --- a/packages/bigframes/bigframes/core/compile/ibis_compiler/ibis_compiler.py +++ b/packages/bigframes/bigframes/core/compile/ibis_compiler/ibis_compiler.py @@ -50,6 +50,7 @@ def compile_sql(request: configs.CompileRequest) -> configs.CompileResult: # Need to do this before replacing unsupported ops, as that will rewrite slice ops result_node = rewrites.pull_up_limits(result_node) result_node = _replace_unsupported_ops(result_node) + result_node = result_node.bottom_up(rewrites.simplify_join) # prune before pulling up order to avoid unnnecessary row_number() ops result_node = cast(nodes.ResultNode, rewrites.column_pruning(result_node)) result_node = rewrites.defer_order( diff --git a/packages/bigframes/bigframes/core/compile/sqlglot/compiler.py b/packages/bigframes/bigframes/core/compile/sqlglot/compiler.py index 73f032bbf0a8..a18b74d69d92 100644 --- a/packages/bigframes/bigframes/core/compile/sqlglot/compiler.py +++ b/packages/bigframes/bigframes/core/compile/sqlglot/compiler.py @@ -54,6 +54,7 @@ def compile_sql(request: configs.CompileRequest) -> configs.CompileResult: # Need to do this before replacing unsupported ops, as that will rewrite slice ops result_node = rewrite.pull_up_limits(result_node) result_node = _replace_unsupported_ops(result_node) + result_node 
= result_node.bottom_up(rewrite.simplify_join) # prune before pulling up order to avoid unnnecessary row_number() ops result_node = typing.cast(nodes.ResultNode, rewrite.column_pruning(result_node)) result_node = rewrite.defer_order( diff --git a/packages/bigframes/bigframes/core/local_data.py b/packages/bigframes/bigframes/core/local_data.py index e797f914c043..09111572f3c9 100644 --- a/packages/bigframes/bigframes/core/local_data.py +++ b/packages/bigframes/bigframes/core/local_data.py @@ -154,6 +154,9 @@ def to_arrow( else: return schema, batches + def is_nullable(self, column_id: identifiers.ColumnId) -> bool: + return self.data.column(column_id).null_count > 0 + def to_pyarrow_table( self, *, diff --git a/packages/bigframes/bigframes/core/nodes.py b/packages/bigframes/bigframes/core/nodes.py index 342b32f4977f..5297ceed9140 100644 --- a/packages/bigframes/bigframes/core/nodes.py +++ b/packages/bigframes/bigframes/core/nodes.py @@ -204,6 +204,8 @@ class InNode(BigFrameNode, AdditiveNode): right_child: BigFrameNode left_col: ex.DerefOp indicator_col: identifiers.ColumnId + # For matching left_col to right_child[0], if true, nulls match nulls, if false, nulls don't match nulls + nulls_equal: bool = True def _validate(self): assert len(self.right_child.fields) == 1 @@ -271,10 +273,7 @@ def additive_base(self) -> BigFrameNode: @property def joins_nulls(self) -> bool: - left_nullable = self.left_child.field_by_id[self.left_col.id].nullable - # assumption: right side has one column - right_nullable = self.right_child.fields[0].nullable - return left_nullable or right_nullable + return self.nulls_equal @property def _node_expressions(self): @@ -316,6 +315,9 @@ class JoinNode(BigFrameNode): right_child: BigFrameNode conditions: typing.Tuple[typing.Tuple[ex.DerefOp, ex.DerefOp], ...] 
type: typing.Literal["inner", "outer", "left", "right", "cross"] + # choose to treat nulls as equal or not for purposes of the join + # pandas treats nulls as equal, sql does not + nulls_equal: bool propogate_order: bool def _validate(self): @@ -355,13 +357,7 @@ def fields(self) -> Sequence[Field]: @property def joins_nulls(self) -> bool: - for left_ref, right_ref in self.conditions: - if ( - self.left_child.field_by_id[left_ref.id].nullable - and self.right_child.field_by_id[right_ref.id].nullable - ): - return True - return False + return self.nulls_equal @functools.cached_property def variables_introduced(self) -> int: @@ -675,7 +671,11 @@ class ReadLocalNode(LeafNode): @property def fields(self) -> Sequence[Field]: fields = tuple( - Field(col_id, self.local_data_source.schema.get_type(source_id)) + Field( + col_id, + self.local_data_source.schema.get_type(source_id), + nullable=self.local_data_source.is_nullable(source_id), + ) for col_id, source_id in self.scan_list.items ) diff --git a/packages/bigframes/bigframes/core/rewrite/__init__.py b/packages/bigframes/bigframes/core/rewrite/__init__.py index 6b00e9b2f126..0842fde512a1 100644 --- a/packages/bigframes/bigframes/core/rewrite/__init__.py +++ b/packages/bigframes/bigframes/core/rewrite/__init__.py @@ -33,6 +33,7 @@ rewrite_range_rolling, simplify_complex_windows, ) +from bigframes.core.rewrite.nullity import simplify_join __all__ = [ "as_sql_nodes", @@ -55,4 +56,5 @@ "defer_selection", "simplify_complex_windows", "lower_udfs", + "simplify_join", ] diff --git a/packages/bigframes/bigframes/core/rewrite/nullity.py b/packages/bigframes/bigframes/core/rewrite/nullity.py new file mode 100644 index 000000000000..4eef2be72db6 --- /dev/null +++ b/packages/bigframes/bigframes/core/rewrite/nullity.py @@ -0,0 +1,42 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +from bigframes.core import nodes +import dataclasses + + +def simplify_join(node: nodes.BigFrameNode) -> nodes.BigFrameNode: + """Disable null-matching on join/isin nodes whose keys cannot be null.""" + # if join conditions are provably non-null, we can set nulls_equal=False + if isinstance(node, nodes.JoinNode): + # even better, we can always make nulls_equal false, but wrap the join keys in coalesce + # to handle nulls correctly, this is more granular than the current implementation + for left_ref, right_ref in node.conditions: + if ( + node.left_child.field_by_id[left_ref.id].nullable + and node.right_child.field_by_id[right_ref.id].nullable + ): + return node + return dataclasses.replace(node, nulls_equal=False) + elif isinstance(node, nodes.InNode): + if ( # a NULL on either side makes SQL IN return NULL + node.left_child.field_by_id[node.left_col.id].nullable + or node.right_child.fields[0].nullable + ): + return node + return dataclasses.replace(node, nulls_equal=False) + else: + return node diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/conftest.py b/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/conftest.py index 828618fe455b..d8633ec376ef 100644 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/conftest.py +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/conftest.py @@ -24,80 +24,80 @@ TPCH_SCHEMAS = { "LINEITEM": [ - bigquery.SchemaField("L_ORDERKEY", "INTEGER"), - bigquery.SchemaField("L_PARTKEY", "INTEGER"), - bigquery.SchemaField("L_SUPPKEY", "INTEGER"), - bigquery.SchemaField("L_LINENUMBER", "INTEGER"), - 
bigquery.SchemaField("L_QUANTITY", "FLOAT"), - bigquery.SchemaField("L_EXTENDEDPRICE", "FLOAT"), - bigquery.SchemaField("L_DISCOUNT", "FLOAT"), - bigquery.SchemaField("L_TAX", "FLOAT"), - bigquery.SchemaField("L_RETURNFLAG", "STRING"), - bigquery.SchemaField("L_LINESTATUS", "STRING"), - bigquery.SchemaField("L_SHIPDATE", "DATE"), - bigquery.SchemaField("L_COMMITDATE", "DATE"), - bigquery.SchemaField("L_RECEIPTDATE", "DATE"), - bigquery.SchemaField("L_SHIPINSTRUCT", "STRING"), - bigquery.SchemaField("L_SHIPMODE", "STRING"), + bigquery.SchemaField("L_ORDERKEY", "INTEGER", mode="REQUIRED"), + bigquery.SchemaField("L_PARTKEY", "INTEGER", mode="REQUIRED"), + bigquery.SchemaField("L_SUPPKEY", "INTEGER", mode="REQUIRED"), + bigquery.SchemaField("L_LINENUMBER", "INTEGER", mode="REQUIRED"), + bigquery.SchemaField("L_QUANTITY", "FLOAT", mode="REQUIRED"), + bigquery.SchemaField("L_EXTENDEDPRICE", "FLOAT", mode="REQUIRED"), + bigquery.SchemaField("L_DISCOUNT", "FLOAT", mode="REQUIRED"), + bigquery.SchemaField("L_TAX", "FLOAT", mode="REQUIRED"), + bigquery.SchemaField("L_RETURNFLAG", "STRING", mode="REQUIRED"), + bigquery.SchemaField("L_LINESTATUS", "STRING", mode="REQUIRED"), + bigquery.SchemaField("L_SHIPDATE", "DATE", mode="REQUIRED"), + bigquery.SchemaField("L_COMMITDATE", "DATE", mode="REQUIRED"), + bigquery.SchemaField("L_RECEIPTDATE", "DATE", mode="REQUIRED"), + bigquery.SchemaField("L_SHIPINSTRUCT", "STRING", mode="REQUIRED"), + bigquery.SchemaField("L_SHIPMODE", "STRING", mode="REQUIRED"), bigquery.SchemaField("L_COMMENT", "STRING"), ], "ORDERS": [ - bigquery.SchemaField("O_ORDERKEY", "INTEGER"), - bigquery.SchemaField("O_CUSTKEY", "INTEGER"), - bigquery.SchemaField("O_ORDERSTATUS", "STRING"), - bigquery.SchemaField("O_TOTALPRICE", "FLOAT"), - bigquery.SchemaField("O_ORDERDATE", "DATE"), - bigquery.SchemaField("O_ORDERPRIORITY", "STRING"), - bigquery.SchemaField("O_CLERK", "STRING"), - bigquery.SchemaField("O_SHIPPRIORITY", "INTEGER"), + 
bigquery.SchemaField("O_ORDERKEY", "INTEGER", mode="REQUIRED"), + bigquery.SchemaField("O_CUSTKEY", "INTEGER", mode="REQUIRED"), + bigquery.SchemaField("O_ORDERSTATUS", "STRING", mode="REQUIRED"), + bigquery.SchemaField("O_TOTALPRICE", "FLOAT", mode="REQUIRED"), + bigquery.SchemaField("O_ORDERDATE", "DATE", mode="REQUIRED"), + bigquery.SchemaField("O_ORDERPRIORITY", "STRING", mode="REQUIRED"), + bigquery.SchemaField("O_CLERK", "STRING", mode="REQUIRED"), + bigquery.SchemaField("O_SHIPPRIORITY", "INTEGER", mode="REQUIRED"), bigquery.SchemaField("O_COMMENT", "STRING"), ], "PART": [ - bigquery.SchemaField("P_PARTKEY", "INTEGER"), - bigquery.SchemaField("P_NAME", "STRING"), - bigquery.SchemaField("P_MFGR", "STRING"), - bigquery.SchemaField("P_BRAND", "STRING"), - bigquery.SchemaField("P_TYPE", "STRING"), - bigquery.SchemaField("P_SIZE", "INTEGER"), - bigquery.SchemaField("P_CONTAINER", "STRING"), - bigquery.SchemaField("P_RETAILPRICE", "FLOAT"), + bigquery.SchemaField("P_PARTKEY", "INTEGER", mode="REQUIRED"), + bigquery.SchemaField("P_NAME", "STRING", mode="REQUIRED"), + bigquery.SchemaField("P_MFGR", "STRING", mode="REQUIRED"), + bigquery.SchemaField("P_BRAND", "STRING", mode="REQUIRED"), + bigquery.SchemaField("P_TYPE", "STRING", mode="REQUIRED"), + bigquery.SchemaField("P_SIZE", "INTEGER", mode="REQUIRED"), + bigquery.SchemaField("P_CONTAINER", "STRING", mode="REQUIRED"), + bigquery.SchemaField("P_RETAILPRICE", "FLOAT", mode="REQUIRED"), bigquery.SchemaField("P_COMMENT", "STRING"), ], "SUPPLIER": [ - bigquery.SchemaField("S_SUPPKEY", "INTEGER"), - bigquery.SchemaField("S_NAME", "STRING"), - bigquery.SchemaField("S_ADDRESS", "STRING"), - bigquery.SchemaField("S_NATIONKEY", "INTEGER"), - bigquery.SchemaField("S_PHONE", "STRING"), - bigquery.SchemaField("S_ACCTBAL", "FLOAT"), + bigquery.SchemaField("S_SUPPKEY", "INTEGER", mode="REQUIRED"), + bigquery.SchemaField("S_NAME", "STRING", mode="REQUIRED"), + bigquery.SchemaField("S_ADDRESS", "STRING", mode="REQUIRED"), + 
bigquery.SchemaField("S_NATIONKEY", "INTEGER", mode="REQUIRED"), + bigquery.SchemaField("S_PHONE", "STRING", mode="REQUIRED"), + bigquery.SchemaField("S_ACCTBAL", "FLOAT", mode="REQUIRED"), bigquery.SchemaField("S_COMMENT", "STRING"), ], "PARTSUPP": [ - bigquery.SchemaField("PS_PARTKEY", "INTEGER"), - bigquery.SchemaField("PS_SUPPKEY", "INTEGER"), - bigquery.SchemaField("PS_AVAILQTY", "INTEGER"), - bigquery.SchemaField("PS_SUPPLYCOST", "FLOAT"), + bigquery.SchemaField("PS_PARTKEY", "INTEGER", mode="REQUIRED"), + bigquery.SchemaField("PS_SUPPKEY", "INTEGER", mode="REQUIRED"), + bigquery.SchemaField("PS_AVAILQTY", "INTEGER", mode="REQUIRED"), + bigquery.SchemaField("PS_SUPPLYCOST", "FLOAT", mode="REQUIRED"), bigquery.SchemaField("PS_COMMENT", "STRING"), ], "CUSTOMER": [ - bigquery.SchemaField("C_CUSTKEY", "INTEGER"), - bigquery.SchemaField("C_NAME", "STRING"), - bigquery.SchemaField("C_ADDRESS", "STRING"), - bigquery.SchemaField("C_NATIONKEY", "INTEGER"), - bigquery.SchemaField("C_PHONE", "STRING"), - bigquery.SchemaField("C_ACCTBAL", "FLOAT"), - bigquery.SchemaField("C_MKTSEGMENT", "STRING"), + bigquery.SchemaField("C_CUSTKEY", "INTEGER", mode="REQUIRED"), + bigquery.SchemaField("C_NAME", "STRING", mode="REQUIRED"), + bigquery.SchemaField("C_ADDRESS", "STRING", mode="REQUIRED"), + bigquery.SchemaField("C_NATIONKEY", "INTEGER", mode="REQUIRED"), + bigquery.SchemaField("C_PHONE", "STRING", mode="REQUIRED"), + bigquery.SchemaField("C_ACCTBAL", "FLOAT", mode="REQUIRED"), + bigquery.SchemaField("C_MKTSEGMENT", "STRING", mode="REQUIRED"), bigquery.SchemaField("C_COMMENT", "STRING"), ], "NATION": [ - bigquery.SchemaField("N_NATIONKEY", "INTEGER"), - bigquery.SchemaField("N_NAME", "STRING"), - bigquery.SchemaField("N_REGIONKEY", "INTEGER"), + bigquery.SchemaField("N_NATIONKEY", "INTEGER", mode="REQUIRED"), + bigquery.SchemaField("N_NAME", "STRING", mode="REQUIRED"), + bigquery.SchemaField("N_REGIONKEY", "INTEGER", mode="REQUIRED"), bigquery.SchemaField("N_COMMENT", 
"STRING"), ], "REGION": [ - bigquery.SchemaField("R_REGIONKEY", "INTEGER"), - bigquery.SchemaField("R_NAME", "STRING"), + bigquery.SchemaField("R_REGIONKEY", "INTEGER", mode="REQUIRED"), + bigquery.SchemaField("R_NAME", "STRING", mode="REQUIRED"), bigquery.SchemaField("R_COMMENT", "STRING"), ], } diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/1/out.sql b/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/1/out.sql index 1afccf820c14..59247c7bdc39 100644 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/1/out.sql +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/1/out.sql @@ -54,8 +54,6 @@ WITH `bfcte_0` AS ( AVG(`bfcol_43`) AS `bfcol_61`, COUNT(`bfcol_41`) AS `bfcol_62` FROM `bfcte_0` - WHERE - NOT `bfcol_44` IS NULL AND NOT `bfcol_45` IS NULL GROUP BY `bfcol_44`, `bfcol_45` diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/10/out.sql b/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/10/out.sql index 8362d3afca10..b6167a1fd9b3 100644 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/10/out.sql +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/10/out.sql @@ -39,8 +39,7 @@ WITH `bfcte_0` AS ( `bfcol_8` AS `bfcol_24` FROM `bfcte_3` INNER JOIN `bfcte_2` - ON COALESCE(`bfcol_9`, 0) = COALESCE(`bfcol_7`, 0) - AND COALESCE(`bfcol_9`, 1) = COALESCE(`bfcol_7`, 1) + ON `bfcol_9` = `bfcol_7` ), `bfcte_5` AS ( SELECT `bfcol_16` AS `bfcol_25`, @@ -56,8 +55,7 @@ WITH `bfcte_0` AS ( `bfcol_5` AS `bfcol_35` FROM `bfcte_4` INNER JOIN `bfcte_1` - ON COALESCE(`bfcol_23`, 0) = COALESCE(`bfcol_2`, 0) - AND COALESCE(`bfcol_23`, 1) = COALESCE(`bfcol_2`, 1) + ON `bfcol_23` = `bfcol_2` ), `bfcte_6` AS ( 
SELECT `bfcol_25`, @@ -107,8 +105,7 @@ WITH `bfcte_0` AS ( ), 2) AS `bfcol_83` FROM `bfcte_5` INNER JOIN `bfcte_0` - ON COALESCE(`bfcol_28`, 0) = COALESCE(`bfcol_0`, 0) - AND COALESCE(`bfcol_28`, 1) = COALESCE(`bfcol_0`, 1) + ON `bfcol_28` = `bfcol_0` WHERE ( ( @@ -133,13 +130,7 @@ WITH `bfcte_0` AS ( COALESCE(SUM(`bfcol_83`), 0) AS `bfcol_92` FROM `bfcte_6` WHERE - NOT `bfcol_76` IS NULL - AND NOT `bfcol_77` IS NULL - AND NOT `bfcol_80` IS NULL - AND NOT `bfcol_79` IS NULL - AND NOT `bfcol_82` IS NULL - AND NOT `bfcol_78` IS NULL - AND NOT `bfcol_81` IS NULL + NOT `bfcol_81` IS NULL GROUP BY `bfcol_76`, `bfcol_77`, diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/11/out.sql b/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/11/out.sql index 2b296be74483..2f080a0c1c13 100644 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/11/out.sql +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/11/out.sql @@ -31,8 +31,7 @@ WITH `bfcte_0` AS ( `bfcol_3` AS `bfcol_19` FROM `bfcte_4` INNER JOIN `bfcte_3` - ON COALESCE(`bfcol_18`, 0) = COALESCE(`bfcol_4`, 0) - AND COALESCE(`bfcol_18`, 1) = COALESCE(`bfcol_4`, 1) + ON `bfcol_18` = `bfcol_4` ), `bfcte_6` AS ( SELECT `bfcol_19`, @@ -46,8 +45,7 @@ WITH `bfcte_0` AS ( `bfcol_2` * `bfcol_1` AS `bfcol_40` FROM `bfcte_5` INNER JOIN `bfcte_1` - ON COALESCE(`bfcol_19`, 0) = COALESCE(`bfcol_0`, 0) - AND COALESCE(`bfcol_19`, 1) = COALESCE(`bfcol_0`, 1) + ON `bfcol_19` = `bfcol_0` ), `bfcte_7` AS ( SELECT `bfcol_19`, @@ -59,8 +57,7 @@ WITH `bfcte_0` AS ( `bfcol_13` * `bfcol_12` AS `bfcol_28` FROM `bfcte_5` INNER JOIN `bfcte_2` - ON COALESCE(`bfcol_19`, 0) = COALESCE(`bfcol_11`, 0) - AND COALESCE(`bfcol_19`, 1) = COALESCE(`bfcol_11`, 1) + ON `bfcol_19` = `bfcol_11` ), `bfcte_8` AS ( SELECT COALESCE(SUM(`bfcol_40`), 0) AS `bfcol_44` @@ -70,8 +67,6 @@ 
WITH `bfcte_0` AS ( `bfcol_27`, COALESCE(SUM(`bfcol_28`), 0) AS `bfcol_35` FROM `bfcte_7` - WHERE - NOT `bfcol_27` IS NULL GROUP BY `bfcol_27` ), `bfcte_10` AS ( @@ -101,8 +96,6 @@ WITH `bfcte_0` AS ( `bfcol_8`, ANY_VALUE(`bfcol_51`) AS `bfcol_55` FROM `bfcte_12` - WHERE - NOT `bfcol_7` IS NULL AND NOT `bfcol_8` IS NULL GROUP BY `bfcol_7`, `bfcol_8` diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/12/out.sql b/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/12/out.sql index 4d91dcdac192..8238f7620bfa 100644 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/12/out.sql +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/12/out.sql @@ -51,8 +51,7 @@ WITH `bfcte_0` AS ( ) AS INT64) AS `bfcol_28` FROM `bfcte_1` INNER JOIN `bfcte_0` - ON COALESCE(`bfcol_5`, 0) = COALESCE(`bfcol_0`, 0) - AND COALESCE(`bfcol_5`, 1) = COALESCE(`bfcol_0`, 1) + ON `bfcol_5` = `bfcol_0` WHERE ( ( @@ -79,8 +78,6 @@ WITH `bfcte_0` AS ( COALESCE(SUM(`bfcol_27`), 0) AS `bfcol_32`, COALESCE(SUM(`bfcol_28`), 0) AS `bfcol_33` FROM `bfcte_2` - WHERE - NOT `bfcol_26` IS NULL GROUP BY `bfcol_26` ) diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/13/out.sql b/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/13/out.sql index 728738a15e02..5cab3bf2fb36 100644 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/13/out.sql +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/13/out.sql @@ -17,10 +17,7 @@ WITH `bfcte_0` AS ( COUNT(`bfcol_10`) AS `bfcol_14` FROM `bfcte_1` LEFT JOIN `bfcte_0` - ON COALESCE(`bfcol_3`, 0) = COALESCE(`bfcol_11`, 0) - AND COALESCE(`bfcol_3`, 1) = COALESCE(`bfcol_11`, 1) - WHERE - NOT `bfcol_3` IS NULL + 
ON `bfcol_3` = `bfcol_11` GROUP BY `bfcol_3` ), `bfcte_3` AS ( diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/14/out.sql b/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/14/out.sql index a4644a86def0..1fb8a766790e 100644 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/14/out.sql +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/14/out.sql @@ -46,8 +46,7 @@ WITH `bfcte_0` AS ( ) AS `bfcol_51` FROM `bfcte_3` INNER JOIN `bfcte_2` - ON COALESCE(`bfcol_4`, 0) = COALESCE(`bfcol_0`, 0) - AND COALESCE(`bfcol_4`, 1) = COALESCE(`bfcol_0`, 1) + ON `bfcol_4` = `bfcol_0` WHERE ( `bfcol_3` >= CAST('1995-09-01' AS DATE) @@ -79,8 +78,7 @@ WITH `bfcte_0` AS ( ) * CAST(REGEXP_CONTAINS(`bfcol_9`, 'PROMO') AS INT64) AS `bfcol_41` FROM `bfcte_4` INNER JOIN `bfcte_2` - ON COALESCE(`bfcol_8`, 0) = COALESCE(`bfcol_0`, 0) - AND COALESCE(`bfcol_8`, 1) = COALESCE(`bfcol_0`, 1) + ON `bfcol_8` = `bfcol_0` WHERE ( `bfcol_3` >= CAST('1995-09-01' AS DATE) @@ -134,8 +132,6 @@ WITH `bfcte_0` AS ( `bfcol_6`, ANY_VALUE(`bfcol_72`) AS `bfcol_79` FROM `bfcte_11` - WHERE - NOT `bfcol_5` IS NULL AND NOT `bfcol_6` IS NULL GROUP BY `bfcol_5`, `bfcol_6` @@ -145,8 +141,6 @@ WITH `bfcte_0` AS ( `bfcol_11`, ANY_VALUE(`bfcol_60`) AS `bfcol_65` FROM `bfcte_12` - WHERE - NOT `bfcol_10` IS NULL AND NOT `bfcol_11` IS NULL GROUP BY `bfcol_10`, `bfcol_11` diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/15/out.sql b/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/15/out.sql index 929418a09b27..e8f5214f34a5 100644 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/15/out.sql +++ 
b/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/15/out.sql @@ -45,8 +45,6 @@ WITH `bfcte_0` AS ( `bfcol_23`, COALESCE(SUM(`bfcol_24`), 0) AS `bfcol_27` FROM `bfcte_1` - WHERE - NOT `bfcol_23` IS NULL GROUP BY `bfcol_23` ), `bfcte_5` AS ( @@ -92,8 +90,6 @@ WITH `bfcte_0` AS ( `bfcol_6`, ANY_VALUE(`bfcol_45`) AS `bfcol_49` FROM `bfcte_9` - WHERE - NOT `bfcol_5` IS NULL AND NOT `bfcol_6` IS NULL GROUP BY `bfcol_5`, `bfcol_6` diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/16/out.sql b/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/16/out.sql index bd637ec30634..eddd683cbd8b 100644 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/16/out.sql +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/16/out.sql @@ -37,8 +37,7 @@ WITH `bfcte_0` AS ( `bfcol_3` AS `bfcol_58` FROM `bfcte_2` INNER JOIN `bfcte_1` - ON COALESCE(`bfcol_4`, 0) = COALESCE(`bfcol_2`, 0) - AND COALESCE(`bfcol_4`, 1) = COALESCE(`bfcol_2`, 1) + ON `bfcol_4` = `bfcol_2` WHERE `bfcol_5` <> 'Brand#45' AND NOT ( @@ -52,13 +51,11 @@ WITH `bfcte_0` AS ( ), `bfcte_6` AS ( SELECT *, - STRUCT(COALESCE(`bfcol_58`, 0) AS `bfpart1`, COALESCE(`bfcol_58`, 1) AS `bfpart2`) IN ( - ( + `bfcol_58` IN (( SELECT - STRUCT(COALESCE(`bfcol_21`, 0) AS `bfpart1`, COALESCE(`bfcol_21`, 1) AS `bfpart2`) + * FROM `bfcte_5` - ) - ) AS `bfcol_59` + )) AS `bfcol_59` FROM `bfcte_4` ), `bfcte_7` AS ( SELECT @@ -73,8 +70,6 @@ WITH `bfcte_0` AS ( `bfcol_57`, COUNT(DISTINCT `bfcol_58`) AS `bfcol_69` FROM `bfcte_7` - WHERE - NOT `bfcol_55` IS NULL AND NOT `bfcol_56` IS NULL AND NOT `bfcol_57` IS NULL GROUP BY `bfcol_55`, `bfcol_56`, diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/17/out.sql 
b/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/17/out.sql index b9816ff0bf7b..ce7760b7affe 100644 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/17/out.sql +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/17/out.sql @@ -30,18 +30,14 @@ WITH `bfcte_0` AS ( `bfcol_15` AS `bfcol_18` FROM `bfcte_2` RIGHT JOIN `bfcte_1` - ON COALESCE(`bfcol_3`, 0) = COALESCE(`bfcol_15`, 0) - AND COALESCE(`bfcol_3`, 1) = COALESCE(`bfcol_15`, 1) + ON `bfcol_3` = `bfcol_15` ), `bfcte_5` AS ( SELECT `bfcol_15`, AVG(`bfcol_7`) AS `bfcol_21` FROM `bfcte_3` RIGHT JOIN `bfcte_1` - ON COALESCE(`bfcol_6`, 0) = COALESCE(`bfcol_15`, 0) - AND COALESCE(`bfcol_6`, 1) = COALESCE(`bfcol_15`, 1) - WHERE - NOT `bfcol_15` IS NULL + ON `bfcol_6` = `bfcol_15` GROUP BY `bfcol_15` ), `bfcte_6` AS ( @@ -89,8 +85,6 @@ WITH `bfcte_0` AS ( `bfcol_9`, ANY_VALUE(`bfcol_41`) AS `bfcol_45` FROM `bfcte_10` - WHERE - NOT `bfcol_8` IS NULL AND NOT `bfcol_9` IS NULL GROUP BY `bfcol_8`, `bfcol_9` diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/18/out.sql b/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/18/out.sql index b5720bc932a5..5ad93fbb5584 100644 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/18/out.sql +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/18/out.sql @@ -20,8 +20,6 @@ WITH `bfcte_0` AS ( `bfcol_2`, COALESCE(SUM(`bfcol_3`), 0) AS `bfcol_8` FROM `bfcte_1` - WHERE - NOT `bfcol_2` IS NULL GROUP BY `bfcol_2` ), `bfcte_4` AS ( @@ -46,13 +44,11 @@ WITH `bfcte_0` AS ( ), `bfcte_7` AS ( SELECT *, - STRUCT(COALESCE(`bfcol_4`, 0) AS `bfpart1`, COALESCE(`bfcol_4`, 1) AS `bfpart2`) IN ( - ( + `bfcol_4` IN (( SELECT - STRUCT(COALESCE(`bfcol_13`, 0) AS `bfpart1`, 
COALESCE(`bfcol_13`, 1) AS `bfpart2`) + * FROM `bfcte_6` - ) - ) AS `bfcol_14` + )) AS `bfcol_14` FROM `bfcte_2` ), `bfcte_8` AS ( SELECT @@ -72,8 +68,7 @@ WITH `bfcte_0` AS ( `bfcol_3` AS `bfcol_28` FROM `bfcte_8` INNER JOIN `bfcte_1` - ON COALESCE(`bfcol_20`, 0) = COALESCE(`bfcol_2`, 0) - AND COALESCE(`bfcol_20`, 1) = COALESCE(`bfcol_2`, 1) + ON `bfcol_20` = `bfcol_2` ), `bfcte_10` AS ( SELECT `bfcol_1`, @@ -84,14 +79,7 @@ WITH `bfcte_0` AS ( COALESCE(SUM(`bfcol_28`), 0) AS `bfcol_35` FROM `bfcte_9` INNER JOIN `bfcte_0` - ON COALESCE(`bfcol_25`, 0) = COALESCE(`bfcol_0`, 0) - AND COALESCE(`bfcol_25`, 1) = COALESCE(`bfcol_0`, 1) - WHERE - NOT `bfcol_1` IS NULL - AND NOT `bfcol_0` IS NULL - AND NOT `bfcol_24` IS NULL - AND NOT `bfcol_27` IS NULL - AND NOT `bfcol_26` IS NULL + ON `bfcol_25` = `bfcol_0` GROUP BY `bfcol_1`, `bfcol_0`, diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/19/out.sql b/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/19/out.sql index 9672739d6457..e083e7686c70 100644 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/19/out.sql +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/19/out.sql @@ -124,8 +124,7 @@ WITH `bfcte_0` AS ( ) AS `bfcol_31` FROM `bfcte_2` INNER JOIN `bfcte_1` - ON COALESCE(`bfcol_7`, 0) = COALESCE(`bfcol_1`, 0) - AND COALESCE(`bfcol_7`, 1) = COALESCE(`bfcol_1`, 1) + ON `bfcol_7` = `bfcol_1` WHERE ( COALESCE(COALESCE(`bfcol_6` IN ('AIR', 'AIR REG'), FALSE), FALSE) diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/2/out.sql b/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/2/out.sql index 9130dc95fcee..3e845e8ff6df 100644 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/2/out.sql +++ 
b/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/2/out.sql @@ -58,8 +58,7 @@ WITH `bfcte_0` AS ( `bfcol_14` AS `bfcol_31` FROM `bfcte_6` INNER JOIN `bfcte_5` - ON COALESCE(`bfcol_15`, 0) = COALESCE(`bfcol_12`, 0) - AND COALESCE(`bfcol_15`, 1) = COALESCE(`bfcol_12`, 1) + ON `bfcol_15` = `bfcol_12` ), `bfcte_9` AS ( SELECT `bfcol_23` AS `bfcol_32`, @@ -69,8 +68,7 @@ WITH `bfcte_0` AS ( `bfcol_14` AS `bfcol_36` FROM `bfcte_7` INNER JOIN `bfcte_5` - ON COALESCE(`bfcol_23`, 0) = COALESCE(`bfcol_12`, 0) - AND COALESCE(`bfcol_23`, 1) = COALESCE(`bfcol_12`, 1) + ON `bfcol_23` = `bfcol_12` ), `bfcte_10` AS ( SELECT `bfcol_26` AS `bfcol_37`, @@ -86,8 +84,7 @@ WITH `bfcte_0` AS ( `bfcol_11` AS `bfcol_47` FROM `bfcte_8` INNER JOIN `bfcte_3` - ON COALESCE(`bfcol_30`, 0) = COALESCE(`bfcol_5`, 0) - AND COALESCE(`bfcol_30`, 1) = COALESCE(`bfcol_5`, 1) + ON `bfcol_30` = `bfcol_5` ), `bfcte_11` AS ( SELECT `bfcol_32` AS `bfcol_48`, @@ -97,8 +94,7 @@ WITH `bfcte_0` AS ( `bfcol_22` AS `bfcol_52` FROM `bfcte_9` INNER JOIN `bfcte_4` - ON COALESCE(`bfcol_35`, 0) = COALESCE(`bfcol_21`, 0) - AND COALESCE(`bfcol_35`, 1) = COALESCE(`bfcol_21`, 1) + ON `bfcol_35` = `bfcol_21` ), `bfcte_12` AS ( SELECT `bfcol_37` AS `bfcol_53`, @@ -115,8 +111,7 @@ WITH `bfcte_0` AS ( `bfcol_4` AS `bfcol_64` FROM `bfcte_10` INNER JOIN `bfcte_1` - ON COALESCE(`bfcol_44`, 0) = COALESCE(`bfcol_2`, 0) - AND COALESCE(`bfcol_44`, 1) = COALESCE(`bfcol_2`, 1) + ON `bfcol_44` = `bfcol_2` ), `bfcte_13` AS ( SELECT `bfcol_48` AS `bfcol_65`, @@ -126,8 +121,7 @@ WITH `bfcte_0` AS ( `bfcol_20` AS `bfcol_69` FROM `bfcte_11` INNER JOIN `bfcte_2` - ON COALESCE(`bfcol_52`, 0) = COALESCE(`bfcol_19`, 0) - AND COALESCE(`bfcol_52`, 1) = COALESCE(`bfcol_19`, 1) + ON `bfcol_52` = `bfcol_19` ), `bfcte_14` AS ( SELECT `bfcol_53` AS `bfcol_205`, @@ -141,8 +135,7 @@ WITH `bfcte_0` AS ( `bfcol_63` AS `bfcol_213` FROM `bfcte_12` INNER JOIN `bfcte_0` - ON COALESCE(`bfcol_64`, 0) = 
COALESCE(`bfcol_0`, 0) - AND COALESCE(`bfcol_64`, 1) = COALESCE(`bfcol_0`, 1) + ON `bfcol_64` = `bfcol_0` WHERE `bfcol_56` = 15 AND ENDS_WITH(`bfcol_55`, 'BRASS') AND `bfcol_1` = 'EUROPE' ), `bfcte_15` AS ( @@ -168,8 +161,7 @@ WITH `bfcte_0` AS ( `bfcol_1` = 'EUROPE' AS `bfcol_191` FROM `bfcte_13` INNER JOIN `bfcte_0` - ON COALESCE(`bfcol_69`, 0) = COALESCE(`bfcol_0`, 0) - AND COALESCE(`bfcol_69`, 1) = COALESCE(`bfcol_0`, 1) + ON `bfcol_69` = `bfcol_0` WHERE `bfcol_67` = 15 AND ENDS_WITH(`bfcol_66`, 'BRASS') AND `bfcol_1` = 'EUROPE' ), `bfcte_16` AS ( @@ -177,8 +169,6 @@ WITH `bfcte_0` AS ( `bfcol_189`, MIN(`bfcol_190`) AS `bfcol_216` FROM `bfcte_15` - WHERE - NOT `bfcol_189` IS NULL GROUP BY `bfcol_189` ), `bfcte_17` AS ( @@ -198,10 +188,7 @@ SELECT `bfcol_212` AS `S_COMMENT` FROM `bfcte_17` INNER JOIN `bfcte_14` - ON COALESCE(`bfcol_214`, 0) = COALESCE(`bfcol_205`, 0) - AND COALESCE(`bfcol_214`, 1) = COALESCE(`bfcol_205`, 1) - AND IF(IS_NAN(`bfcol_216`), 2.0, COALESCE(`bfcol_216`, 0.0)) = IF(IS_NAN(`bfcol_207`), 2.0, COALESCE(`bfcol_207`, 0.0)) - AND IF(IS_NAN(`bfcol_216`), 3, COALESCE(`bfcol_216`, 1.0)) = IF(IS_NAN(`bfcol_207`), 3, COALESCE(`bfcol_207`, 1.0)) + ON `bfcol_214` = `bfcol_205` AND `bfcol_216` = `bfcol_207` ORDER BY `bfcol_211` DESC, `bfcol_213` ASC NULLS LAST, diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/20/out.sql b/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/20/out.sql index 8c9cd9bb763b..36846b698956 100644 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/20/out.sql +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/20/out.sql @@ -61,8 +61,6 @@ WITH `bfcte_0` AS ( `bfcol_18`, COALESCE(SUM(`bfcol_19`), 0) AS `bfcol_36` FROM `bfcte_2` - WHERE - NOT `bfcol_17` IS NULL AND NOT `bfcol_18` IS NULL GROUP BY `bfcol_17`, `bfcol_18` @@ -73,8 +71,7 @@ WITH 
`bfcte_0` AS ( `bfcol_13` AS `bfcol_43` FROM `bfcte_4` INNER JOIN `bfcte_3` - ON COALESCE(`bfcol_14`, 0) = COALESCE(`bfcol_35`, 0) - AND COALESCE(`bfcol_14`, 1) = COALESCE(`bfcol_35`, 1) + ON `bfcol_14` = `bfcol_35` ), `bfcte_8` AS ( SELECT `bfcol_15` AS `bfcol_31` @@ -88,13 +85,11 @@ WITH `bfcte_0` AS ( ), `bfcte_10` AS ( SELECT *, - STRUCT(COALESCE(`bfcol_2`, 0) AS `bfpart1`, COALESCE(`bfcol_2`, 1) AS `bfpart2`) IN ( - ( + `bfcol_2` IN (( SELECT - STRUCT(COALESCE(`bfcol_31`, 0) AS `bfpart1`, COALESCE(`bfcol_31`, 1) AS `bfpart2`) + * FROM `bfcte_8` - ) - ) AS `bfcol_37` + )) AS `bfcol_37` FROM `bfcte_1` ), `bfcte_11` AS ( SELECT @@ -132,13 +127,11 @@ WITH `bfcte_0` AS ( ), `bfcte_15` AS ( SELECT *, - STRUCT(COALESCE(`bfcol_41`, 0) AS `bfpart1`, COALESCE(`bfcol_41`, 1) AS `bfpart2`) IN ( - ( + `bfcol_41` IN (( SELECT - STRUCT(COALESCE(`bfcol_61`, 0) AS `bfpart1`, COALESCE(`bfcol_61`, 1) AS `bfpart2`) + * FROM `bfcte_14` - ) - ) AS `bfcol_62` + )) AS `bfcol_62` FROM `bfcte_7` ) SELECT diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/21/out.sql b/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/21/out.sql index 93a44e529d94..034de44a1201 100644 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/21/out.sql +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/21/out.sql @@ -36,8 +36,6 @@ WITH `bfcte_0` AS ( `L_ORDERKEY`, COUNT(1) AS `bfcol_18` FROM `bfcte_4` - WHERE - NOT `L_ORDERKEY` IS NULL GROUP BY `L_ORDERKEY` ), `bfcte_7` AS ( @@ -83,8 +81,7 @@ WITH `bfcte_0` AS ( `bfcol_6` AS `bfcol_44` FROM `bfcte_11` INNER JOIN `bfcte_2` - ON COALESCE(`bfcol_40`, 0) = COALESCE(`bfcol_4`, 0) - AND COALESCE(`bfcol_40`, 1) = COALESCE(`bfcol_4`, 1) + ON `bfcol_40` = `bfcol_4` ), `bfcte_13` AS ( SELECT `bfcol_41` AS `bfcol_45`, @@ -93,8 +90,7 @@ WITH `bfcte_0` AS ( `bfcol_3` AS `bfcol_48` FROM 
`bfcte_12` INNER JOIN `bfcte_1` - ON COALESCE(`bfcol_44`, 0) = COALESCE(`bfcol_2`, 0) - AND COALESCE(`bfcol_44`, 1) = COALESCE(`bfcol_2`, 1) + ON `bfcol_44` = `bfcol_2` ), `bfcte_14` AS ( SELECT `bfcol_45`, @@ -133,8 +129,6 @@ WITH `bfcte_0` AS ( `bfcol_53`, COUNT(1) AS `bfcol_58` FROM `bfcte_14` - WHERE - NOT `bfcol_53` IS NULL GROUP BY `bfcol_53` ) diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/22/out.sql b/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/22/out.sql index 87ca2d8d5e01..ceb7d812f176 100644 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/22/out.sql +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/22/out.sql @@ -73,8 +73,6 @@ WITH `bfcte_0` AS ( `bfcol_4`, ANY_VALUE(`bfcol_47`) AS `bfcol_51` FROM `bfcte_8` - WHERE - NOT `bfcol_3` IS NULL AND NOT `bfcol_4` IS NULL GROUP BY `bfcol_3`, `bfcol_4` @@ -94,13 +92,11 @@ WITH `bfcte_0` AS ( ), `bfcte_12` AS ( SELECT *, - STRUCT(COALESCE(`bfcol_61`, 0) AS `bfpart1`, COALESCE(`bfcol_61`, 1) AS `bfpart2`) IN ( - ( + `bfcol_61` IN (( SELECT - STRUCT(COALESCE(`bfcol_0`, 0) AS `bfpart1`, COALESCE(`bfcol_0`, 1) AS `bfpart2`) + * FROM `bfcte_6` - ) - ) AS `bfcol_64` + )) AS `bfcol_64` FROM `bfcte_11` ), `bfcte_13` AS ( SELECT diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/3/out.sql b/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/3/out.sql index 0d1365d76d14..68a5e4a3ab25 100644 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/3/out.sql +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/3/out.sql @@ -31,8 +31,7 @@ WITH `bfcte_0` AS ( `bfcol_35` AS `bfcol_45` FROM `bfcte_1` INNER JOIN `bfcte_0` - ON COALESCE(`bfcol_36`, 0) = 
COALESCE(`bfcol_32`, 0) - AND COALESCE(`bfcol_36`, 1) = COALESCE(`bfcol_32`, 1) + ON `bfcol_36` = `bfcol_32` ), `bfcte_4` AS ( SELECT `bfcol_39`, @@ -50,8 +49,7 @@ WITH `bfcte_0` AS ( ) AS `bfcol_54` FROM `bfcte_2` INNER JOIN `bfcte_3` - ON COALESCE(`bfcol_39`, 0) = COALESCE(`bfcol_43`, 0) - AND COALESCE(`bfcol_39`, 1) = COALESCE(`bfcol_43`, 1) + ON `bfcol_39` = `bfcol_43` ), `bfcte_5` AS ( SELECT `bfcol_51`, @@ -59,8 +57,6 @@ WITH `bfcte_0` AS ( `bfcol_53`, COALESCE(SUM(`bfcol_54`), 0) AS `bfcol_59` FROM `bfcte_4` - WHERE - NOT `bfcol_51` IS NULL AND NOT `bfcol_52` IS NULL AND NOT `bfcol_53` IS NULL GROUP BY `bfcol_51`, `bfcol_52`, diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/4/out.sql b/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/4/out.sql index 9eb0259be506..873daa861039 100644 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/4/out.sql +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/4/out.sql @@ -33,8 +33,7 @@ WITH `bfcte_0` AS ( `bfcol_4` < `bfcol_5` AS `bfcol_27` FROM `bfcte_1` INNER JOIN `bfcte_0` - ON COALESCE(`bfcol_3`, 0) = COALESCE(`bfcol_0`, 0) - AND COALESCE(`bfcol_3`, 1) = COALESCE(`bfcol_0`, 1) + ON `bfcol_3` = `bfcol_0` WHERE ( `bfcol_1` >= CAST('1993-07-01' AS DATE) @@ -49,8 +48,6 @@ WITH `bfcte_0` AS ( `bfcol_25`, COUNT(1) AS `bfcol_33` FROM `bfcte_2` - WHERE - NOT `bfcol_26` IS NULL AND NOT `bfcol_25` IS NULL GROUP BY `bfcol_26`, `bfcol_25` diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/5/out.sql b/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/5/out.sql index 34974b36d8f4..184aad6bced0 100644 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/5/out.sql +++ 
b/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/5/out.sql @@ -46,8 +46,7 @@ WITH `bfcte_0` AS ( `bfcol_5` AS `bfcol_36` FROM `bfcte_3` INNER JOIN `bfcte_2` - ON COALESCE(`bfcol_32`, 0) = COALESCE(`bfcol_6`, 0) - AND COALESCE(`bfcol_32`, 1) = COALESCE(`bfcol_6`, 1) + ON `bfcol_32` = `bfcol_6` ), `bfcte_7` AS ( SELECT `bfcol_35` AS `bfcol_37`, @@ -55,8 +54,7 @@ WITH `bfcte_0` AS ( `bfcol_2` AS `bfcol_39` FROM `bfcte_6` INNER JOIN `bfcte_1` - ON COALESCE(`bfcol_35`, 0) = COALESCE(`bfcol_3`, 0) - AND COALESCE(`bfcol_35`, 1) = COALESCE(`bfcol_3`, 1) + ON `bfcol_35` = `bfcol_3` ), `bfcte_8` AS ( SELECT `bfcol_33` AS `bfcol_40`, @@ -64,8 +62,7 @@ WITH `bfcte_0` AS ( `bfcol_38` AS `bfcol_42` FROM `bfcte_4` INNER JOIN `bfcte_7` - ON COALESCE(`bfcol_34`, 0) = COALESCE(`bfcol_39`, 0) - AND COALESCE(`bfcol_34`, 1) = COALESCE(`bfcol_39`, 1) + ON `bfcol_34` = `bfcol_39` ), `bfcte_9` AS ( SELECT `bfcol_30` AS `bfcol_43`, @@ -74,20 +71,14 @@ WITH `bfcte_0` AS ( `bfcol_42` AS `bfcol_46` FROM `bfcte_5` INNER JOIN `bfcte_8` - ON COALESCE(`bfcol_29`, 0) = COALESCE(`bfcol_40`, 0) - AND COALESCE(`bfcol_29`, 1) = COALESCE(`bfcol_40`, 1) + ON `bfcol_29` = `bfcol_40` ), `bfcte_10` AS ( SELECT `bfcol_46`, COALESCE(SUM(`bfcol_44`), 0) AS `bfcol_49` FROM `bfcte_9` INNER JOIN `bfcte_0` - ON COALESCE(`bfcol_43`, 0) = COALESCE(`bfcol_0`, 0) - AND COALESCE(`bfcol_43`, 1) = COALESCE(`bfcol_0`, 1) - AND COALESCE(`bfcol_45`, 0) = COALESCE(`bfcol_1`, 0) - AND COALESCE(`bfcol_45`, 1) = COALESCE(`bfcol_1`, 1) - WHERE - NOT `bfcol_46` IS NULL + ON `bfcol_43` = `bfcol_0` AND `bfcol_45` = `bfcol_1` GROUP BY `bfcol_46` ) diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/7/out.sql b/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/7/out.sql index 3d82a905e8bc..b6616d8c85cf 100644 --- 
a/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/7/out.sql +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/7/out.sql @@ -52,16 +52,14 @@ WITH `bfcte_0` AS ( `bfcol_39` AS `bfcol_41` FROM `bfcte_4` INNER JOIN `bfcte_6` - ON COALESCE(`bfcol_12`, 0) = COALESCE(`bfcol_38`, 0) - AND COALESCE(`bfcol_12`, 1) = COALESCE(`bfcol_38`, 1) + ON `bfcol_12` = `bfcol_38` ), `bfcte_8` AS ( SELECT `bfcol_41` AS `bfcol_42`, `bfcol_9` AS `bfcol_43` FROM `bfcte_7` INNER JOIN `bfcte_3` - ON COALESCE(`bfcol_40`, 0) = COALESCE(`bfcol_10`, 0) - AND COALESCE(`bfcol_40`, 1) = COALESCE(`bfcol_10`, 1) + ON `bfcol_40` = `bfcol_10` ), `bfcte_9` AS ( SELECT `bfcol_42` AS `bfcol_44`, @@ -71,8 +69,7 @@ WITH `bfcte_0` AS ( `bfcol_35` AS `bfcol_48` FROM `bfcte_8` INNER JOIN `bfcte_2` - ON COALESCE(`bfcol_43`, 0) = COALESCE(`bfcol_31`, 0) - AND COALESCE(`bfcol_43`, 1) = COALESCE(`bfcol_31`, 1) + ON `bfcol_43` = `bfcol_31` ), `bfcte_10` AS ( SELECT `bfcol_44` AS `bfcol_49`, @@ -82,8 +79,7 @@ WITH `bfcte_0` AS ( `bfcol_3` AS `bfcol_53` FROM `bfcte_9` INNER JOIN `bfcte_1` - ON COALESCE(`bfcol_45`, 0) = COALESCE(`bfcol_2`, 0) - AND COALESCE(`bfcol_45`, 1) = COALESCE(`bfcol_2`, 1) + ON `bfcol_45` = `bfcol_2` ), `bfcte_11` AS ( SELECT `bfcol_49`, @@ -113,8 +109,7 @@ WITH `bfcte_0` AS ( EXTRACT(YEAR FROM `bfcol_52`) AS `bfcol_87` FROM `bfcte_10` INNER JOIN `bfcte_5` - ON COALESCE(`bfcol_53`, 0) = COALESCE(`bfcol_36`, 0) - AND COALESCE(`bfcol_53`, 1) = COALESCE(`bfcol_36`, 1) + ON `bfcol_53` = `bfcol_36` WHERE `bfcol_49` <> `bfcol_37` ), `bfcte_12` AS ( @@ -125,7 +120,7 @@ WITH `bfcte_0` AS ( COALESCE(SUM(`bfcol_86`), 0) AS `bfcol_92` FROM `bfcte_11` WHERE - NOT `bfcol_85` IS NULL AND NOT `bfcol_84` IS NULL AND NOT `bfcol_87` IS NULL + NOT `bfcol_87` IS NULL GROUP BY `bfcol_85`, `bfcol_84`, diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/8/out.sql 
b/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/8/out.sql index b2fa2971cafd..eca05d65e79a 100644 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/8/out.sql +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/8/out.sql @@ -51,8 +51,7 @@ WITH `bfcte_0` AS ( `bfcol_17` AS `bfcol_24` FROM `bfcte_7` INNER JOIN `bfcte_6` - ON COALESCE(`bfcol_18`, 0) = COALESCE(`bfcol_14`, 0) - AND COALESCE(`bfcol_18`, 1) = COALESCE(`bfcol_14`, 1) + ON `bfcol_18` = `bfcol_14` ), `bfcte_9` AS ( SELECT `bfcol_20` AS `bfcol_25`, @@ -62,8 +61,7 @@ WITH `bfcte_0` AS ( `bfcol_12` AS `bfcol_29` FROM `bfcte_8` INNER JOIN `bfcte_5` - ON COALESCE(`bfcol_22`, 0) = COALESCE(`bfcol_11`, 0) - AND COALESCE(`bfcol_22`, 1) = COALESCE(`bfcol_11`, 1) + ON `bfcol_22` = `bfcol_11` ), `bfcte_10` AS ( SELECT `bfcol_25` AS `bfcol_30`, @@ -74,8 +72,7 @@ WITH `bfcte_0` AS ( `bfcol_10` AS `bfcol_35` FROM `bfcte_9` INNER JOIN `bfcte_4` - ON COALESCE(`bfcol_26`, 0) = COALESCE(`bfcol_8`, 0) - AND COALESCE(`bfcol_26`, 1) = COALESCE(`bfcol_8`, 1) + ON `bfcol_26` = `bfcol_8` ), `bfcte_11` AS ( SELECT `bfcol_30` AS `bfcol_36`, @@ -86,8 +83,7 @@ WITH `bfcte_0` AS ( `bfcol_7` AS `bfcol_41` FROM `bfcte_10` INNER JOIN `bfcte_3` - ON COALESCE(`bfcol_34`, 0) = COALESCE(`bfcol_6`, 0) - AND COALESCE(`bfcol_34`, 1) = COALESCE(`bfcol_6`, 1) + ON `bfcol_34` = `bfcol_6` ), `bfcte_12` AS ( SELECT `bfcol_36` AS `bfcol_42`, @@ -98,8 +94,7 @@ WITH `bfcte_0` AS ( `bfcol_5` AS `bfcol_47` FROM `bfcte_11` INNER JOIN `bfcte_1` - ON COALESCE(`bfcol_41`, 0) = COALESCE(`bfcol_4`, 0) - AND COALESCE(`bfcol_41`, 1) = COALESCE(`bfcol_4`, 1) + ON `bfcol_41` = `bfcol_4` ), `bfcte_13` AS ( SELECT `bfcol_42` AS `bfcol_66`, @@ -109,8 +104,7 @@ WITH `bfcte_0` AS ( `bfcol_46` AS `bfcol_70` FROM `bfcte_12` INNER JOIN `bfcte_2` - ON COALESCE(`bfcol_47`, 0) = COALESCE(`bfcol_2`, 0) - AND COALESCE(`bfcol_47`, 1) = 
COALESCE(`bfcol_2`, 1) + ON `bfcol_47` = `bfcol_2` WHERE `bfcol_3` = 'AMERICA' ), `bfcte_14` AS ( @@ -163,8 +157,7 @@ WITH `bfcte_0` AS ( ) AS `bfcol_129` FROM `bfcte_13` INNER JOIN `bfcte_0` - ON COALESCE(`bfcol_69`, 0) = COALESCE(`bfcol_0`, 0) - AND COALESCE(`bfcol_69`, 1) = COALESCE(`bfcol_0`, 1) + ON `bfcol_69` = `bfcol_0` WHERE ( `bfcol_70` >= CAST('1995-01-01' AS DATE) diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/9/out.sql b/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/9/out.sql index 7f886aa7ce56..e33b61c55086 100644 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/9/out.sql +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/tpch/snapshots/test_tpch/test_tpch_query/9/out.sql @@ -44,8 +44,7 @@ WITH `bfcte_0` AS ( `bfcol_14` AS `bfcol_23` FROM `bfcte_5` INNER JOIN `bfcte_4` - ON COALESCE(`bfcol_15`, 0) = COALESCE(`bfcol_10`, 0) - AND COALESCE(`bfcol_15`, 1) = COALESCE(`bfcol_10`, 1) + ON `bfcol_15` = `bfcol_10` ), `bfcte_7` AS ( SELECT `bfcol_17` AS `bfcol_24`, @@ -57,10 +56,7 @@ WITH `bfcte_0` AS ( `bfcol_8` AS `bfcol_30` FROM `bfcte_6` INNER JOIN `bfcte_3` - ON COALESCE(`bfcol_20`, 0) = COALESCE(`bfcol_7`, 0) - AND COALESCE(`bfcol_20`, 1) = COALESCE(`bfcol_7`, 1) - AND COALESCE(`bfcol_19`, 0) = COALESCE(`bfcol_6`, 0) - AND COALESCE(`bfcol_19`, 1) = COALESCE(`bfcol_6`, 1) + ON `bfcol_20` = `bfcol_7` AND `bfcol_19` = `bfcol_6` ), `bfcte_8` AS ( SELECT `bfcol_24` AS `bfcol_31`, @@ -72,8 +68,7 @@ WITH `bfcte_0` AS ( `bfcol_5` AS `bfcol_37` FROM `bfcte_7` INNER JOIN `bfcte_2` - ON COALESCE(`bfcol_26`, 0) = COALESCE(`bfcol_4`, 0) - AND COALESCE(`bfcol_26`, 1) = COALESCE(`bfcol_4`, 1) + ON `bfcol_26` = `bfcol_4` ), `bfcte_9` AS ( SELECT `bfcol_31` AS `bfcol_38`, @@ -85,8 +80,7 @@ WITH `bfcte_0` AS ( `bfcol_3` AS `bfcol_44` FROM `bfcte_8` INNER JOIN `bfcte_1` - ON COALESCE(`bfcol_32`, 0) = 
COALESCE(`bfcol_2`, 0) - AND COALESCE(`bfcol_32`, 1) = COALESCE(`bfcol_2`, 1) + ON `bfcol_32` = `bfcol_2` ), `bfcte_10` AS ( SELECT `bfcol_38`, @@ -122,8 +116,7 @@ WITH `bfcte_0` AS ( ) AS `bfcol_86` FROM `bfcte_9` INNER JOIN `bfcte_0` - ON COALESCE(`bfcol_43`, 0) = COALESCE(`bfcol_0`, 0) - AND COALESCE(`bfcol_43`, 1) = COALESCE(`bfcol_0`, 1) + ON `bfcol_43` = `bfcol_0` WHERE REGEXP_CONTAINS(`bfcol_38`, 'green') ), `bfcte_11` AS ( @@ -133,7 +126,7 @@ WITH `bfcte_0` AS ( COALESCE(SUM(`bfcol_86`), 0) AS `bfcol_90` FROM `bfcte_10` WHERE - NOT `bfcol_84` IS NULL AND NOT `bfcol_85` IS NULL + NOT `bfcol_85` IS NULL GROUP BY `bfcol_84`, `bfcol_85` diff --git a/packages/bigframes/tests/unit/core/rewrite/test_identifiers.py b/packages/bigframes/tests/unit/core/rewrite/test_identifiers.py index 3f27315a0fd3..4d4609bb0fa8 100644 --- a/packages/bigframes/tests/unit/core/rewrite/test_identifiers.py +++ b/packages/bigframes/tests/unit/core/rewrite/test_identifiers.py @@ -134,6 +134,7 @@ def test_remap_variables_nested_join_stability(leaf, fake_session, table): ), type="inner", propogate_order=False, + nulls_equal=True, ) outer_join = nodes.JoinNode( left_child=inner_join, @@ -146,6 +147,7 @@ def test_remap_variables_nested_join_stability(leaf, fake_session, table): ), type="inner", propogate_order=False, + nulls_equal=True, ) # Run remap_variables twice and assert stability