From 8db392e3849feb38fa1cfe41a51c6c0e5840ed5e Mon Sep 17 00:00:00 2001
From: Chelsea Lin <chelsealin@google.com>
Date: Wed, 11 Feb 2026 18:47:54 +0000
Subject: [PATCH 1/2] refactor: fix pull_up_select reordering the columns of
 readtable nodes

---
 bigframes/core/rewrite/select_pullup.py                  | 9 ++++-----
 .../snapshots/test_binary_compiler/test_corr/out.sql     | 4 ++--
 .../snapshots/test_binary_compiler/test_cov/out.sql      | 4 ++--
 .../snapshots/test_nullary_compiler/test_size/out.sql    | 6 +++---
 .../snapshots/test_unary_compiler/test_mean/out.sql      | 2 +-
 .../snapshots/test_unary_compiler/test_std/out.sql       | 2 +-
 .../test_compile_explode_dataframe/out.sql               | 2 +-
 .../test_compile_explode_series/out.sql                  | 4 ++--
 8 files changed, 16 insertions(+), 17 deletions(-)

diff --git a/bigframes/core/rewrite/select_pullup.py b/bigframes/core/rewrite/select_pullup.py
index 415182f8840..a15aba7663f 100644
--- a/bigframes/core/rewrite/select_pullup.py
+++ b/bigframes/core/rewrite/select_pullup.py
@@ -54,13 +54,17 @@ def pull_up_source_ids(node: nodes.ReadTableNode) -> nodes.BigFrameNode:
     if all(id.sql == source_id for id, source_id in node.scan_list.items):
         return node
     else:
-        source_ids = sorted(
-            set(scan_item.source_id for scan_item in node.scan_list.items)
-        )
+        # Keep the scan list's own column order, but still read each source
+        # column only once: dict.fromkeys drops repeated source ids while
+        # preserving first-seen order, so the new scan list never ends up
+        # with two items sharing one ColumnId.
+        source_ids = dict.fromkeys(
+            scan_item.source_id for scan_item in node.scan_list.items
+        )
         new_scan_list = nodes.ScanList.from_items(
             [
                 nodes.ScanItem(identifiers.ColumnId(source_id), source_id)
                 for source_id in source_ids
             ]
         )
         new_source = dataclasses.replace(node, scan_list=new_scan_list)
diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_binary_compiler/test_corr/out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_binary_compiler/test_corr/out.sql
index 5c838f48827..08272882e6b 100644
--- a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_binary_compiler/test_corr/out.sql
+++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_binary_compiler/test_corr/out.sql
@@ -1,7 +1,7 @@
 WITH `bfcte_0` AS (
   SELECT
-    `float64_col`,
-    `int64_col`
+    `int64_col`,
+    `float64_col`
   FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
 ), `bfcte_1` AS (
   SELECT
diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_binary_compiler/test_cov/out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_binary_compiler/test_cov/out.sql
index eda082250a6..7f4463e3b8e 100644
--- a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_binary_compiler/test_cov/out.sql
+++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_binary_compiler/test_cov/out.sql
@@ -1,7 +1,7 @@
 WITH `bfcte_0` AS (
   SELECT
-    `float64_col`,
-    `int64_col`
+    `int64_col`,
+    `float64_col`
   FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
 ), `bfcte_1` AS (
   SELECT
diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_nullary_compiler/test_size/out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_nullary_compiler/test_size/out.sql
index ed8e0c7619d..d5f599b5da7 100644
--- a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_nullary_compiler/test_size/out.sql
+++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_nullary_compiler/test_size/out.sql
@@ -4,17 +4,17 @@ WITH `bfcte_0` AS (
     `bytes_col`,
     `date_col`,
     `datetime_col`,
-    `duration_col`,
-    `float64_col`,
     `geography_col`,
     `int64_col`,
     `int64_too`,
     `numeric_col`,
+    `float64_col`,
     `rowindex`,
     `rowindex_2`,
     `string_col`,
     `time_col`,
-    `timestamp_col`
+    `timestamp_col`,
+    `duration_col`
   FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
 ), `bfcte_1` AS (
   SELECT
diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_mean/out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_mean/out.sql
index 2f9d540776f..74319b646f2 100644
--- a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_mean/out.sql
+++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_mean/out.sql
@@ -1,8 +1,8 @@
 WITH `bfcte_0` AS (
   SELECT
     `bool_col`,
-    `duration_col`,
     `int64_col`,
+    `duration_col`,
     `int64_col` AS `bfcol_6`,
     `bool_col` AS `bfcol_7`,
     `duration_col` AS `bfcol_8`
diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_std/out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_std/out.sql
index bc744258913..c57abdba4b5 100644
--- a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_std/out.sql
+++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_std/out.sql
@@ -1,8 +1,8 @@
 WITH `bfcte_0` AS (
   SELECT
     `bool_col`,
-    `duration_col`,
     `int64_col`,
+    `duration_col`,
     `int64_col` AS `bfcol_6`,
     `bool_col` AS `bfcol_7`,
     `duration_col` AS `bfcol_8`
diff --git a/tests/unit/core/compile/sqlglot/snapshots/test_compile_explode/test_compile_explode_dataframe/out.sql b/tests/unit/core/compile/sqlglot/snapshots/test_compile_explode/test_compile_explode_dataframe/out.sql
index 5d9019439f2..4f05929e0c7 100644
--- a/tests/unit/core/compile/sqlglot/snapshots/test_compile_explode/test_compile_explode_dataframe/out.sql
+++ b/tests/unit/core/compile/sqlglot/snapshots/test_compile_explode/test_compile_explode_dataframe/out.sql
@@ -1,7 +1,7 @@
 WITH `bfcte_0` AS (
   SELECT
-    `int_list_col`,
     `rowindex`,
+    `int_list_col`,
     `string_list_col`
   FROM `bigframes-dev`.`sqlglot_test`.`repeated_types`
 ), `bfcte_1` AS (
diff --git a/tests/unit/core/compile/sqlglot/snapshots/test_compile_explode/test_compile_explode_series/out.sql b/tests/unit/core/compile/sqlglot/snapshots/test_compile_explode/test_compile_explode_series/out.sql
index 8ba4559da83..d5b42741d31 100644
--- a/tests/unit/core/compile/sqlglot/snapshots/test_compile_explode/test_compile_explode_series/out.sql
+++ b/tests/unit/core/compile/sqlglot/snapshots/test_compile_explode/test_compile_explode_series/out.sql
@@ -1,7 +1,7 @@
 WITH `bfcte_0` AS (
   SELECT
-    `int_list_col`,
-    `rowindex`
+    `rowindex`,
+    `int_list_col`
   FROM `bigframes-dev`.`sqlglot_test`.`repeated_types`
 ), `bfcte_1` AS (
   SELECT

From a62598e1673f1197ec4726ef7218b58294b965d4 Mon Sep 17 00:00:00 2001
From: Chelsea Lin <chelsealin@google.com>
Date: Wed, 11 Feb 2026 19:42:58 +0000
Subject: [PATCH 2/2] refactor: enable SELECT * optimizations in sqlglot
 compiler

---
 bigframes/core/compile/sqlglot/compiler.py    | 16 +++++++-----
 bigframes/core/compile/sqlglot/sqlglot_ir.py  | 25 +++++++++++--------
 bigframes/core/sql_nodes.py                   |  4 +++
 .../test_nullary_compiler/test_size/out.sql   | 16 +-----------
 .../out.sql                                   |  4 +--
 .../out.sql                                   |  3 +--
 .../out.sql                                   | 16 +-----------
 7 files changed, 32 insertions(+), 52 deletions(-)

diff --git a/bigframes/core/compile/sqlglot/compiler.py b/bigframes/core/compile/sqlglot/compiler.py
index 786c5a1ed1f..d74c1b38696 100644
--- a/bigframes/core/compile/sqlglot/compiler.py
+++ b/bigframes/core/compile/sqlglot/compiler.py
@@ -153,13 +153,17 @@ def compile_sql_select(node: sql_nodes.SqlSelectNode, child: ir.SQLGlotIR):
         for ordering in node.sorting
     )
 
-    projected_cols: tuple[tuple[str, sge.Expression], ...] = tuple(
-        (
-            cdef.id.sql,
-            expression_compiler.expression_compiler.compile_expression(cdef.expression),
+    projected_cols: tuple[tuple[str, sge.Expression], ...] = tuple()
+    if not node.is_star_selection:
+        projected_cols = tuple(
+            (
+                cdef.id.sql,
+                expression_compiler.expression_compiler.compile_expression(
+                    cdef.expression
+                ),
+            )
+            for cdef in node.selections
         )
-        for cdef in node.selections
-    )
 
     sge_predicates = tuple(
         expression_compiler.expression_compiler.compile_expression(expression)
diff --git a/bigframes/core/compile/sqlglot/sqlglot_ir.py b/bigframes/core/compile/sqlglot/sqlglot_ir.py
index d0bd32697c4..efe5e09aff2 100644
--- a/bigframes/core/compile/sqlglot/sqlglot_ir.py
+++ b/bigframes/core/compile/sqlglot/sqlglot_ir.py
@@ -150,7 +150,7 @@ def from_table(
         if sql_predicate:
             select_expr = sge.Select().select(sge.Star()).from_(table_expr)
             select_expr = select_expr.where(
-                sg.parse_one(sql_predicate, dialect="bigquery"), append=False
+                sg.parse_one(sql_predicate, dialect=cls.dialect), append=False
             )
             return cls(expr=select_expr, uid_gen=uid_gen)
 
@@ -172,16 +172,19 @@ def select(
         if len(sorting) > 0:
             new_expr = new_expr.order_by(*sorting)
 
-        to_select = [
-            sge.Alias(
-                this=expr,
-                alias=sge.to_identifier(id, quoted=self.quoted),
-            )
-            if expr.alias_or_name != id
-            else expr
-            for id, expr in selections
-        ]
-        new_expr = new_expr.select(*to_select, append=False)
+        if len(selections) > 0:
+            to_select = [
+                sge.Alias(
+                    this=expr,
+                    alias=sge.to_identifier(id, quoted=self.quoted),
+                )
+                if expr.alias_or_name != id
+                else expr
+                for id, expr in selections
+            ]
+            new_expr = new_expr.select(*to_select, append=False)
+        else:
+            new_expr = new_expr.select(sge.Star(), append=False)
 
         if len(predicates) > 0:
             condition = _and(predicates)
diff --git a/bigframes/core/sql_nodes.py b/bigframes/core/sql_nodes.py
index a1624a10217..5d921de7aeb 100644
--- a/bigframes/core/sql_nodes.py
+++ b/bigframes/core/sql_nodes.py
@@ -142,6 +142,24 @@ def consumed_ids(self):
     def _node_expressions(self):
         raise NotImplementedError()
 
+    @property
+    def is_star_selection(self) -> bool:
+        """Whether this select can be compiled as ``SELECT *`` over its child.
+
+        The compiler drops the explicit projection list when this is True, so
+        matching ids alone is not sufficient: every selection must also be a
+        plain reference to the child column carrying the same id. Otherwise a
+        selection that reuses a child id for a different expression (for
+        example two swapped column names) would be silently discarded.
+        """
+        if tuple(self.ids) != tuple(self.child.ids):
+            return False
+        return all(
+            isinstance(cdef.expression, ex.DerefOp)
+            and cdef.expression.id == cdef.id
+            for cdef in self.selections
+        )
+
     @functools.cache
     def get_id_mapping(self) -> dict[identifiers.ColumnId, ex.Expression]:
         return {cdef.id: cdef.expression for cdef in self.selections}
diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_nullary_compiler/test_size/out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_nullary_compiler/test_size/out.sql
index d5f599b5da7..7a4393f8133 100644
--- a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_nullary_compiler/test_size/out.sql
+++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_nullary_compiler/test_size/out.sql
@@ -1,20 +1,6 @@
 WITH `bfcte_0` AS (
   SELECT
-    `bool_col`,
-    `bytes_col`,
-    `date_col`,
-    `datetime_col`,
-    `geography_col`,
-    `int64_col`,
-    `int64_too`,
-    `numeric_col`,
-    `float64_col`,
-    `rowindex`,
-    `rowindex_2`,
-    `string_col`,
-    `time_col`,
-    `timestamp_col`,
-    `duration_col`
+    *
   FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
 ), `bfcte_1` AS (
   SELECT
diff --git a/tests/unit/core/compile/sqlglot/snapshots/test_compile_readtable/test_compile_readtable_w_columns_filters/out.sql b/tests/unit/core/compile/sqlglot/snapshots/test_compile_readtable/test_compile_readtable_w_columns_filters/out.sql
index c9a42b73f1a..2dae14b556e 100644
--- a/tests/unit/core/compile/sqlglot/snapshots/test_compile_readtable/test_compile_readtable_w_columns_filters/out.sql
+++ b/tests/unit/core/compile/sqlglot/snapshots/test_compile_readtable/test_compile_readtable_w_columns_filters/out.sql
@@ -6,7 +6,5 @@ WITH `bfcte_0` AS (
     `rowindex` > 0 AND `string_col` IN ('Hello, World!')
 )
 SELECT
-  `rowindex`,
-  `int64_col`,
-  `string_col`
+  *
 FROM `bfcte_0`
\ No newline at end of file
diff --git a/tests/unit/core/compile/sqlglot/snapshots/test_compile_readtable/test_compile_readtable_w_json_types/out.sql b/tests/unit/core/compile/sqlglot/snapshots/test_compile_readtable/test_compile_readtable_w_json_types/out.sql
index f65f3a10f0f..77a17ec893d 100644
--- a/tests/unit/core/compile/sqlglot/snapshots/test_compile_readtable/test_compile_readtable_w_json_types/out.sql
+++ b/tests/unit/core/compile/sqlglot/snapshots/test_compile_readtable/test_compile_readtable_w_json_types/out.sql
@@ -1,4 +1,3 @@
 SELECT
-  `rowindex`,
-  `json_col`
+  *
 FROM `bigframes-dev`.`sqlglot_test`.`json_types`
\ No newline at end of file
diff --git a/tests/unit/core/compile/sqlglot/snapshots/test_compile_readtable/test_compile_readtable_w_system_time/out.sql b/tests/unit/core/compile/sqlglot/snapshots/test_compile_readtable/test_compile_readtable_w_system_time/out.sql
index d188899e7c2..b579e3a6fed 100644
--- a/tests/unit/core/compile/sqlglot/snapshots/test_compile_readtable/test_compile_readtable_w_system_time/out.sql
+++ b/tests/unit/core/compile/sqlglot/snapshots/test_compile_readtable/test_compile_readtable_w_system_time/out.sql
@@ -1,17 +1,3 @@
 SELECT
-  `bool_col`,
-  `bytes_col`,
-  `date_col`,
-  `datetime_col`,
-  `geography_col`,
-  `int64_col`,
-  `int64_too`,
-  `numeric_col`,
-  `float64_col`,
-  `rowindex`,
-  `rowindex_2`,
-  `string_col`,
-  `time_col`,
-  `timestamp_col`,
-  `duration_col`
+  *
 FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` FOR SYSTEM_TIME AS OF '2025-11-09T03:04:05.678901+00:00'
\ No newline at end of file