diff --git a/bigframes/core/compile/sqlglot/compiler.py b/bigframes/core/compile/sqlglot/compiler.py
index ce9ed6ce37..a19a0d1a7d 100644
--- a/bigframes/core/compile/sqlglot/compiler.py
+++ b/bigframes/core/compile/sqlglot/compiler.py
@@ -202,11 +202,14 @@ def compile_readlocal(
 @_compile_node.register
 def compile_readtable(node: sql_nodes.SqlDataSource, child: sqlglot_ir.SQLGlotIR):
     table_obj = node.source.table
+    # Empty `columns` makes from_table skip the explicit column list.
+    columns = () if node.is_star_selection else node.source.schema.names
     return sqlglot_ir.SQLGlotIR.from_table(
         table_obj.project_id,
         table_obj.dataset_id,
         table_obj.table_id,
         uid_gen=child.uid_gen,
+        columns=columns,
         sql_predicate=node.source.sql_predicate,
         system_time=node.source.at_time,
     )
diff --git a/bigframes/core/compile/sqlglot/sqlglot_ir.py b/bigframes/core/compile/sqlglot/sqlglot_ir.py
index 8a4413fb8b..42c5ea7475 100644
--- a/bigframes/core/compile/sqlglot/sqlglot_ir.py
+++ b/bigframes/core/compile/sqlglot/sqlglot_ir.py
@@ -178,6 +178,7 @@ def from_table(
         dataset_id: str,
         table_id: str,
         uid_gen: guid.SequentialUIDGenerator,
+        columns: typing.Sequence[str] = (),
         sql_predicate: typing.Optional[str] = None,
         system_time: typing.Optional[datetime.datetime] = None,
     ) -> SQLGlotIR:
@@ -187,9 +188,8 @@ def from_table(
             project_id (str): The project ID of the BigQuery table.
             dataset_id (str): The dataset ID of the BigQuery table.
             table_id (str): The table ID of the BigQuery table.
-            col_names (typing.Sequence[str]): The names of the columns to select.
-            alias_names (typing.Sequence[str]): The aliases for the selected columns.
             uid_gen (guid.SequentialUIDGenerator): A generator for unique identifiers.
+            columns (typing.Sequence[str]): The names of the columns to select (all if empty).
             sql_predicate (typing.Optional[str]): An optional SQL predicate for filtering.
-            system_time (typing.Optional[str]): An optional system time for time-travel queries.
+            system_time (typing.Optional[datetime.datetime]): An optional system time for time-travel queries.
         """
@@ -210,14 +210,22 @@ def from_table(
             version=version,
             alias=sql.identifier(table_alias),
         )
-        if sql_predicate:
+
+        if not columns and not sql_predicate:
+            return cls.from_expr(expr=table_expr, uid_gen=uid_gen)
+
+        if columns:
+            select_cols = [sql.identifier(col) for col in columns]
+            select_expr = sge.Select().select(*select_cols).from_(table_expr)
+        else:
             select_expr = sge.Select().select(sge.Star()).from_(table_expr)
+
+        if sql_predicate:
             select_expr = select_expr.where(
                 sg.parse_one(sql_predicate, dialect=sql.base.DIALECT), append=False
             )
-            return cls.from_expr(expr=select_expr, uid_gen=uid_gen)
 
-        return cls.from_expr(expr=table_expr, uid_gen=uid_gen)
+        return cls.from_expr(expr=select_expr, uid_gen=uid_gen)
 
     @classmethod
     def from_cte_ref(
diff --git a/bigframes/core/sql_nodes.py b/bigframes/core/sql_nodes.py
index 45048dc2b1..f5cb540126 100644
--- a/bigframes/core/sql_nodes.py
+++ b/bigframes/core/sql_nodes.py
@@ -45,6 +45,13 @@ def fields(self) -> Sequence[nodes.Field]:
             for source_id in self.source.schema.names
         )
 
+    @property
+    def is_star_selection(self) -> bool:
+        """True if the selected column names equal the physical schema's, in order."""
+        return tuple(self.source.schema.names) == tuple(
+            field.name for field in self.source.table.physical_schema
+        )
+
     @property
     def variables_introduced(self) -> int:
         # This operation only renames variables, doesn't actually create new ones
diff --git a/tests/unit/core/compile/sqlglot/snapshots/test_compile_readtable/test_compile_readtable_w_columns_filters/out.sql b/tests/unit/core/compile/sqlglot/snapshots/test_compile_readtable/test_compile_readtable_w_columns_filters/out.sql
index 2b71ef917d..4d1b822245 100644
--- a/tests/unit/core/compile/sqlglot/snapshots/test_compile_readtable/test_compile_readtable_w_columns_filters/out.sql
+++ b/tests/unit/core/compile/sqlglot/snapshots/test_compile_readtable/test_compile_readtable_w_columns_filters/out.sql
@@ -1,5 +1,7 @@
 SELECT
-  *
+  `rowindex`,
+  `int64_col`,
+  `string_col`
 FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`
 WHERE
   `rowindex` > 0 AND `string_col` IN ('Hello, World!')
\ No newline at end of file