From 26b683e901660b1774f2e0a0126f4d771138456a Mon Sep 17 00:00:00 2001
From: Garrett Wu <garrettwu@google.com>
Date: Wed, 11 Feb 2026 19:50:17 +0000
Subject: [PATCH 1/4] feat: add bigquery.ai.generate_table function

---
 GEMINI.md                            | 42 +------------
 bigframes/bigquery/_operations/ai.py | 93 ++++++++++++++++++++++++++++
 bigframes/bigquery/ai.py             |  2 +
 tests/unit/bigquery/test_ai.py       | 54 +++++++++++++++-
 4 files changed, 150 insertions(+), 41 deletions(-)
diff --git a/GEMINI.md b/GEMINI.md
index 0d447f17a4..0d74e277ad 100644
--- a/GEMINI.md
+++ b/GEMINI.md
@@ -2,48 +2,12 @@
 
 ## Testing
 
-We use `nox` to instrument our tests.
+We use `pytest` to instrument our tests.
 
-- To test your changes, run unit tests with `nox`:
+- To test your changes, run unit tests with `pytest`:
 
   ```bash
-  nox -r -s unit
-  ```
-
-- To run a single unit test:
-
-  ```bash
-  nox -r -s unit-3.13 -- -k <name of test>
-  ```
-
-- Ignore this step if you lack access to Google Cloud resources. To run system
-  tests, you can execute::
-
-   # Run all system tests
-   $ nox -r -s system
-
-   # Run a single system test
-   $ nox -r -s system-3.13 -- -k <name of test>
-
-- The codebase must have better coverage than it had previously after each
-  change. You can test coverage via `nox -s unit system cover` (takes a long
-  time). Omit `system` if you lack access to cloud resources.
-
-## Code Style
-
-- We use the automatic code formatter `black`. You can run it using
-  the nox session `format`. This will eliminate many lint errors. Run via:
-
-  ```bash
-  nox -r -s format
-  ```
-
-- PEP8 compliance is required, with exceptions defined in the linter configuration.
-  If you have ``nox`` installed, you can test that you have not introduced
-  any non-compliant code via:
-
-  ```
-  nox -r -s lint
+  pytest <test_file>::<test>
   ```
 
 - When writing tests, use the idiomatic "pytest" style.
diff --git a/bigframes/bigquery/_operations/ai.py b/bigframes/bigquery/_operations/ai.py
index bc2ab8dd20..16ca989bf0 100644
--- a/bigframes/bigquery/_operations/ai.py
+++ b/bigframes/bigquery/_operations/ai.py
@@ -601,6 +601,99 @@ def generate_text(
         return session.read_gbq_query(query)
 
 
+@log_adapter.method_logger(custom_base_name="bigquery_ai")
+def generate_table(
+    model: Union[bigframes.ml.base.BaseEstimator, str, pd.Series],
+    data: Union[dataframe.DataFrame, pd.DataFrame],
+    *,
+    output_schema: str,
+    temperature: Optional[float] = None,
+    top_p: Optional[float] = None,
+    max_output_tokens: Optional[int] = None,
+    stop_sequences: Optional[List[str]] = None,
+    request_type: Optional[str] = None,
+) -> dataframe.DataFrame:
+    """
+    Generates a table using a BigQuery ML model.
+
+    See the `AI.GENERATE_TABLE function syntax
+    <https://docs.cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-generate-table>`_
+    for additional reference.
+
+    **Examples:**
+
+        >>> import bigframes.pandas as bpd
+        >>> import bigframes.bigquery as bbq
+        >>> # The user is responsible for constructing a DataFrame that contains
+        >>> # the necessary columns for the model's prompt. For example, a
+        >>> # DataFrame with a 'prompt' column for text classification.
+        >>> df = bpd.DataFrame({'prompt': ["some text to classify"]})
+        >>> result = bbq.ai.generate_table(
+        ...     "project.dataset.model_name",
+        ...     data=df,
+        ...     output_schema="category STRING"
+        ... ) # doctest: +SKIP
+
+    Args:
+        model (bigframes.ml.base.BaseEstimator or str):
+            The model to use for table generation.
+        data (bigframes.pandas.DataFrame or pandas.DataFrame):
+            The data to use as input for table generation. It must contain the
+            columns that the model expects for constructing the prompt.
+        output_schema (str):
+            A string defining the output schema (e.g., "col1 STRING, col2 INT64").
+        temperature (float, optional):
+            A FLOAT64 value that is used for sampling promiscuity. The value
+            must be in the range ``[0.0, 1.0]``.
+        top_p (float, optional):
+            A FLOAT64 value that changes how the model selects tokens for
+            output.
+        max_output_tokens (int, optional):
+            An INT64 value that sets the maximum number of tokens in the
+            generated text.
+        stop_sequences (List[str], optional):
+            An ARRAY<STRING> value that contains the stop sequences for the model.
+        request_type (str, optional):
+            A STRING value that contains the request type for the model.
+
+    Returns:
+        bigframes.pandas.DataFrame:
+            The generated table.
+    """
+    data = _to_dataframe(data, series_rename="prompt")
+    model_name, session = bq_utils.get_model_name_and_session(model, data)
+    table_sql = bq_utils.to_sql(data)
+
+    struct_fields_bq: Dict[str, bigframes.core.sql.literals.STRUCT_VALUES] = {
+        "output_schema": output_schema
+    }
+    if temperature is not None:
+        struct_fields_bq["temperature"] = temperature
+    if top_p is not None:
+        struct_fields_bq["top_p"] = top_p
+    if max_output_tokens is not None:
+        struct_fields_bq["max_output_tokens"] = max_output_tokens
+    if stop_sequences is not None:
+        struct_fields_bq["stop_sequences"] = stop_sequences
+    if request_type is not None:
+        struct_fields_bq["request_type"] = request_type
+
+    struct_sql = bigframes.core.sql.literals.struct_literal(struct_fields_bq)
+    query = f"""
+        SELECT *
+        FROM AI.GENERATE_TABLE(
+            MODEL `{model_name}`,
+            ({table_sql}),
+            {struct_sql}
+        )
+    """
+
+    if session is None:
+        return bpd.read_gbq_query(query)
+    else:
+        return session.read_gbq_query(query)
+
+
 @log_adapter.method_logger(custom_base_name="bigquery_ai")
 def if_(
     prompt: PROMPT_TYPE,
diff --git a/bigframes/bigquery/ai.py b/bigframes/bigquery/ai.py
index 053ee7352a..bb24d5dc33 100644
--- a/bigframes/bigquery/ai.py
+++ b/bigframes/bigquery/ai.py
@@ -24,6 +24,7 @@
     generate_double,
     generate_embedding,
     generate_int,
+    generate_table,
     generate_text,
     if_,
     score,
@@ -37,6 +38,7 @@
     "generate_double",
     "generate_embedding",
     "generate_int",
+    "generate_table",
     "generate_text",
     "if_",
     "score",
diff --git a/tests/unit/bigquery/test_ai.py b/tests/unit/bigquery/test_ai.py
index 0be32b9e8a..d0e5f76414 100644
--- a/tests/unit/bigquery/test_ai.py
+++ b/tests/unit/bigquery/test_ai.py
@@ -220,8 +220,57 @@ def test_generate_text_defaults(mock_dataframe, mock_session):
     assert "STRUCT()" in query
 
 
+def test_generate_table_with_dataframe(mock_dataframe, mock_session):
+    model_name = "project.dataset.model"
+
+    bbq.ai.generate_table(
+        model_name,
+        mock_dataframe,
+        output_schema="col1 STRING, col2 INT64",
+    )
+
+    mock_session.read_gbq_query.assert_called_once()
+    query = mock_session.read_gbq_query.call_args[0][0]
+
+    # Normalize whitespace for comparison
+    query = " ".join(query.split())
+
+    expected_part_1 = "SELECT * FROM AI.GENERATE_TABLE("
+    expected_part_2 = f"MODEL `{model_name}`,"
+    expected_part_3 = "(SELECT * FROM my_table),"
+    expected_part_4 = "STRUCT('col1 STRING, col2 INT64' AS output_schema)"
+
+    assert expected_part_1 in query
+    assert expected_part_2 in query
+    assert expected_part_3 in query
+    assert expected_part_4 in query
+
+
+def test_generate_table_with_options(mock_dataframe, mock_session):
+    model_name = "project.dataset.model"
+
+    bbq.ai.generate_table(
+        model_name,
+        mock_dataframe,
+        output_schema="col1 STRING",
+        temperature=0.5,
+        max_output_tokens=100,
+    )
+
+    mock_session.read_gbq_query.assert_called_once()
+    query = mock_session.read_gbq_query.call_args[0][0]
+    query = " ".join(query.split())
+
+    assert f"MODEL `{model_name}`" in query
+    assert "(SELECT * FROM my_table)" in query
+    assert (
+        "STRUCT('col1 STRING' AS output_schema, 0.5 AS temperature, 100 AS max_output_tokens)"
+        in query
+    )
+
+
 @mock.patch("bigframes.pandas.read_pandas")
-def test_generate_text_with_pandas_dataframe(
+def test_generate_table_with_pandas_dataframe(
     read_pandas_mock, mock_dataframe, mock_session
 ):
     # This tests that pandas input path works and calls read_pandas
@@ -232,9 +281,10 @@ def test_generate_text_with_pandas_dataframe(
 
     pandas_df = pd.DataFrame({"content": ["test"]})
 
-    bbq.ai.generate_text(
+    bbq.ai.generate_table(
         model_name,
         pandas_df,
+        output_schema="col1 STRING",
     )
 
     read_pandas_mock.assert_called_once()

From 1bbbd3d0f9c5191a268d96d710a7c1657904e0d9 Mon Sep 17 00:00:00 2001
From: Garrett Wu <garrettwu@google.com>
Date: Wed, 11 Feb 2026 19:53:04 +0000
Subject: [PATCH 2/4] test

---
 tests/system/large/bigquery/test_ai.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/tests/system/large/bigquery/test_ai.py b/tests/system/large/bigquery/test_ai.py
index e318a8a720..86cf4d7f00 100644
--- a/tests/system/large/bigquery/test_ai.py
+++ b/tests/system/large/bigquery/test_ai.py
@@ -94,3 +94,20 @@ def test_generate_text_with_options(text_model):
 
     # It basically asserts that the results are still returned.
     assert len(result) == 2
+
+
+def test_generate_table(text_model):
+    df = bpd.DataFrame(
+        {"prompt": ["Generate a table of 2 programming languages and their creators."]}
+    )
+
+    result = ai.generate_table(
+        text_model,
+        df,
+        output_schema="language STRING, creator STRING",
+    )
+
+    assert "language" in result.columns
+    assert "creator" in result.columns
+    # The model may not always return the exact number of rows requested.
+    assert len(result) > 0

From 35254874095e422e1eb25171c114b8a815ecf5d8 Mon Sep 17 00:00:00 2001
From: Garrett Wu <garrettwu@google.com>
Date: Wed, 11 Feb 2026 19:55:24 +0000
Subject: [PATCH 3/4] docs

---
 bigframes/bigquery/_operations/ai.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/bigframes/bigquery/_operations/ai.py b/bigframes/bigquery/_operations/ai.py
index 16ca989bf0..7f9c3eb55f 100644
--- a/bigframes/bigquery/_operations/ai.py
+++ b/bigframes/bigquery/_operations/ai.py
@@ -604,7 +604,7 @@ def generate_text(
 @log_adapter.method_logger(custom_base_name="bigquery_ai")
 def generate_table(
     model: Union[bigframes.ml.base.BaseEstimator, str, pd.Series],
-    data: Union[dataframe.DataFrame, pd.DataFrame],
+    data: Union[dataframe.DataFrame, series.Series, pd.DataFrame, pd.Series],
     *,
     output_schema: str,
     temperature: Optional[float] = None,
@@ -637,9 +637,11 @@ def generate_table(
     Args:
         model (bigframes.ml.base.BaseEstimator or str):
             The model to use for table generation.
-        data (bigframes.pandas.DataFrame or pandas.DataFrame):
-            The data to use as input for table generation. It must contain the
-            columns that the model expects for constructing the prompt.
+        data (bigframes.pandas.DataFrame or bigframes.pandas.Series):
+            The data to generate embeddings for. If a Series is provided, it is
+            treated as the 'content' column.  If a DataFrame is provided, it
+            must contain a 'content' column, or you must rename the column you
+            wish to embed to 'content'.
         output_schema (str):
             A string defining the output schema (e.g., "col1 STRING, col2 INT64").
         temperature (float, optional):
@@ -650,7 +652,7 @@ def generate_table(
             output.
         max_output_tokens (int, optional):
             An INT64 value that sets the maximum number of tokens in the
-            generated text.
+            generated table.
         stop_sequences (List[str], optional):
             An ARRAY<STRING> value that contains the stop sequences for the model.
         request_type (str, optional):

From a82d395caed717ea2833a7bb07177da379bbe8f7 Mon Sep 17 00:00:00 2001
From: Garrett Wu <garrettwu@google.com>
Date: Wed, 11 Feb 2026 20:00:06 +0000
Subject: [PATCH 4/4] gemini.md

---
 GEMINI.md                      | 42 +++++++++++++++++++++++++++++++---
 tests/unit/bigquery/test_ai.py |  5 ++--
 2 files changed, 41 insertions(+), 6 deletions(-)

diff --git a/GEMINI.md b/GEMINI.md
index 0d74e277ad..0d447f17a4 100644
--- a/GEMINI.md
+++ b/GEMINI.md
@@ -2,12 +2,48 @@
 
 ## Testing
 
-We use `pytest` to instrument our tests.
+We use `nox` to instrument our tests.
 
-- To test your changes, run unit tests with `pytest`:
+- To test your changes, run unit tests with `nox`:
 
   ```bash
-  pytest <test_file>::<test>
+  nox -r -s unit
+  ```
+
+- To run a single unit test:
+
+  ```bash
+  nox -r -s unit-3.13 -- -k <name of test>
+  ```
+
+- Ignore this step if you lack access to Google Cloud resources. To run system
+  tests, you can execute::
+
+   # Run all system tests
+   $ nox -r -s system
+
+   # Run a single system test
+   $ nox -r -s system-3.13 -- -k <name of test>
+
+- The codebase must have better coverage than it had previously after each
+  change. You can test coverage via `nox -s unit system cover` (takes a long
+  time). Omit `system` if you lack access to cloud resources.
+
+## Code Style
+
+- We use the automatic code formatter `black`. You can run it using
+  the nox session `format`. This will eliminate many lint errors. Run via:
+
+  ```bash
+  nox -r -s format
+  ```
+
+- PEP8 compliance is required, with exceptions defined in the linter configuration.
+  If you have ``nox`` installed, you can test that you have not introduced
+  any non-compliant code via:
+
+  ```
+  nox -r -s lint
   ```
 
 - When writing tests, use the idiomatic "pytest" style.
diff --git a/tests/unit/bigquery/test_ai.py b/tests/unit/bigquery/test_ai.py
index d0e5f76414..796e86f924 100644
--- a/tests/unit/bigquery/test_ai.py
+++ b/tests/unit/bigquery/test_ai.py
@@ -270,7 +270,7 @@ def test_generate_table_with_options(mock_dataframe, mock_session):
 
 
 @mock.patch("bigframes.pandas.read_pandas")
-def test_generate_table_with_pandas_dataframe(
+def test_generate_text_with_pandas_dataframe(
     read_pandas_mock, mock_dataframe, mock_session
 ):
     # This tests that pandas input path works and calls read_pandas
@@ -281,10 +281,9 @@ def test_generate_table_with_pandas_dataframe(
 
     pandas_df = pd.DataFrame({"content": ["test"]})
 
-    bbq.ai.generate_table(
+    bbq.ai.generate_text(
         model_name,
         pandas_df,
-        output_schema="col1 STRING",
     )
 
     read_pandas_mock.assert_called_once()