diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 101c6bdc13d1..2ffd4d6e87b6 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -20,13 +20,10 @@ repos: hooks: - id: trailing-whitespace - id: end-of-file-fixer - - id: check-yaml -- repo: https://github.com/psf/black - rev: 23.7.0 - hooks: - - id: black -- repo: https://github.com/pycqa/flake8 - rev: 6.1.0 # version-scanner: ignore - hooks: - - id: flake8 - args: [--config, packages/google-cloud-alloydb/.flake8] +- repo: https://github.com/astral-sh/ruff-pre-commit + # Ruff version. + rev: v0.14.14 + hooks: + # Run the linter. + - id: ruff-check + args: [ --select, I, --fix, --target-version=py310, --line-length=88 ] diff --git a/packages/bigframes/bigframes/bigquery/__init__.py b/packages/bigframes/bigframes/bigquery/__init__.py index a31d7dd83f93..86a45546b748 100644 --- a/packages/bigframes/bigframes/bigquery/__init__.py +++ b/packages/bigframes/bigframes/bigquery/__init__.py @@ -47,7 +47,7 @@ import sys -from bigframes.bigquery import ai, ml, obj +from bigframes.bigquery import aead, ai, ml, obj from bigframes.bigquery._operations.approx_agg import approx_top_count from bigframes.bigquery._operations.array import ( array_agg, @@ -208,6 +208,7 @@ # io ops "load_data", # Modules / SQL namespaces + "aead", "ai", "ml", "obj", diff --git a/packages/bigframes/bigframes/bigquery/_googlesql.py b/packages/bigframes/bigframes/bigquery/_googlesql.py new file mode 100644 index 000000000000..a37c9790ff56 --- /dev/null +++ b/packages/bigframes/bigframes/bigquery/_googlesql.py @@ -0,0 +1,81 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Utilities for working with GoogleSqlScalarOps.""" + +from __future__ import annotations + +from typing import Any, Union + +import bigframes.core.col +import bigframes.core.expression as ex +import bigframes.core.sentinels as sentinels +import bigframes.series as series +from bigframes.operations import googlesql + + +def apply_googlesql_scalar_op( + op: googlesql.GoogleSqlScalarOp, + *args: Any, +) -> Union[series.Series, bigframes.core.col.Expression]: + """Applies a GoogleSQL scalar operator to the given arguments. + + Handles a mix of Series, Expression, and literal inputs. + + Args: + op (googlesql.GoogleSqlScalarOp): + The operator to apply. + *args (Any): + The arguments to apply the operator to. + + Returns: + bigframes.pandas.Series | bigframes.core.col.Expression: + The result of the operation. If any of ``args`` is a Series, returns + a Series. Otherwise, returns an Expression. 
+ """ + # Find the first Series to use for alignment + first_series = None + for arg in args: + if isinstance(arg, series.Series): + first_series = arg + break + + if first_series is not None: + processed_args: list[Union[bigframes.core.col.Expression, series.Series]] = [] + block = first_series._block + for arg in args: + if isinstance(arg, bigframes.core.col.Expression): + block, col_id = block.project_expr(bigframes.core.col._as_bf_expr(arg)) + processed_args.append(series.Series(block.select_column(col_id))) + elif arg is sentinels.Sentinel.ARGUMENT_DEFAULT: + processed_args.append(bigframes.core.col.Expression(ex.OmittedArg())) + else: + processed_args.append(arg) + + # Apply the n-ary op. _apply_nary_op handles alignment of Series and literals. + result = first_series._apply_nary_op(op, processed_args, ignore_self=True) + result.name = None + return result + + # No Series, return an Expression + expr_args = [] + for arg in args: + if isinstance(arg, bigframes.core.col.Expression): + expr_args.append(bigframes.core.col._as_bf_expr(arg)) + elif arg is sentinels.Sentinel.ARGUMENT_DEFAULT: + expr_args.append(ex.OmittedArg()) + else: + expr_args.append(ex.const(arg)) + + return bigframes.core.col.Expression(ex.OpExpression(op, tuple(expr_args))) diff --git a/packages/bigframes/bigframes/bigquery/_operations/aead.py b/packages/bigframes/bigframes/bigquery/_operations/aead.py new file mode 100644 index 000000000000..fb98bed59be9 --- /dev/null +++ b/packages/bigframes/bigframes/bigquery/_operations/aead.py @@ -0,0 +1,91 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# DO NOT MODIFY THIS FILE DIRECTLY. +# This file was generated from: scripts/data/sql-functions/aead.yaml +# by the script: scripts/generate_bigframes_bigquery.py + +from __future__ import annotations + +import datetime +from typing import Any, Literal, Optional, TypeVar, Union + +import bigframes.bigquery._googlesql +import bigframes.core.col +import bigframes.core.expression as ex +import bigframes.core.sentinels as sentinels +import bigframes.operations as ops +import bigframes.series as series +from bigframes import dtypes +from bigframes.operations import googlesql + +T = TypeVar("T", series.Series, bigframes.core.col.Expression) + +_DECRYPT_BYTES_OP = googlesql.GoogleSqlScalarOp( + "AEAD.DECRYPT_BYTES", + args=(googlesql.ArgSpec(), googlesql.ArgSpec(), googlesql.ArgSpec()), + signature=lambda *args: dtypes.BYTES_DTYPE, +) +_DECRYPT_STRING_OP = googlesql.GoogleSqlScalarOp( + "AEAD.DECRYPT_STRING", + args=(googlesql.ArgSpec(), googlesql.ArgSpec(), googlesql.ArgSpec()), + signature=lambda *args: dtypes.STRING_DTYPE, +) +_ENCRYPT_OP = googlesql.GoogleSqlScalarOp( + "AEAD.ENCRYPT", + args=(googlesql.ArgSpec(), googlesql.ArgSpec(), googlesql.ArgSpec()), + signature=lambda *args: dtypes.BYTES_DTYPE, +) + + +def decrypt_bytes( + keyset: Union[T, bigframes.core.col.Expression, Union[Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], bytes, dict]], + ciphertext: Union[T, bigframes.core.col.Expression, Union[Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], bytes]], + additional_data: Union[T, bigframes.core.col.Expression, 
Union[Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], bytes]], +) -> T: + """Uses the matching key from keyset to decrypt ciphertext and verifies the integrity of the data using additional_data. Returns an error if decryption or verification fails.""" + return bigframes.bigquery._googlesql.apply_googlesql_scalar_op( + _DECRYPT_BYTES_OP, + keyset, + ciphertext, + additional_data, + ) # type: ignore + + +def decrypt_string( + keyset: Union[T, bigframes.core.col.Expression, Union[Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], bytes, dict]], + ciphertext: Union[T, bigframes.core.col.Expression, Union[Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], bytes]], + additional_data: Union[T, bigframes.core.col.Expression, Union[Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], str]], +) -> T: + """Like AEAD.DECRYPT_BYTES, but where additional_data is of type STRING.""" + return bigframes.bigquery._googlesql.apply_googlesql_scalar_op( + _DECRYPT_STRING_OP, + keyset, + ciphertext, + additional_data, + ) # type: ignore + + +def encrypt( + keyset: Union[T, bigframes.core.col.Expression, Union[Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], bytes, dict]], + plaintext: Union[T, bigframes.core.col.Expression, Union[Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], bytes, str]], + additional_data: Union[T, bigframes.core.col.Expression, Union[Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], bytes, str]], +) -> T: + """Encrypts plaintext using the primary cryptographic key in keyset. The algorithm of the primary key must be AEAD_AES_GCM_256. Binds the ciphertext to the context defined by additional_data. 
Returns NULL if any input is NULL.""" + return bigframes.bigquery._googlesql.apply_googlesql_scalar_op( + _ENCRYPT_OP, + keyset, + plaintext, + additional_data, + ) # type: ignore diff --git a/packages/bigframes/bigframes/bigquery/aead.py b/packages/bigframes/bigframes/bigquery/aead.py new file mode 100644 index 000000000000..f18e12bc5cf5 --- /dev/null +++ b/packages/bigframes/bigframes/bigquery/aead.py @@ -0,0 +1,25 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""AEAD encryption functions""" + +from __future__ import annotations + +from bigframes.bigquery._operations.aead import decrypt_bytes, decrypt_string, encrypt + +__all__ = [ + "decrypt_bytes", + "decrypt_string", + "encrypt", +] diff --git a/packages/bigframes/bigframes/core/sentinels.py b/packages/bigframes/bigframes/core/sentinels.py new file mode 100644 index 000000000000..ff9913f7c6fd --- /dev/null +++ b/packages/bigframes/bigframes/core/sentinels.py @@ -0,0 +1,31 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""Sentinel values used throughout BigFrames.""" + +from __future__ import annotations + +import enum + + +class Sentinel(enum.Enum): + """Default values used throughout BigFrames.""" + + """Default value for an optional argument. + + When a parameter is set to this, that parameter is explicitly omitted + from the SQL text. This allows for NULL (None in Python) to be explicitly + passed in to optional parameters. + """ + ARGUMENT_DEFAULT = enum.auto() diff --git a/packages/bigframes/pyproject.toml b/packages/bigframes/pyproject.toml index fed528d4a7a1..e7d9c326a936 100644 --- a/packages/bigframes/pyproject.toml +++ b/packages/bigframes/pyproject.toml @@ -1,3 +1,6 @@ [build-system] requires = ["setuptools"] build-backend = "setuptools.build_meta" + +[tool.ruff.lint.isort] +known-first-party = ["bigframes"] diff --git a/packages/bigframes/scripts/data/sql-functions/aead.yaml b/packages/bigframes/scripts/data/sql-functions/aead.yaml new file mode 100644 index 000000000000..6c289a96e886 --- /dev/null +++ b/packages/bigframes/scripts/data/sql-functions/aead.yaml @@ -0,0 +1,131 @@ +urn: extension:google:bq_scalar_functions +scalar_functions: + - name: "aead.decrypt_bytes" + description: "Uses the matching key from keyset to decrypt ciphertext and verifies the integrity of the data using additional_data. Returns an error if decryption or verification fails." 
+ impls: + # Signature: aead.decrypt_bytes:vbin_vbin_vbin + - args: + - name: "keyset" + value: binary + optional: false + keyword_only: false + - name: "ciphertext" + value: binary + optional: false + keyword_only: false + - name: "additional_data" + value: binary + optional: false + keyword_only: false + return: binary + # Signature: aead.decrypt_bytes:struct_vbin_vbin + - args: + - name: "keyset" + value: struct + optional: false + keyword_only: false + - name: "ciphertext" + value: binary + optional: false + keyword_only: false + - name: "additional_data" + value: binary + optional: false + keyword_only: false + return: binary + - name: "aead.decrypt_string" + description: "Like AEAD.DECRYPT_BYTES, but where additional_data is of type STRING." + impls: + # Signature: aead.decrypt_string:vbin_vbin_str + - args: + - name: "keyset" + value: binary + optional: false + keyword_only: false + - name: "ciphertext" + value: binary + optional: false + keyword_only: false + - name: "additional_data" + value: string + optional: false + keyword_only: false + return: string + # Signature: aead.decrypt_string:struct_vbin_str + - args: + - name: "keyset" + value: struct + optional: false + keyword_only: false + - name: "ciphertext" + value: binary + optional: false + keyword_only: false + - name: "additional_data" + value: string + optional: false + keyword_only: false + return: string + - name: "aead.encrypt" + description: "Encrypts plaintext using the primary cryptographic key in keyset. The algorithm of the primary key must be AEAD_AES_GCM_256. Binds the ciphertext to the context defined by additional_data. Returns NULL if any input is NULL." 
+ impls: + # Signature: aead.encrypt:vbin_str_str + - args: + - name: "keyset" + value: binary + optional: false + keyword_only: false + - name: "plaintext" + value: string + optional: false + keyword_only: false + - name: "additional_data" + value: string + optional: false + keyword_only: false + return: binary + # Signature: aead.encrypt:vbin_vbin_vbin + - args: + - name: "keyset" + value: binary + optional: false + keyword_only: false + - name: "plaintext" + value: binary + optional: false + keyword_only: false + - name: "additional_data" + value: binary + optional: false + keyword_only: false + return: binary + # Signature: aead.encrypt:struct_str_str + - args: + - name: "keyset" + value: struct + optional: false + keyword_only: false + - name: "plaintext" + value: string + optional: false + keyword_only: false + - name: "additional_data" + value: string + optional: false + keyword_only: false + return: binary + # Signature: aead.encrypt:struct_vbin_vbin + - args: + - name: "keyset" + value: struct + optional: false + keyword_only: false + - name: "plaintext" + value: binary + optional: false + keyword_only: false + - name: "additional_data" + value: binary + optional: false + keyword_only: false + return: binary diff --git a/packages/bigframes/scripts/generate_bigframes_bigquery.py b/packages/bigframes/scripts/generate_bigframes_bigquery.py new file mode 100755 index 000000000000..4baaef3ce6b0 --- /dev/null +++ b/packages/bigframes/scripts/generate_bigframes_bigquery.py @@ -0,0 +1,281 @@ +#!/usr/bin/env -S uv run --script +# +# /// script +# dependencies = [ +# "jinja2", +# "pyyaml", +# "ruff==0.14.14", +# ] +# /// +# +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pathlib +import re +import subprocess + +import jinja2 +import yaml + +# Directory containing the YAML files +DATA_DIR = pathlib.Path("scripts/data/sql-functions") +# Directory where the generated Python files will be placed +OUTPUT_DIR = pathlib.Path("bigframes/bigquery/_operations") +# Directory where the generated test files will be placed +TEST_OUTPUT_DIR = pathlib.Path("tests/unit/bigquery/_operations") +# Directory containing the Jinja2 templates +TEMPLATE_DIR = pathlib.Path("scripts/templates") + +RUFF_ARGS = [ + "ruff", + "check", + "--select", + "I", + "--fix", + "--target-version=py310", + "--line-length=88", +] + +DTYPE_MAP = { + "binary": "dtypes.BYTES_DTYPE", + "string": "dtypes.STRING_DTYPE", + "int64": "dtypes.INT_DTYPE", + "float64": "dtypes.FLOAT_DTYPE", + "bool": "dtypes.BOOL_DTYPE", + "geography": "dtypes.GEO_DTYPE", + "json": "dtypes.JSON_DTYPE", + "date": "dtypes.DATE_DTYPE", + "time": "dtypes.TIME_DTYPE", + "datetime": "dtypes.DATETIME_DTYPE", + "timestamp": "dtypes.TIMESTAMP_DTYPE", +} + +PY_TYPE_MAP = { + "binary": "bytes", + "string": "str", + "int64": "int", + "float64": "float", + "bool": "bool", + "geography": "Any", + "json": "Any", + "date": "datetime.date", + "time": "datetime.time", + "datetime": "datetime.datetime", + "timestamp": "datetime.datetime", + "struct": "dict", +} + +YAML_TYPE_TO_COL = { + "binary": "bytes_col", + "string": "string_col", + "int64": "int64_col", + "float64": "float64_col", + "bool": "bool_col", + "geography": "geography_col", + "date": "date_col", + "time": "time_col", + "datetime": 
"datetime_col", + "timestamp": "timestamp_col", +} + + +def to_snake_case(name): + # Replace dots with underscores + name = name.replace(".", "_") + # Handle CamelCase to snake_case + name = re.sub(r"(? 1 + else types[0] + ) + default = "sentinels.Sentinel.ARGUMENT_DEFAULT" if arg_info["optional"] else "" + func_args.append( + { + "name": name, + "type_hint": type_hint, + "default": default, + } + ) + + # Clean up default values for mandatory args + # In Python, mandatory args come first. + for arg in func_args: + if not arg["default"]: + del arg["default"] + + # Test args + test_args = [] + for name in arg_order: + arg_info = args_by_name[name] + some_type = list(arg_info["types"])[0] + col_name = YAML_TYPE_TO_COL.get(some_type, "string_col") + test_args.append({"col_name": col_name}) + + functions_list.append( + { + "name": python_name, + "op_name": internal_op_name, + "description": func_data["description"], + "args": func_args, + "test_args": test_args, + } + ) + + # Render and write + output_file.parent.mkdir(parents=True, exist_ok=True) + content = template.render( + yaml_path=str(yaml_file), + script_path="scripts/generate_bigframes_bigquery.py", + ops=ops_list, + functions=functions_list, + ) + with open(output_file, "w") as f: + f.write(content) + + subprocess.run( + RUFF_ARGS + + [ + str(output_file), + ], + check=True, + ) + print(f" Generated {output_file}") + + # Render and write test + import_path = "bigframes.bigquery._operations." 
+ ".".join(module_path.parts) + test_output_file = TEST_OUTPUT_DIR.joinpath( + module_path.with_name(f"test_{module_path.name}") + ).with_suffix(".py") + + test_output_file.parent.mkdir(parents=True, exist_ok=True) + test_content = test_template.render( + yaml_path=str(yaml_file), + script_path="scripts/generate_bigframes_bigquery.py", + import_path=import_path, + short_name=module_path.name, + functions=functions_list, + ) + with open(test_output_file, "w") as f: + f.write(test_content) + + subprocess.run( + RUFF_ARGS + + [ + str(test_output_file), + ], + check=True, + ) + print(f" Generated {test_output_file}") + + print(f" Updating snapshots for {test_output_file}...") + subprocess.run( + [ + "pytest", + str(test_output_file), + "--snapshot-update", + ], + check=False, + ) + + +if __name__ == "__main__": + main() diff --git a/packages/bigframes/scripts/generate_bigframes_bigquery.py.lock b/packages/bigframes/scripts/generate_bigframes_bigquery.py.lock new file mode 100644 index 000000000000..0c89fde6d406 --- /dev/null +++ b/packages/bigframes/scripts/generate_bigframes_bigquery.py.lock @@ -0,0 +1,104 @@ +version = 1 +revision = 3 +requires-python = ">=3.14" + +[manifest] +requirements = [ + { name = "jinja2" }, + { name = "pyyaml" }, + { name = "ruff", specifier = "==0.14.14" }, +] + +[[package]] +name = "jinja2" +version = "3.1.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markupsafe" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/df/bf/f7da0350254c0ed7c72f3e33cef02e048281fec7ecec5f032d4aac52226b/jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d", size = 245115, upload-time = "2025-03-05T20:05:02.478Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 
134899, upload-time = "2025-03-05T20:05:00.369Z" }, +] + +[[package]] +name = "markupsafe" +version = "3.0.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7e/99/7690b6d4034fffd95959cbe0c02de8deb3098cc577c67bb6a24fe5d7caa7/markupsafe-3.0.3.tar.gz", hash = "sha256:722695808f4b6457b320fdc131280796bdceb04ab50fe1795cd540799ebe1698", size = 80313, upload-time = "2025-09-27T18:37:40.426Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/33/8a/8e42d4838cd89b7dde187011e97fe6c3af66d8c044997d2183fbd6d31352/markupsafe-3.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:eaa9599de571d72e2daf60164784109f19978b327a3910d3e9de8c97b5b70cfe", size = 11619, upload-time = "2025-09-27T18:37:06.342Z" }, + { url = "https://files.pythonhosted.org/packages/b5/64/7660f8a4a8e53c924d0fa05dc3a55c9cee10bbd82b11c5afb27d44b096ce/markupsafe-3.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c47a551199eb8eb2121d4f0f15ae0f923d31350ab9280078d1e5f12b249e0026", size = 12029, upload-time = "2025-09-27T18:37:07.213Z" }, + { url = "https://files.pythonhosted.org/packages/da/ef/e648bfd021127bef5fa12e1720ffed0c6cbb8310c8d9bea7266337ff06de/markupsafe-3.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f34c41761022dd093b4b6896d4810782ffbabe30f2d443ff5f083e0cbbb8c737", size = 24408, upload-time = "2025-09-27T18:37:09.572Z" }, + { url = "https://files.pythonhosted.org/packages/41/3c/a36c2450754618e62008bf7435ccb0f88053e07592e6028a34776213d877/markupsafe-3.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:457a69a9577064c05a97c41f4e65148652db078a3a509039e64d3467b9e7ef97", size = 23005, upload-time = "2025-09-27T18:37:10.58Z" }, + { url = 
"https://files.pythonhosted.org/packages/bc/20/b7fdf89a8456b099837cd1dc21974632a02a999ec9bf7ca3e490aacd98e7/markupsafe-3.0.3-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e8afc3f2ccfa24215f8cb28dcf43f0113ac3c37c2f0f0806d8c70e4228c5cf4d", size = 22048, upload-time = "2025-09-27T18:37:11.547Z" }, + { url = "https://files.pythonhosted.org/packages/9a/a7/591f592afdc734f47db08a75793a55d7fbcc6902a723ae4cfbab61010cc5/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ec15a59cf5af7be74194f7ab02d0f59a62bdcf1a537677ce67a2537c9b87fcda", size = 23821, upload-time = "2025-09-27T18:37:12.48Z" }, + { url = "https://files.pythonhosted.org/packages/7d/33/45b24e4f44195b26521bc6f1a82197118f74df348556594bd2262bda1038/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:0eb9ff8191e8498cca014656ae6b8d61f39da5f95b488805da4bb029cccbfbaf", size = 21606, upload-time = "2025-09-27T18:37:13.485Z" }, + { url = "https://files.pythonhosted.org/packages/ff/0e/53dfaca23a69fbfbbf17a4b64072090e70717344c52eaaaa9c5ddff1e5f0/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2713baf880df847f2bece4230d4d094280f4e67b1e813eec43b4c0e144a34ffe", size = 23043, upload-time = "2025-09-27T18:37:14.408Z" }, + { url = "https://files.pythonhosted.org/packages/46/11/f333a06fc16236d5238bfe74daccbca41459dcd8d1fa952e8fbd5dccfb70/markupsafe-3.0.3-cp314-cp314-win32.whl", hash = "sha256:729586769a26dbceff69f7a7dbbf59ab6572b99d94576a5592625d5b411576b9", size = 14747, upload-time = "2025-09-27T18:37:15.36Z" }, + { url = "https://files.pythonhosted.org/packages/28/52/182836104b33b444e400b14f797212f720cbc9ed6ba34c800639d154e821/markupsafe-3.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:bdc919ead48f234740ad807933cdf545180bfbe9342c2bb451556db2ed958581", size = 15341, upload-time = "2025-09-27T18:37:16.496Z" }, + { url = 
"https://files.pythonhosted.org/packages/6f/18/acf23e91bd94fd7b3031558b1f013adfa21a8e407a3fdb32745538730382/markupsafe-3.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:5a7d5dc5140555cf21a6fefbdbf8723f06fcd2f63ef108f2854de715e4422cb4", size = 14073, upload-time = "2025-09-27T18:37:17.476Z" }, + { url = "https://files.pythonhosted.org/packages/3c/f0/57689aa4076e1b43b15fdfa646b04653969d50cf30c32a102762be2485da/markupsafe-3.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:1353ef0c1b138e1907ae78e2f6c63ff67501122006b0f9abad68fda5f4ffc6ab", size = 11661, upload-time = "2025-09-27T18:37:18.453Z" }, + { url = "https://files.pythonhosted.org/packages/89/c3/2e67a7ca217c6912985ec766c6393b636fb0c2344443ff9d91404dc4c79f/markupsafe-3.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1085e7fbddd3be5f89cc898938f42c0b3c711fdcb37d75221de2666af647c175", size = 12069, upload-time = "2025-09-27T18:37:19.332Z" }, + { url = "https://files.pythonhosted.org/packages/f0/00/be561dce4e6ca66b15276e184ce4b8aec61fe83662cce2f7d72bd3249d28/markupsafe-3.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1b52b4fb9df4eb9ae465f8d0c228a00624de2334f216f178a995ccdcf82c4634", size = 25670, upload-time = "2025-09-27T18:37:20.245Z" }, + { url = "https://files.pythonhosted.org/packages/50/09/c419f6f5a92e5fadde27efd190eca90f05e1261b10dbd8cbcb39cd8ea1dc/markupsafe-3.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fed51ac40f757d41b7c48425901843666a6677e3e8eb0abcff09e4ba6e664f50", size = 23598, upload-time = "2025-09-27T18:37:21.177Z" }, + { url = "https://files.pythonhosted.org/packages/22/44/a0681611106e0b2921b3033fc19bc53323e0b50bc70cffdd19f7d679bb66/markupsafe-3.0.3-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f190daf01f13c72eac4efd5c430a8de82489d9cff23c364c3ea822545032993e", size = 23261, upload-time = "2025-09-27T18:37:22.167Z" }, + { url = 
"https://files.pythonhosted.org/packages/5f/57/1b0b3f100259dc9fffe780cfb60d4be71375510e435efec3d116b6436d43/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e56b7d45a839a697b5eb268c82a71bd8c7f6c94d6fd50c3d577fa39a9f1409f5", size = 24835, upload-time = "2025-09-27T18:37:23.296Z" }, + { url = "https://files.pythonhosted.org/packages/26/6a/4bf6d0c97c4920f1597cc14dd720705eca0bf7c787aebc6bb4d1bead5388/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:f3e98bb3798ead92273dc0e5fd0f31ade220f59a266ffd8a4f6065e0a3ce0523", size = 22733, upload-time = "2025-09-27T18:37:24.237Z" }, + { url = "https://files.pythonhosted.org/packages/14/c7/ca723101509b518797fedc2fdf79ba57f886b4aca8a7d31857ba3ee8281f/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5678211cb9333a6468fb8d8be0305520aa073f50d17f089b5b4b477ea6e67fdc", size = 23672, upload-time = "2025-09-27T18:37:25.271Z" }, + { url = "https://files.pythonhosted.org/packages/fb/df/5bd7a48c256faecd1d36edc13133e51397e41b73bb77e1a69deab746ebac/markupsafe-3.0.3-cp314-cp314t-win32.whl", hash = "sha256:915c04ba3851909ce68ccc2b8e2cd691618c4dc4c4232fb7982bca3f41fd8c3d", size = 14819, upload-time = "2025-09-27T18:37:26.285Z" }, + { url = "https://files.pythonhosted.org/packages/1a/8a/0402ba61a2f16038b48b39bccca271134be00c5c9f0f623208399333c448/markupsafe-3.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4faffd047e07c38848ce017e8725090413cd80cbc23d86e55c587bf979e579c9", size = 15426, upload-time = "2025-09-27T18:37:27.316Z" }, + { url = "https://files.pythonhosted.org/packages/70/bc/6f1c2f612465f5fa89b95bead1f44dcb607670fd42891d8fdcd5d039f4f4/markupsafe-3.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:32001d6a8fc98c8cb5c947787c5d08b0a50663d139f1305bac5885d98d9b40fa", size = 14146, upload-time = "2025-09-27T18:37:28.327Z" }, +] + +[[package]] +name = "pyyaml" +version = "6.0.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/05/8e/961c0007c59b8dd7729d542c61a4d537767a59645b82a0b521206e1e25c2/pyyaml-6.0.3.tar.gz", hash = "sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f", size = 130960, upload-time = "2025-09-25T21:33:16.546Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9d/8c/f4bd7f6465179953d3ac9bc44ac1a8a3e6122cf8ada906b4f96c60172d43/pyyaml-6.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:8d1fab6bb153a416f9aeb4b8763bc0f22a5586065f86f7664fc23339fc1c1fac", size = 181814, upload-time = "2025-09-25T21:32:35.712Z" }, + { url = "https://files.pythonhosted.org/packages/bd/9c/4d95bb87eb2063d20db7b60faa3840c1b18025517ae857371c4dd55a6b3a/pyyaml-6.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:34d5fcd24b8445fadc33f9cf348c1047101756fd760b4dacb5c3e99755703310", size = 173809, upload-time = "2025-09-25T21:32:36.789Z" }, + { url = "https://files.pythonhosted.org/packages/92/b5/47e807c2623074914e29dabd16cbbdd4bf5e9b2db9f8090fa64411fc5382/pyyaml-6.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:501a031947e3a9025ed4405a168e6ef5ae3126c59f90ce0cd6f2bfc477be31b7", size = 766454, upload-time = "2025-09-25T21:32:37.966Z" }, + { url = "https://files.pythonhosted.org/packages/02/9e/e5e9b168be58564121efb3de6859c452fccde0ab093d8438905899a3a483/pyyaml-6.0.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b3bc83488de33889877a0f2543ade9f70c67d66d9ebb4ac959502e12de895788", size = 836355, upload-time = "2025-09-25T21:32:39.178Z" }, + { url = "https://files.pythonhosted.org/packages/88/f9/16491d7ed2a919954993e48aa941b200f38040928474c9e85ea9e64222c3/pyyaml-6.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c458b6d084f9b935061bc36216e8a69a7e293a2f1e68bf956dcd9e6cbcd143f5", size = 794175, upload-time = "2025-09-25T21:32:40.865Z" }, + { url = 
"https://files.pythonhosted.org/packages/dd/3f/5989debef34dc6397317802b527dbbafb2b4760878a53d4166579111411e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7c6610def4f163542a622a73fb39f534f8c101d690126992300bf3207eab9764", size = 755228, upload-time = "2025-09-25T21:32:42.084Z" }, + { url = "https://files.pythonhosted.org/packages/d7/ce/af88a49043cd2e265be63d083fc75b27b6ed062f5f9fd6cdc223ad62f03e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5190d403f121660ce8d1d2c1bb2ef1bd05b5f68533fc5c2ea899bd15f4399b35", size = 789194, upload-time = "2025-09-25T21:32:43.362Z" }, + { url = "https://files.pythonhosted.org/packages/23/20/bb6982b26a40bb43951265ba29d4c246ef0ff59c9fdcdf0ed04e0687de4d/pyyaml-6.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:4a2e8cebe2ff6ab7d1050ecd59c25d4c8bd7e6f400f5f82b96557ac0abafd0ac", size = 156429, upload-time = "2025-09-25T21:32:57.844Z" }, + { url = "https://files.pythonhosted.org/packages/f4/f4/a4541072bb9422c8a883ab55255f918fa378ecf083f5b85e87fc2b4eda1b/pyyaml-6.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:93dda82c9c22deb0a405ea4dc5f2d0cda384168e466364dec6255b293923b2f3", size = 143912, upload-time = "2025-09-25T21:32:59.247Z" }, + { url = "https://files.pythonhosted.org/packages/7c/f9/07dd09ae774e4616edf6cda684ee78f97777bdd15847253637a6f052a62f/pyyaml-6.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:02893d100e99e03eda1c8fd5c441d8c60103fd175728e23e431db1b589cf5ab3", size = 189108, upload-time = "2025-09-25T21:32:44.377Z" }, + { url = "https://files.pythonhosted.org/packages/4e/78/8d08c9fb7ce09ad8c38ad533c1191cf27f7ae1effe5bb9400a46d9437fcf/pyyaml-6.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c1ff362665ae507275af2853520967820d9124984e0f7466736aea23d8611fba", size = 183641, upload-time = "2025-09-25T21:32:45.407Z" }, + { url = 
"https://files.pythonhosted.org/packages/7b/5b/3babb19104a46945cf816d047db2788bcaf8c94527a805610b0289a01c6b/pyyaml-6.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6adc77889b628398debc7b65c073bcb99c4a0237b248cacaf3fe8a557563ef6c", size = 831901, upload-time = "2025-09-25T21:32:48.83Z" }, + { url = "https://files.pythonhosted.org/packages/8b/cc/dff0684d8dc44da4d22a13f35f073d558c268780ce3c6ba1b87055bb0b87/pyyaml-6.0.3-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a80cb027f6b349846a3bf6d73b5e95e782175e52f22108cfa17876aaeff93702", size = 861132, upload-time = "2025-09-25T21:32:50.149Z" }, + { url = "https://files.pythonhosted.org/packages/b1/5e/f77dc6b9036943e285ba76b49e118d9ea929885becb0a29ba8a7c75e29fe/pyyaml-6.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:00c4bdeba853cc34e7dd471f16b4114f4162dc03e6b7afcc2128711f0eca823c", size = 839261, upload-time = "2025-09-25T21:32:51.808Z" }, + { url = "https://files.pythonhosted.org/packages/ce/88/a9db1376aa2a228197c58b37302f284b5617f56a5d959fd1763fb1675ce6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:66e1674c3ef6f541c35191caae2d429b967b99e02040f5ba928632d9a7f0f065", size = 805272, upload-time = "2025-09-25T21:32:52.941Z" }, + { url = "https://files.pythonhosted.org/packages/da/92/1446574745d74df0c92e6aa4a7b0b3130706a4142b2d1a5869f2eaa423c6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:16249ee61e95f858e83976573de0f5b2893b3677ba71c9dd36b9cf8be9ac6d65", size = 829923, upload-time = "2025-09-25T21:32:54.537Z" }, + { url = "https://files.pythonhosted.org/packages/f0/7a/1c7270340330e575b92f397352af856a8c06f230aa3e76f86b39d01b416a/pyyaml-6.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4ad1906908f2f5ae4e5a8ddfce73c320c2a1429ec52eafd27138b7f1cbe341c9", size = 174062, upload-time = "2025-09-25T21:32:55.767Z" }, + { url = 
"https://files.pythonhosted.org/packages/f1/12/de94a39c2ef588c7e6455cfbe7343d3b2dc9d6b6b2f40c4c6565744c873d/pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b", size = 149341, upload-time = "2025-09-25T21:32:56.828Z" }, +] + +[[package]] +name = "ruff" +version = "0.14.14" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2e/06/f71e3a86b2df0dfa2d2f72195941cd09b44f87711cb7fa5193732cb9a5fc/ruff-0.14.14.tar.gz", hash = "sha256:2d0f819c9a90205f3a867dbbd0be083bee9912e170fd7d9704cc8ae45824896b", size = 4515732, upload-time = "2026-01-22T22:30:17.527Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d2/89/20a12e97bc6b9f9f68343952da08a8099c57237aef953a56b82711d55edd/ruff-0.14.14-py3-none-linux_armv6l.whl", hash = "sha256:7cfe36b56e8489dee8fbc777c61959f60ec0f1f11817e8f2415f429552846aed", size = 10467650, upload-time = "2026-01-22T22:30:08.578Z" }, + { url = "https://files.pythonhosted.org/packages/a3/b1/c5de3fd2d5a831fcae21beda5e3589c0ba67eec8202e992388e4b17a6040/ruff-0.14.14-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:6006a0082336e7920b9573ef8a7f52eec837add1265cc74e04ea8a4368cd704c", size = 10883245, upload-time = "2026-01-22T22:30:04.155Z" }, + { url = "https://files.pythonhosted.org/packages/b8/7c/3c1db59a10e7490f8f6f8559d1db8636cbb13dccebf18686f4e3c9d7c772/ruff-0.14.14-py3-none-macosx_11_0_arm64.whl", hash = "sha256:026c1d25996818f0bf498636686199d9bd0d9d6341c9c2c3b62e2a0198b758de", size = 10231273, upload-time = "2026-01-22T22:30:34.642Z" }, + { url = "https://files.pythonhosted.org/packages/a1/6e/5e0e0d9674be0f8581d1f5e0f0a04761203affce3232c1a1189d0e3b4dad/ruff-0.14.14-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f666445819d31210b71e0a6d1c01e24447a20b85458eea25a25fe8142210ae0e", size = 10585753, upload-time = "2026-01-22T22:30:31.781Z" }, + { url = 
"https://files.pythonhosted.org/packages/23/09/754ab09f46ff1884d422dc26d59ba18b4e5d355be147721bb2518aa2a014/ruff-0.14.14-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3c0f18b922c6d2ff9a5e6c3ee16259adc513ca775bcf82c67ebab7cbd9da5bc8", size = 10286052, upload-time = "2026-01-22T22:30:24.827Z" }, + { url = "https://files.pythonhosted.org/packages/c8/cc/e71f88dd2a12afb5f50733851729d6b571a7c3a35bfdb16c3035132675a0/ruff-0.14.14-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1629e67489c2dea43e8658c3dba659edbfd87361624b4040d1df04c9740ae906", size = 11043637, upload-time = "2026-01-22T22:30:13.239Z" }, + { url = "https://files.pythonhosted.org/packages/67/b2/397245026352494497dac935d7f00f1468c03a23a0c5db6ad8fc49ca3fb2/ruff-0.14.14-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:27493a2131ea0f899057d49d303e4292b2cae2bb57253c1ed1f256fbcd1da480", size = 12194761, upload-time = "2026-01-22T22:30:22.542Z" }, + { url = "https://files.pythonhosted.org/packages/5b/06/06ef271459f778323112c51b7587ce85230785cd64e91772034ddb88f200/ruff-0.14.14-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:01ff589aab3f5b539e35db38425da31a57521efd1e4ad1ae08fc34dbe30bd7df", size = 12005701, upload-time = "2026-01-22T22:30:20.499Z" }, + { url = "https://files.pythonhosted.org/packages/41/d6/99364514541cf811ccc5ac44362f88df66373e9fec1b9d1c4cc830593fe7/ruff-0.14.14-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1cc12d74eef0f29f51775f5b755913eb523546b88e2d733e1d701fe65144e89b", size = 11282455, upload-time = "2026-01-22T22:29:59.679Z" }, + { url = "https://files.pythonhosted.org/packages/ca/71/37daa46f89475f8582b7762ecd2722492df26421714a33e72ccc9a84d7a5/ruff-0.14.14-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb8481604b7a9e75eff53772496201690ce2687067e038b3cc31aaf16aa0b974", size = 11215882, upload-time = "2026-01-22T22:29:57.032Z" }, + { url = 
"https://files.pythonhosted.org/packages/2c/10/a31f86169ec91c0705e618443ee74ede0bdd94da0a57b28e72db68b2dbac/ruff-0.14.14-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:14649acb1cf7b5d2d283ebd2f58d56b75836ed8c6f329664fa91cdea19e76e66", size = 11180549, upload-time = "2026-01-22T22:30:27.175Z" }, + { url = "https://files.pythonhosted.org/packages/fd/1e/c723f20536b5163adf79bdd10c5f093414293cdf567eed9bdb7b83940f3f/ruff-0.14.14-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:e8058d2145566510790eab4e2fad186002e288dec5e0d343a92fe7b0bc1b3e13", size = 10543416, upload-time = "2026-01-22T22:30:01.964Z" }, + { url = "https://files.pythonhosted.org/packages/3e/34/8a84cea7e42c2d94ba5bde1d7a4fae164d6318f13f933d92da6d7c2041ff/ruff-0.14.14-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:e651e977a79e4c758eb807f0481d673a67ffe53cfa92209781dfa3a996cf8412", size = 10285491, upload-time = "2026-01-22T22:30:29.51Z" }, + { url = "https://files.pythonhosted.org/packages/55/ef/b7c5ea0be82518906c978e365e56a77f8de7678c8bb6651ccfbdc178c29f/ruff-0.14.14-py3-none-musllinux_1_2_i686.whl", hash = "sha256:cc8b22da8d9d6fdd844a68ae937e2a0adf9b16514e9a97cc60355e2d4b219fc3", size = 10733525, upload-time = "2026-01-22T22:30:06.499Z" }, + { url = "https://files.pythonhosted.org/packages/6a/5b/aaf1dfbcc53a2811f6cc0a1759de24e4b03e02ba8762daabd9b6bd8c59e3/ruff-0.14.14-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:16bc890fb4cc9781bb05beb5ab4cd51be9e7cb376bf1dd3580512b24eb3fda2b", size = 11315626, upload-time = "2026-01-22T22:30:36.848Z" }, + { url = "https://files.pythonhosted.org/packages/2c/aa/9f89c719c467dfaf8ad799b9bae0df494513fb21d31a6059cb5870e57e74/ruff-0.14.14-py3-none-win32.whl", hash = "sha256:b530c191970b143375b6a68e6f743800b2b786bbcf03a7965b06c4bf04568167", size = 10502442, upload-time = "2026-01-22T22:30:38.93Z" }, + { url = "https://files.pythonhosted.org/packages/87/44/90fa543014c45560cae1fffc63ea059fb3575ee6e1cb654562197e5d16fb/ruff-0.14.14-py3-none-win_amd64.whl", hash = 
"sha256:3dde1435e6b6fe5b66506c1dff67a421d0b7f6488d466f651c07f4cab3bf20fd", size = 11630486, upload-time = "2026-01-22T22:30:10.852Z" }, + { url = "https://files.pythonhosted.org/packages/9e/6a/40fee331a52339926a92e17ae748827270b288a35ef4a15c9c8f2ec54715/ruff-0.14.14-py3-none-win_arm64.whl", hash = "sha256:56e6981a98b13a32236a72a8da421d7839221fa308b223b9283312312e5ac76c", size = 10920448, upload-time = "2026-01-22T22:30:15.417Z" }, +] diff --git a/packages/bigframes/scripts/templates/license.py.j2 b/packages/bigframes/scripts/templates/license.py.j2 new file mode 100644 index 000000000000..58d482ea3866 --- /dev/null +++ b/packages/bigframes/scripts/templates/license.py.j2 @@ -0,0 +1,13 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/packages/bigframes/scripts/templates/operation.py.j2 b/packages/bigframes/scripts/templates/operation.py.j2 new file mode 100644 index 000000000000..7e7c7f95b62f --- /dev/null +++ b/packages/bigframes/scripts/templates/operation.py.j2 @@ -0,0 +1,46 @@ +{% include 'license.py.j2' %} + +# +# DO NOT MODIFY THIS FILE DIRECTLY. 
+# This file was generated from: {{ yaml_path }} +# by the script: {{ script_path }} + +from __future__ import annotations + +import datetime +from typing import Any, Literal, Optional, TypeVar, Union + +from bigframes import dtypes +import bigframes.bigquery._googlesql +import bigframes.core.col +import bigframes.core.expression as ex +import bigframes.core.sentinels as sentinels +from bigframes.operations import googlesql +import bigframes.operations as ops +import bigframes.series as series + +T = TypeVar("T", series.Series, bigframes.core.col.Expression) + +{% for op in ops %} +{{ op.internal_name }} = googlesql.GoogleSqlScalarOp( + "{{ op.sql_name }}", + args=({{ op.arg_specs }}), + signature={{ op.signature }}, +) +{% endfor %} +{% for func in functions %} + + +def {{ func.name }}( +{% for arg in func.args %} + {{ arg.name }}: Union[T, bigframes.core.col.Expression, {{ arg.type_hint }}]{% if arg.default %} = {{ arg.default }}{% endif %}, +{% endfor %} +) -> T: + """{{ func.description }}""" + return bigframes.bigquery._googlesql.apply_googlesql_scalar_op( + {{ func.op_name }}, +{% for arg in func.args %} + {{ arg.name }}, +{% endfor %} + ) # type: ignore +{% endfor %} diff --git a/packages/bigframes/scripts/templates/test_operation.py.j2 b/packages/bigframes/scripts/templates/test_operation.py.j2 new file mode 100644 index 000000000000..aa87fcb94bee --- /dev/null +++ b/packages/bigframes/scripts/templates/test_operation.py.j2 @@ -0,0 +1,28 @@ +{% include 'license.py.j2' %} + +# +# DO NOT MODIFY THIS FILE DIRECTLY. 
+# This file was generated from: {{ yaml_path }} +# by the script: {{ script_path }} + +from typing import cast + +import pytest + +import bigframes.pandas as bpd +import {{ import_path }} as {{ short_name }} + +pytest.importorskip("pytest_snapshot") + + +{% for func in functions %} +def test_{{ func.name }}(scalar_types_df: bpd.DataFrame, snapshot): + result = {{ short_name }}.{{ func.name }}( +{% for arg in func.test_args %} + cast(bpd.Series, scalar_types_df["{{ arg.col_name }}"]), +{% endfor %} + ).to_frame() + snapshot.assert_match(result.sql.rstrip() + "\n", "out.sql") + + +{% endfor %} diff --git a/packages/bigframes/specs/bigframes-bigquery-generator.md b/packages/bigframes/specs/bigframes-bigquery-generator.md new file mode 100644 index 000000000000..26d1d45c9f51 --- /dev/null +++ b/packages/bigframes/specs/bigframes-bigquery-generator.md @@ -0,0 +1,101 @@ +# Code generation for bigframes.bigquery + +This document describes code generation for the `bigframes.bigquery` modules. +For detailed specifications on input and output types, refer to +[Contributing to bigframes.bigquery](./bigframes-bigquery-contributing.md). + +## Overview + +The script at `packages/bigframes/scripts/generate_bigframes_bigquery.py` +generates python submodules for the `bigframes.bigquery` module. When run +without any arguments, it iterates through all yaml files at +`packages/bigframes/scripts/data/sql-functions/**/*.yaml` to generate the code. + +The script at `packages/bigframes/scripts/check_bigframes_bigquery.py` iterates +through all the same yaml files and checks that the functions have been included +in the `bigframes.bigquery` module, as the `__init__.py` file requires manual +updates. + +## Running the generator + +Since the dependencies for the script differ from that of bigframes +and its test suite, use the self-contained Python script technique described at +https://docs.astral.sh/uv/guides/scripts/ +to automatically manage dependencies using `uv`. 
Therefore, the header of the +script will look something like: + +```python +#!/usr/bin/env -S uv run --script +# +# /// script +# dependencies = [ +# "jinja2", +# "pyyaml", +# ] +# /// +# +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# ... +``` + +To run the script: + +```bash +cd packages/bigframes +uv run scripts/generate_bigframes_bigquery.py +``` + +To improve reproducibility, we also check in the uv lock file generated by +running `uv lock --script scripts/generate_bigframes_bigquery.py`. + +## Generated code organization + +The `generate_bigframes_bigquery.py` script generates submodules of +`bigframes.bigquery._operations`, with the full path reflecting the organization +of the YAML files. For example, a YAML file at +`packages/bigframes/scripts/data/sql-functions/aead.yaml` corresponds to a +generated Python module at `bigframes.bigquery._operations.aead`. Likewise, +`packages/bigframes/scripts/data/sql-functions/builtins/bit.yaml` corresponds +to the `bigframes.bigquery._operations.builtins.bit` submodule. + +## Generated module implementation + +Each generated module has all functions defined in the YAML file converted to +the equivalent Python definition, including keyword arguments and docstrings. + +### Code generation + +The code will be templated using the jinja2 template engine. This allows +proposed changes to the templated code to be reviewed more easily. + +### Handling optional arguments + +When the user calls a Python function without specifying the optional +argument, that argument is omitted from the SQL text. To allow for explicit +NULL values to be passed in (None in Python), the default value is specified +to be a default sentinel value enum `bigframes.core.sentinels.DEFAULT`. For +example: + +```python +import bigframes.core.sentinels + +def current_date( + time_zone_expression: str | bigframes.core.sentinels.Default = bigframes.core.sentinels.DEFAULT, +): + ... 
+``` + +### Input and output types + +Refer to the table in +[Contributing to bigframes.bigquery](./bigframes-bigquery-contributing.md). + +### Internal bigframes operator + +Scalar functions should generate an expression using the `GoogleSqlScalarOp`. +This keeps the implementation as scalar SQL functions consistent. + +Aggregate, analytic, and table-valued functions currently require custom ops. As +such, those functions are currently out of scope for this generator. diff --git a/packages/bigframes/tests/unit/bigquery/_operations/snapshots/test_aead/test_decrypt_bytes/out.sql b/packages/bigframes/tests/unit/bigquery/_operations/snapshots/test_aead/test_decrypt_bytes/out.sql new file mode 100644 index 000000000000..5b8b6416b36f --- /dev/null +++ b/packages/bigframes/tests/unit/bigquery/_operations/snapshots/test_aead/test_decrypt_bytes/out.sql @@ -0,0 +1,4 @@ +SELECT + `rowindex`, + AEAD.DECRYPT_BYTES(`bytes_col`, `bytes_col`, `bytes_col`) AS `0` +FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` diff --git a/packages/bigframes/tests/unit/bigquery/_operations/snapshots/test_aead/test_decrypt_string/out.sql b/packages/bigframes/tests/unit/bigquery/_operations/snapshots/test_aead/test_decrypt_string/out.sql new file mode 100644 index 000000000000..97b1ccff9c75 --- /dev/null +++ b/packages/bigframes/tests/unit/bigquery/_operations/snapshots/test_aead/test_decrypt_string/out.sql @@ -0,0 +1,4 @@ +SELECT + `rowindex`, + AEAD.DECRYPT_STRING(`bytes_col`, `bytes_col`, `string_col`) AS `0` +FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` diff --git a/packages/bigframes/tests/unit/bigquery/_operations/snapshots/test_aead/test_encrypt/out.sql b/packages/bigframes/tests/unit/bigquery/_operations/snapshots/test_aead/test_encrypt/out.sql new file mode 100644 index 000000000000..9ab9f8c0a7bb --- /dev/null +++ b/packages/bigframes/tests/unit/bigquery/_operations/snapshots/test_aead/test_encrypt/out.sql @@ -0,0 +1,4 @@ +SELECT + `rowindex`, + 
AEAD.ENCRYPT(`bytes_col`, `bytes_col`, `bytes_col`) AS `0` +FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` diff --git a/packages/bigframes/tests/unit/bigquery/_operations/test_aead.py b/packages/bigframes/tests/unit/bigquery/_operations/test_aead.py new file mode 100644 index 000000000000..f9f8fdd2dd88 --- /dev/null +++ b/packages/bigframes/tests/unit/bigquery/_operations/test_aead.py @@ -0,0 +1,53 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# DO NOT MODIFY THIS FILE DIRECTLY. 
+# This file was generated from: scripts/data/sql-functions/aead.yaml +# by the script: scripts/generate_bigframes_bigquery.py + +from typing import cast + +import pytest + +import bigframes.bigquery._operations.aead as aead +import bigframes.pandas as bpd + +pytest.importorskip("pytest_snapshot") + + +def test_decrypt_bytes(scalar_types_df: bpd.DataFrame, snapshot): + result = aead.decrypt_bytes( + cast(bpd.Series, scalar_types_df["bytes_col"]), + cast(bpd.Series, scalar_types_df["bytes_col"]), + cast(bpd.Series, scalar_types_df["bytes_col"]), + ).to_frame() + snapshot.assert_match(result.sql.rstrip() + "\n", "out.sql") + + +def test_decrypt_string(scalar_types_df: bpd.DataFrame, snapshot): + result = aead.decrypt_string( + cast(bpd.Series, scalar_types_df["bytes_col"]), + cast(bpd.Series, scalar_types_df["bytes_col"]), + cast(bpd.Series, scalar_types_df["string_col"]), + ).to_frame() + snapshot.assert_match(result.sql.rstrip() + "\n", "out.sql") + + +def test_encrypt(scalar_types_df: bpd.DataFrame, snapshot): + result = aead.encrypt( + cast(bpd.Series, scalar_types_df["bytes_col"]), + cast(bpd.Series, scalar_types_df["bytes_col"]), + cast(bpd.Series, scalar_types_df["bytes_col"]), + ).to_frame() + snapshot.assert_match(result.sql.rstrip() + "\n", "out.sql") diff --git a/packages/bigframes/tests/unit/conftest.py b/packages/bigframes/tests/unit/conftest.py index a9b26afeef29..3ab217cf09ba 100644 --- a/packages/bigframes/tests/unit/conftest.py +++ b/packages/bigframes/tests/unit/conftest.py @@ -12,7 +12,22 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import pathlib +import typing + +import pandas as pd +import pyarrow as pa import pytest +from google.cloud import bigquery + +import bigframes.core as core +import bigframes.pandas as bpd +import bigframes.testing.mocks as mocks +import bigframes.testing.utils +from bigframes import dtypes + +CURRENT_DIR = pathlib.Path(__file__).parent +DATA_DIR = CURRENT_DIR.parent / "data" @pytest.fixture(scope="session") @@ -22,3 +37,253 @@ def polars_session(): from bigframes.testing import polars_session return polars_session.TestSession() + + +def _create_compiler_session(table_name, table_schema): + """Helper function to create a compiler session.""" + from bigframes.testing import compiler_session + + anonymous_dataset = bigquery.DatasetReference.from_string( + "bigframes-dev.sqlglot_test" + ) + session = mocks.create_bigquery_session( + table_name=table_name, + table_schema=table_schema, + anonymous_dataset=anonymous_dataset, + ) + session._executor = compiler_session.SQLCompilerExecutor() + return session + + +@pytest.fixture(scope="session") +def compiler_session(scalar_types_table_schema): + """Compiler session for scalar types.""" + return _create_compiler_session("scalar_types", scalar_types_table_schema) + + +@pytest.fixture(scope="session") +def compiler_session_w_repeated_types(repeated_types_table_schema): + """Compiler session for repeated data types.""" + return _create_compiler_session("repeated_types", repeated_types_table_schema) + + +@pytest.fixture(scope="session") +def compiler_session_w_nested_structs_types(nested_structs_types_table_schema): + """Compiler session for nested STRUCT data types.""" + return _create_compiler_session( + "nested_structs_types", nested_structs_types_table_schema + ) + + +@pytest.fixture(scope="session") +def compiler_session_w_json_types(json_types_table_schema): + """Compiler session for JSON data types.""" + return _create_compiler_session("json_types", json_types_table_schema) + + +@pytest.fixture(scope="session") +def 
scalar_types_table_schema() -> typing.Sequence[bigquery.SchemaField]: + return [ + bigquery.SchemaField("bool_col", "BOOLEAN"), + bigquery.SchemaField("bytes_col", "BYTES"), + bigquery.SchemaField("date_col", "DATE"), + bigquery.SchemaField("datetime_col", "DATETIME"), + bigquery.SchemaField("geography_col", "GEOGRAPHY"), + bigquery.SchemaField("int64_col", "INTEGER"), + bigquery.SchemaField("int64_too", "INTEGER"), + bigquery.SchemaField("numeric_col", "NUMERIC"), + bigquery.SchemaField("float64_col", "FLOAT"), + bigquery.SchemaField("rowindex", "INTEGER"), + bigquery.SchemaField("rowindex_2", "INTEGER", mode="REQUIRED"), + bigquery.SchemaField("string_col", "STRING"), + bigquery.SchemaField("time_col", "TIME"), + bigquery.SchemaField("timestamp_col", "TIMESTAMP"), + bigquery.SchemaField("duration_col", "INTEGER"), + ] + + +@pytest.fixture(scope="session") +def scalar_types_df(compiler_session) -> bpd.DataFrame: + """Returns a BigFrames DataFrame containing all scalar types and using the `rowindex` + column as the index.""" + bf_df = compiler_session._loader.read_gbq_table( + "bigframes-dev.sqlglot_test.scalar_types", + enable_snapshot=False, + ) + bf_df = bf_df.set_index("rowindex", drop=False) + return bf_df + + +@pytest.fixture(scope="session") +def scalar_types_pandas_df() -> pd.DataFrame: + """Returns a pandas DataFrame containing all scalar types and using the `rowindex` + column as the index.""" + # TODO: add tests for empty dataframes + df = pd.read_json( + DATA_DIR / "scalars.jsonl", + lines=True, + ) + bigframes.testing.utils.convert_pandas_dtypes(df, bytes_col=True) + + df = df.set_index("rowindex", drop=False) + return df + + +@pytest.fixture(scope="module") +def scalar_types_array_value( + scalar_types_pandas_df: pd.DataFrame, compiler_session: bigframes.Session +) -> core.ArrayValue: + managed_data_source = core.local_data.ManagedArrowTable.from_pandas( + scalar_types_pandas_df + ) + return core.ArrayValue.from_managed(managed_data_source, 
compiler_session) + + +@pytest.fixture(scope="session") +def nested_structs_types_table_schema() -> typing.Sequence[bigquery.SchemaField]: + return [ + bigquery.SchemaField("id", "INTEGER"), + bigquery.SchemaField( + "people", + "RECORD", + fields=[ + bigquery.SchemaField("name", "STRING"), + bigquery.SchemaField("age", "INTEGER"), + bigquery.SchemaField( + "address", + "RECORD", + fields=[ + bigquery.SchemaField("city", "STRING"), + bigquery.SchemaField("country", "STRING"), + ], + ), + ], + ), + ] + + +@pytest.fixture(scope="session") +def nested_structs_types_df(compiler_session_w_nested_structs_types) -> bpd.DataFrame: + """Returns a BigFrames DataFrame containing nested STRUCT types and using the `id` + column as the index.""" + bf_df = compiler_session_w_nested_structs_types._loader.read_gbq_table( + "bigframes-dev.sqlglot_test.nested_structs_types", + enable_snapshot=False, + ) + bf_df = bf_df.set_index("id", drop=False) + return bf_df + + +@pytest.fixture(scope="session") +def nested_structs_pandas_df() -> pd.DataFrame: + """Returns a pandas DataFrame containing STRUCT types and using the `id` + column as the index.""" + + df = pd.read_json( + DATA_DIR / "nested_structs.jsonl", + lines=True, + ) + df = df.set_index("id") + + address_struct_schema = pa.struct( + [pa.field("city", pa.string()), pa.field("country", pa.string())] + ) + person_struct_schema = pa.struct( + [ + pa.field("name", pa.string()), + pa.field("age", pa.int64()), + pa.field("address", address_struct_schema), + ] + ) + df["person"] = df["person"].astype(pd.ArrowDtype(person_struct_schema)) + return df + + +@pytest.fixture(scope="session") +def repeated_types_table_schema() -> typing.Sequence[bigquery.SchemaField]: + return [ + bigquery.SchemaField("rowindex", "INTEGER"), + bigquery.SchemaField("int_list_col", "INTEGER", "REPEATED"), + bigquery.SchemaField("bool_list_col", "BOOLEAN", "REPEATED"), + bigquery.SchemaField("float_list_col", "FLOAT", "REPEATED"), +
bigquery.SchemaField("date_list_col", "DATE", "REPEATED"), + bigquery.SchemaField("date_time_list_col", "DATETIME", "REPEATED"), + bigquery.SchemaField("numeric_list_col", "NUMERIC", "REPEATED"), + bigquery.SchemaField("string_list_col", "STRING", "REPEATED"), + ] + + +@pytest.fixture(scope="session") +def repeated_types_df(compiler_session_w_repeated_types) -> bpd.DataFrame: + """Returns a BigFrames DataFrame containing repeated types and using the `rowindex` + column as the index.""" + bf_df = compiler_session_w_repeated_types._loader.read_gbq_table( + "bigframes-dev.sqlglot_test.repeated_types", + enable_snapshot=False, + ) + bf_df = bf_df.set_index("rowindex", drop=False) + return bf_df + + +@pytest.fixture(scope="session") +def repeated_types_pandas_df() -> pd.DataFrame: + """Returns a pandas DataFrame containing LIST types and using the `rowindex` + column as the index.""" + + df = pd.read_json( + DATA_DIR / "repeated.jsonl", + lines=True, + ) + # TODO: add dtype conversion here if needed. + df = df.set_index("rowindex") + return df + + +@pytest.fixture(scope="session") +def json_types_table_schema() -> typing.Sequence[bigquery.SchemaField]: + return [ + bigquery.SchemaField("rowindex", "INTEGER"), + bigquery.SchemaField("json_col", "JSON"), + ] + + +@pytest.fixture(scope="session") +def json_types_df(compiler_session_w_json_types) -> bpd.DataFrame: + """Returns a BigFrames DataFrame containing JSON types and using the `rowindex` + column as the index.""" + bf_df = compiler_session_w_json_types._loader.read_gbq_table( + "bigframes-dev.sqlglot_test.json_types", + enable_snapshot=False, + ) + # TODO(b/427305807): Why `drop=False` will produce two "rowindex" columns?
+ bf_df = bf_df.set_index("rowindex", drop=True) + return bf_df + + +@pytest.fixture(scope="session") +def json_pandas_df() -> pd.DataFrame: + """Returns a pandas DataFrame containing JSON types and using the `rowindex` + column as the index.""" + json_data = [ + "null", + "true", + "100", + "0.98", + '"a string"', + "[]", + "[1, 2, 3]", + '[{"a": 1}, {"a": 2}, {"a": null}, {}]', + '"100"', + '{"date": "2024-07-16"}', + '{"int_value": 2, "null_filed": null}', + '{"list_data": [10, 20, 30]}', + ] + df = pd.DataFrame( + { + "rowindex": pd.Series(range(len(json_data)), dtype=dtypes.INT_DTYPE), + "json_col": pd.Series(json_data, dtype=dtypes.JSON_DTYPE), + }, + ) + # TODO(b/427305807): Why `drop=False` will produce two "rowindex" columns? + df = df.set_index("rowindex", drop=True) + return df diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/conftest.py b/packages/bigframes/tests/unit/core/compile/sqlglot/conftest.py deleted file mode 100644 index fd914f589a50..000000000000 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/conftest.py +++ /dev/null @@ -1,280 +0,0 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import pathlib -import typing - -import pandas as pd -import pyarrow as pa -import pytest -from google.cloud import bigquery - -import bigframes.core as core -import bigframes.pandas as bpd -import bigframes.testing.mocks as mocks -import bigframes.testing.utils -from bigframes import dtypes - -CURRENT_DIR = pathlib.Path(__file__).parent -DATA_DIR = CURRENT_DIR.parent.parent.parent.parent / "data" - - -def _create_compiler_session(table_name, table_schema): - """Helper function to create a compiler session.""" - from bigframes.testing import compiler_session - - anonymous_dataset = bigquery.DatasetReference.from_string( - "bigframes-dev.sqlglot_test" - ) - session = mocks.create_bigquery_session( - table_name=table_name, - table_schema=table_schema, - anonymous_dataset=anonymous_dataset, - ) - session._executor = compiler_session.SQLCompilerExecutor() - return session - - -@pytest.fixture(scope="session") -def compiler_session(scalar_types_table_schema): - """Compiler session for scalar types.""" - return _create_compiler_session("scalar_types", scalar_types_table_schema) - - -@pytest.fixture(scope="session") -def compiler_session_w_repeated_types(repeated_types_table_schema): - """Compiler session for repeated data types.""" - return _create_compiler_session("repeated_types", repeated_types_table_schema) - - -@pytest.fixture(scope="session") -def compiler_session_w_nested_structs_types(nested_structs_types_table_schema): - """Compiler session for nested STRUCT data types.""" - return _create_compiler_session( - "nested_structs_types", nested_structs_types_table_schema - ) - - -@pytest.fixture(scope="session") -def compiler_session_w_json_types(json_types_table_schema): - """Compiler session for JSON data types.""" - return _create_compiler_session("json_types", json_types_table_schema) - - -@pytest.fixture(scope="session") -def scalar_types_table_schema() -> typing.Sequence[bigquery.SchemaField]: - return [ - bigquery.SchemaField("bool_col", "BOOLEAN"), - 
bigquery.SchemaField("bytes_col", "BYTES"), - bigquery.SchemaField("date_col", "DATE"), - bigquery.SchemaField("datetime_col", "DATETIME"), - bigquery.SchemaField("geography_col", "GEOGRAPHY"), - bigquery.SchemaField("int64_col", "INTEGER"), - bigquery.SchemaField("int64_too", "INTEGER"), - bigquery.SchemaField("numeric_col", "NUMERIC"), - bigquery.SchemaField("float64_col", "FLOAT"), - bigquery.SchemaField("rowindex", "INTEGER"), - bigquery.SchemaField("rowindex_2", "INTEGER", mode="REQUIRED"), - bigquery.SchemaField("string_col", "STRING"), - bigquery.SchemaField("time_col", "TIME"), - bigquery.SchemaField("timestamp_col", "TIMESTAMP"), - bigquery.SchemaField("duration_col", "INTEGER"), - ] - - -@pytest.fixture(scope="session") -def scalar_types_df(compiler_session) -> bpd.DataFrame: - """Returns a BigFrames DataFrame containing all scalar types and using the `rowindex` - column as the index.""" - bf_df = compiler_session._loader.read_gbq_table( - "bigframes-dev.sqlglot_test.scalar_types", - enable_snapshot=False, - ) - bf_df = bf_df.set_index("rowindex", drop=False) - return bf_df - - -@pytest.fixture(scope="session") -def scalar_types_pandas_df() -> pd.DataFrame: - """Returns a pandas DataFrame containing all scalar types and using the `rowindex` - column as the index.""" - # TODO: add tests for empty dataframes - df = pd.read_json( - DATA_DIR / "scalars.jsonl", - lines=True, - ) - bigframes.testing.utils.convert_pandas_dtypes(df, bytes_col=True) - - df = df.set_index("rowindex", drop=False) - return df - - -@pytest.fixture(scope="module") -def scalar_types_array_value( - scalar_types_pandas_df: pd.DataFrame, compiler_session: bigframes.Session -) -> core.ArrayValue: - managed_data_source = core.local_data.ManagedArrowTable.from_pandas( - scalar_types_pandas_df - ) - return core.ArrayValue.from_managed(managed_data_source, compiler_session) - - -@pytest.fixture(scope="session") -def nested_structs_types_table_schema() -> typing.Sequence[bigquery.SchemaField]: - 
return [ - bigquery.SchemaField("id", "INTEGER"), - bigquery.SchemaField( - "people", - "RECORD", - fields=[ - bigquery.SchemaField("name", "STRING"), - bigquery.SchemaField("age", "INTEGER"), - bigquery.SchemaField( - "address", - "RECORD", - fields=[ - bigquery.SchemaField("city", "STRING"), - bigquery.SchemaField("country", "STRING"), - ], - ), - ], - ), - ] - - -@pytest.fixture(scope="session") -def nested_structs_types_df(compiler_session_w_nested_structs_types) -> bpd.DataFrame: - """Returns a BigFrames DataFrame containing all scalar types and using the `rowindex` - column as the index.""" - bf_df = compiler_session_w_nested_structs_types._loader.read_gbq_table( - "bigframes-dev.sqlglot_test.nested_structs_types", - enable_snapshot=False, - ) - bf_df = bf_df.set_index("id", drop=False) - return bf_df - - -@pytest.fixture(scope="session") -def nested_structs_pandas_df() -> pd.DataFrame: - """Returns a pandas DataFrame containing STRUCT types and using the `id` - column as the index.""" - - df = pd.read_json( - DATA_DIR / "nested_structs.jsonl", - lines=True, - ) - df = df.set_index("id") - - address_struct_schema = pa.struct( - [pa.field("city", pa.string()), pa.field("country", pa.string())] - ) - person_struct_schema = pa.struct( - [ - pa.field("name", pa.string()), - pa.field("age", pa.int64()), - pa.field("address", address_struct_schema), - ] - ) - df["person"] = df["person"].astype(pd.ArrowDtype(person_struct_schema)) - return df - - -@pytest.fixture(scope="session") -def repeated_types_table_schema() -> typing.Sequence[bigquery.SchemaField]: - return [ - bigquery.SchemaField("rowindex", "INTEGER"), - bigquery.SchemaField("int_list_col", "INTEGER", "REPEATED"), - bigquery.SchemaField("bool_list_col", "BOOLEAN", "REPEATED"), - bigquery.SchemaField("float_list_col", "FLOAT", "REPEATED"), - bigquery.SchemaField("date_list_col", "DATE", "REPEATED"), - bigquery.SchemaField("date_time_list_col", "DATETIME", "REPEATED"), - 
bigquery.SchemaField("numeric_list_col", "NUMERIC", "REPEATED"), - bigquery.SchemaField("string_list_col", "STRING", "REPEATED"), - ] - - -@pytest.fixture(scope="session") -def repeated_types_df(compiler_session_w_repeated_types) -> bpd.DataFrame: - """Returns a BigFrames DataFrame containing all scalar types and using the `rowindex` - column as the index.""" - bf_df = compiler_session_w_repeated_types._loader.read_gbq_table( - "bigframes-dev.sqlglot_test.repeated_types", - enable_snapshot=False, - ) - bf_df = bf_df.set_index("rowindex", drop=False) - return bf_df - - -@pytest.fixture(scope="session") -def repeated_types_pandas_df() -> pd.DataFrame: - """Returns a pandas DataFrame containing LIST types and using the `rowindex` - column as the index.""" - - df = pd.read_json( - DATA_DIR / "repeated.jsonl", - lines=True, - ) - # TODO: add dtype conversion here if needed. - df = df.set_index("rowindex") - return df - - -@pytest.fixture(scope="session") -def json_types_table_schema() -> typing.Sequence[bigquery.SchemaField]: - return [ - bigquery.SchemaField("rowindex", "INTEGER"), - bigquery.SchemaField("json_col", "JSON"), - ] - - -@pytest.fixture(scope="session") -def json_types_df(compiler_session_w_json_types) -> bpd.DataFrame: - """Returns a BigFrames DataFrame containing JSON types and using the `rowindex` - column as the index.""" - bf_df = compiler_session_w_json_types._loader.read_gbq_table( - "bigframes-dev.sqlglot_test.json_types", - enable_snapshot=False, - ) - # TODO(b/427305807): Why `drop=False` will produce two "rowindex" columns? 
- bf_df = bf_df.set_index("rowindex", drop=True) - return bf_df - - -@pytest.fixture(scope="session") -def json_pandas_df() -> pd.DataFrame: - """Returns a pandas DataFrame containing JSON types and using the `rowindex` - column as the index.""" - json_data = [ - "null", - "true", - "100", - "0.98", - '"a string"', - "[]", - "[1, 2, 3]", - '[{"a": 1}, {"a": 2}, {"a": null}, {}]', - '"100"', - '{"date": "2024-07-16"}', - '{"int_value": 2, "null_filed": null}', - '{"list_data": [10, 20, 30]}', - ] - df = pd.DataFrame( - { - "rowindex": pd.Series(range(len(json_data)), dtype=dtypes.INT_DTYPE), - "json_col": pd.Series(json_data, dtype=dtypes.JSON_DTYPE), - }, - ) - # TODO(b/427305807): Why `drop=False` will produce two "rowindex" columns? - df = df.set_index("rowindex", drop=True) - return df