From e80e8f81f92fec6cc5b2ce1d76cb4bf42c575f75 Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Wed, 1 Apr 2026 01:07:04 +0000 Subject: [PATCH] feat: Add numpy ufunc support to col expressions --- bigframes/core/col.py | 37 +++++++++++++++++++++++++++++++------ tests/unit/test_col.py | 24 ++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 6 deletions(-) diff --git a/bigframes/core/col.py b/bigframes/core/col.py index cad30f8f33..83e2808cff 100644 --- a/bigframes/core/col.py +++ b/bigframes/core/col.py @@ -17,6 +17,7 @@ from typing import Any, Hashable, Literal, TYPE_CHECKING import bigframes_vendored.pandas.core.col as pd_col +import numpy from bigframes.core import agg_expressions, window_spec import bigframes.core.expression as bf_expression @@ -56,14 +57,10 @@ def _apply_binary_op( alignment: Literal["outer", "left"] = "outer", reverse: bool = False, ): - if isinstance(other, Expression): - other_value = other._value - else: - other_value = bf_expression.const(other) if reverse: - return Expression(op.as_expr(other_value, self._value)) + return Expression(op.as_expr(_as_bf_expr(other), self._value)) else: - return Expression(op.as_expr(self._value, other_value)) + return Expression(op.as_expr(self._value, _as_bf_expr(other))) def __add__(self, other: Any) -> Expression: return self._apply_binary_op(other, bf_ops.add_op) @@ -170,6 +167,34 @@ def str(self) -> strings.StringMethods: return strings.StringMethods(self) + def __array_ufunc__( + self, ufunc: numpy.ufunc, method: __builtins__.str, *inputs, **kwargs + ) -> Expression: + """Translate supported numpy ufunc calls on this expression into bigframes ops. 
+ See: https://numpy.org/doc/stable/reference/ufuncs.html + """ + # Only the "__call__" method with at most two inputs and no keyword arguments is supported + if method != "__call__" or len(inputs) > 2 or len(kwargs) > 0: + return NotImplemented + + if len(inputs) == 1 and ufunc in bf_ops.NUMPY_TO_OP: + op = bf_ops.NUMPY_TO_OP[ufunc] + return Expression(op.as_expr(self._value)) + if len(inputs) == 2 and ufunc in bf_ops.NUMPY_TO_BINOP: + binop = bf_ops.NUMPY_TO_BINOP[ufunc] + if inputs[0] is self: + return Expression(binop.as_expr(self._value, _as_bf_expr(inputs[1]))) + else: + return Expression(binop.as_expr(_as_bf_expr(inputs[0]), self._value)) + + return NotImplemented + + +def _as_bf_expr(arg: Any) -> bf_expression.Expression: + if isinstance(arg, Expression): + return arg._value + return bf_expression.const(arg) + def col(col_name: Hashable) -> Expression: return Expression(bf_expression.free_var(col_name)) diff --git a/tests/unit/test_col.py b/tests/unit/test_col.py index c3fcb10c9d..c25d48746c 100644 --- a/tests/unit/test_col.py +++ b/tests/unit/test_col.py @@ -16,6 +16,7 @@ import pathlib from typing import Generator +import numpy as np import pandas as pd import pytest @@ -246,3 +247,26 @@ def test_col_dt_accessor(scalars_dfs): # int64[pyarrow] vs Int64 assert_frame_equal(bf_result, pd_result, check_dtype=False) + + +def test_col_numpy_ufunc(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + + bf_kwargs = { + "sqrt": np.sqrt(bpd.col("float64_col")), # type: ignore + "add_const": np.add(bpd.col("float64_col"), 2.4), # type: ignore + "radd_const": np.add(2.4, bpd.col("float64_col")), # type: ignore + "add_cols": np.add(bpd.col("float64_col"), bpd.col("int64_col")), # type: ignore + } + pd_kwargs = { + "sqrt": np.sqrt(pd.col("float64_col")), # type: ignore + "add_const": np.add(pd.col("float64_col"), 2.4), # type: ignore + "radd_const": np.add(2.4, pd.col("float64_col")), # type: ignore + "add_cols": np.add(pd.col("float64_col"), pd.col("int64_col")), # type: ignore + } + + bf_result = 
scalars_df.assign(**bf_kwargs).to_pandas() + pd_result = scalars_pandas_df.assign(**pd_kwargs) # type: ignore + + # int64[pyarrow] vs Int64 + assert_frame_equal(bf_result, pd_result, check_dtype=False)