diff --git a/bigframes/testing/__init__.py b/bigframes/testing/__init__.py index 529c08241d..9c1fb7c283 100644 --- a/bigframes/testing/__init__.py +++ b/bigframes/testing/__init__.py @@ -17,3 +17,10 @@ These modules are provided for testing the BigQuery DataFrames package. The interface is not considered stable. """ +from bigframes.testing.utils import ( + assert_frame_equal, + assert_index_equal, + assert_series_equal, +) + +__all__ = ["assert_frame_equal", "assert_series_equal", "assert_index_equal"] diff --git a/bigframes/testing/utils.py b/bigframes/testing/utils.py index 6679f53b2c..8d2018b77f 100644 --- a/bigframes/testing/utils.py +++ b/bigframes/testing/utils.py @@ -157,6 +157,10 @@ def assert_series_equal( pd.testing.assert_series_equal(left, right, **kwargs) +def assert_index_equal(left, right, **kwargs): + pd.testing.assert_index_equal(left, right, **kwargs) + + def _standardize_index(idx): return pd.Index(list(idx), name=idx.name) diff --git a/noxfile.py b/noxfile.py index a8a1a84987..87b87bc909 100644 --- a/noxfile.py +++ b/noxfile.py @@ -20,7 +20,6 @@ import multiprocessing import os import pathlib -import re import shutil import time from typing import Dict, List @@ -588,99 +587,36 @@ def prerelease(session: nox.sessions.Session, tests_path, extra_pytest_options=( constraints_path = str( CURRENT_DIRECTORY / "testing" / f"constraints-{session.python}.txt" ) - - # Ignore officially released versions of certain packages specified in - # testing/constraints-*.txt and install a more recent, pre-release versions - # directly - already_installed = set() + session.install( + *set(UNIT_TEST_STANDARD_DEPENDENCIES + SYSTEM_TEST_STANDARD_DEPENDENCIES), + "-c", + constraints_path, + "-e", + ".", + ) # PyArrow prerelease packages are published to an alternative PyPI host. 
# https://arrow.apache.org/docs/python/install.html#installing-nightly-packages session.install( + "--no-deps", + "--upgrade", "--extra-index-url", "https://pypi.fury.io/arrow-nightlies/", - "--prefer-binary", - "--pre", - "--upgrade", "pyarrow", - ) - already_installed.add("pyarrow") - - session.install( - "--prefer-binary", - "--pre", - "--upgrade", # We exclude each version individually so that we can continue to test # some prerelease packages. See: # https://github.com/googleapis/python-bigquery-dataframes/pull/268#discussion_r1423205172 # "pandas!=2.1.4, !=2.2.0rc0, !=2.2.0, !=2.2.1", "pandas", - ) - already_installed.add("pandas") - - # Try to avoid a cap on our SQLGlot so that bigframes - # can be integrated with SQLMesh. See: - # https://github.com/googleapis/python-bigquery-dataframes/issues/942 - # If SQLGlot introduces something that breaks us, lets file an issue - # upstream and/or make sure we fix bigframes to work with it. - session.install( - "--upgrade", - "git+https://github.com/tobymao/sqlglot.git#egg=sqlglot", - ) - already_installed.add("sqlglot") - - # Workaround https://github.com/googleapis/python-db-dtypes-pandas/issues/178 - session.install("--no-deps", "db-dtypes") - already_installed.add("db-dtypes") - - # Ensure we catch breaking changes in the client libraries early. - session.install( - "--upgrade", + # Workaround https://github.com/googleapis/python-db-dtypes-pandas/issues/178 + "db-dtypes", + # Ensure we catch breaking changes in the client libraries early. 
"git+https://github.com/googleapis/python-bigquery.git#egg=google-cloud-bigquery", - ) - already_installed.add("google-cloud-bigquery") - session.install( "--upgrade", "-e", "git+https://github.com/googleapis/google-cloud-python.git#egg=google-cloud-bigquery-storage&subdirectory=packages/google-cloud-bigquery-storage", - ) - already_installed.add("google-cloud-bigquery-storage") - session.install( - "--upgrade", "git+https://github.com/googleapis/python-bigquery-pandas.git#egg=pandas-gbq", ) - already_installed.add("pandas-gbq") - - session.install( - *set(UNIT_TEST_STANDARD_DEPENDENCIES + SYSTEM_TEST_STANDARD_DEPENDENCIES), - "-c", - constraints_path, - ) - - # Because we test minimum dependency versions on the minimum Python - # version, the first version we test with in the unit tests sessions has a - # constraints file containing all dependencies and extras. - with open( - CURRENT_DIRECTORY / "testing" / f"constraints-{DEFAULT_PYTHON_VERSION}.txt", - encoding="utf-8", - ) as constraints_file: - constraints_text = constraints_file.read() - - # Ignore leading whitespace and comment lines. - deps = [ - match.group(1) - for match in re.finditer( - r"^\s*(\S+)(?===\S+)", constraints_text, flags=re.MULTILINE - ) - if match.group(1) not in already_installed - ] - - print(already_installed) - - # We use --no-deps to ensure that pre-release versions aren't overwritten - # by the version ranges in setup.py. - session.install(*deps) - session.install("--no-deps", "-e", ".") # Print out prerelease package versions. 
session.run("python", "-m", "pip", "freeze") diff --git a/tests/system/small/bigquery/test_array.py b/tests/system/small/bigquery/test_array.py index 2ceb90e22c..2c2b2001eb 100644 --- a/tests/system/small/bigquery/test_array.py +++ b/tests/system/small/bigquery/test_array.py @@ -67,7 +67,11 @@ ) def test_array_length(input_data, expected): series = bpd.Series(input_data) - expected = pd.Series(expected, dtype=bigframes.dtypes.INT_DTYPE) + expected = pd.Series( + expected, + index=pd.Index(range(len(input_data)), dtype="Int64"), + dtype=bigframes.dtypes.INT_DTYPE, + ) pd.testing.assert_series_equal( bbq.array_length(series).to_pandas(), expected, diff --git a/tests/system/small/bigquery/test_sql.py b/tests/system/small/bigquery/test_sql.py index c519b427fa..fa43c24965 100644 --- a/tests/system/small/bigquery/test_sql.py +++ b/tests/system/small/bigquery/test_sql.py @@ -12,12 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import pandas as pd import pytest import bigframes.bigquery as bbq import bigframes.dtypes as dtypes import bigframes.pandas as bpd +import bigframes.testing def test_sql_scalar_for_all_scalar_types(scalars_df_null_index): @@ -59,8 +59,8 @@ def test_sql_scalar_for_bool_series(scalars_df_index): series: bpd.Series = scalars_df_index["bool_col"] result = bbq.sql_scalar("CAST({0} AS INT64)", [series]) expected = series.astype(dtypes.INT_DTYPE) - expected.name = None - pd.testing.assert_series_equal(result.to_pandas(), expected.to_pandas()) + expected.name = result.name + bigframes.testing.assert_series_equal(result.to_pandas(), expected.to_pandas()) @pytest.mark.parametrize( @@ -83,8 +83,8 @@ def test_sql_scalar_outputs_all_scalar_types(scalars_df_index, column_name): series: bpd.Series = scalars_df_index[column_name] result = bbq.sql_scalar("{0}", [series]) expected = series - expected.name = None - pd.testing.assert_series_equal(result.to_pandas(), expected.to_pandas()) + 
expected.name = result.name + bigframes.testing.assert_series_equal(result.to_pandas(), expected.to_pandas()) def test_sql_scalar_for_array_series(repeated_df): @@ -114,14 +114,14 @@ def test_sql_scalar_for_array_series(repeated_df): + repeated_df["numeric_list_col"].list.len() + repeated_df["string_list_col"].list.len() ) - pd.testing.assert_series_equal(result.to_pandas(), expected.to_pandas()) + bigframes.testing.assert_series_equal(result.to_pandas(), expected.to_pandas()) def test_sql_scalar_outputs_array_series(repeated_df): result = bbq.sql_scalar("{0}", [repeated_df["int_list_col"]]) expected = repeated_df["int_list_col"] - expected.name = None - pd.testing.assert_series_equal(result.to_pandas(), expected.to_pandas()) + expected.name = result.name + bigframes.testing.assert_series_equal(result.to_pandas(), expected.to_pandas()) def test_sql_scalar_for_struct_series(nested_structs_df): @@ -132,14 +132,14 @@ def test_sql_scalar_for_struct_series(nested_structs_df): expected = nested_structs_df["person"].struct.field( "name" ).str.len() + nested_structs_df["person"].struct.field("age") - pd.testing.assert_series_equal(result.to_pandas(), expected.to_pandas()) + bigframes.testing.assert_series_equal(result.to_pandas(), expected.to_pandas()) def test_sql_scalar_outputs_struct_series(nested_structs_df): result = bbq.sql_scalar("{0}", [nested_structs_df["person"]]) expected = nested_structs_df["person"] - expected.name = None - pd.testing.assert_series_equal(result.to_pandas(), expected.to_pandas()) + expected.name = result.name + bigframes.testing.assert_series_equal(result.to_pandas(), expected.to_pandas()) def test_sql_scalar_for_json_series(json_df): @@ -150,12 +150,12 @@ def test_sql_scalar_for_json_series(json_df): ], ) expected = bbq.json_value(json_df["json_col"], "$.int_value") - expected.name = None - pd.testing.assert_series_equal(result.to_pandas(), expected.to_pandas()) + expected.name = result.name + 
bigframes.testing.assert_series_equal(result.to_pandas(), expected.to_pandas()) def test_sql_scalar_outputs_json_series(json_df): result = bbq.sql_scalar("{0}", [json_df["json_col"]]) expected = json_df["json_col"] - expected.name = None - pd.testing.assert_series_equal(result.to_pandas(), expected.to_pandas()) + expected.name = result.name + bigframes.testing.assert_series_equal(result.to_pandas(), expected.to_pandas()) diff --git a/tests/system/small/bigquery/test_struct.py b/tests/system/small/bigquery/test_struct.py index 58c822f642..04ca974ed2 100644 --- a/tests/system/small/bigquery/test_struct.py +++ b/tests/system/small/bigquery/test_struct.py @@ -12,11 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -import pandas as pd import pytest import bigframes.bigquery as bbq import bigframes.series as series +import bigframes.testing @pytest.mark.parametrize( @@ -53,9 +53,10 @@ def test_struct_from_dataframe(columns_arg): srs = series.Series( columns_arg, ) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( srs.to_pandas(), bbq.struct(srs.struct.explode()).to_pandas(), check_index_type=False, check_dtype=False, + check_names=False, # None vs nan version dependent ) diff --git a/tests/system/small/ml/test_metrics.py b/tests/system/small/ml/test_metrics.py index 040d4d97f6..a0f0f6b48c 100644 --- a/tests/system/small/ml/test_metrics.py +++ b/tests/system/small/ml/test_metrics.py @@ -20,6 +20,7 @@ import bigframes from bigframes.ml import metrics +import bigframes.testing def test_r2_score_perfect_fit(session): @@ -161,7 +162,7 @@ def test_roc_curve_binary_classification_prediction_returns_expected(session): pd_tpr = tpr.to_pandas() pd_thresholds = thresholds.to_pandas() - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( # skip testing the first value, as it is redundant and inconsistent across sklearn versions pd_thresholds[1:], pd.Series( @@ -171,7
+172,7 @@ def test_roc_curve_binary_classification_prediction_returns_expected(session): ), check_index=False, ) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( pd_fpr, pd.Series( [0.0, 0.0, 0.0, 0.25, 0.25, 0.5, 0.5, 0.75, 0.75, 0.75, 1.0], @@ -180,7 +181,7 @@ def test_roc_curve_binary_classification_prediction_returns_expected(session): ), check_index_type=False, ) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( pd_tpr, pd.Series( [ @@ -261,7 +262,7 @@ def test_roc_curve_binary_classification_decision_returns_expected(session): pd_tpr = tpr.to_pandas() pd_thresholds = thresholds.to_pandas() - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( # skip testing the first value, as it is redundant and inconsistent across sklearn versions pd_thresholds[1:], pd.Series( @@ -271,7 +272,7 @@ def test_roc_curve_binary_classification_decision_returns_expected(session): ), check_index=False, ) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( pd_fpr, pd.Series( [0.0, 0.0, 1.0], @@ -280,7 +281,7 @@ def test_roc_curve_binary_classification_decision_returns_expected(session): ), check_index_type=False, ) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( pd_tpr, pd.Series( [ @@ -353,7 +354,7 @@ def test_roc_curve_binary_classification_prediction_series(session): pd_tpr = tpr.to_pandas() pd_thresholds = thresholds.to_pandas() - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( # skip testing the first value, as it is redundant and inconsistent across sklearn versions pd_thresholds[1:], pd.Series( @@ -363,7 +364,7 @@ def test_roc_curve_binary_classification_prediction_series(session): ), check_index=False, ) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( pd_fpr, pd.Series( [0.0, 0.0, 0.0, 0.25, 0.25, 0.5, 0.5, 0.75, 0.75, 0.75, 1.0], @@ -372,7 +373,7 @@ def 
test_roc_curve_binary_classification_prediction_series(session): ), check_index_type=False, ) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( pd_tpr, pd.Series( [ @@ -505,7 +506,7 @@ def test_confusion_matrix(session): 2: [0, 1, 2], } ).astype("int64") - pd.testing.assert_frame_equal( + bigframes.testing.assert_frame_equal( confusion_matrix, expected_pd_df, check_index_type=False ) @@ -523,7 +524,7 @@ def test_confusion_matrix_column_index(session): {1: [1, 0, 1, 0], 2: [0, 0, 2, 0], 3: [0, 0, 0, 0], 4: [0, 1, 0, 1]}, index=[1, 2, 3, 4], ).astype("int64") - pd.testing.assert_frame_equal( + bigframes.testing.assert_frame_equal( confusion_matrix, expected_pd_df, check_index_type=False ) @@ -542,7 +543,7 @@ def test_confusion_matrix_matches_sklearn(session): pd_df[["y_true"]], pd_df[["y_pred"]] ) expected_pd_df = pd.DataFrame(expected_confusion_matrix) - pd.testing.assert_frame_equal( + bigframes.testing.assert_frame_equal( confusion_matrix, expected_pd_df, check_index_type=False ) @@ -564,7 +565,7 @@ def test_confusion_matrix_str_matches_sklearn(session): expected_confusion_matrix, index=["ant", "bird", "cat"] ) expected_pd_df.columns = pd.Index(["ant", "bird", "cat"]) - pd.testing.assert_frame_equal( + bigframes.testing.assert_frame_equal( confusion_matrix, expected_pd_df, check_index_type=False ) @@ -585,7 +586,7 @@ def test_confusion_matrix_series(session): 2: [0, 1, 2], } ).astype("int64") - pd.testing.assert_frame_equal( + bigframes.testing.assert_frame_equal( confusion_matrix, expected_pd_df, check_index_type=False ) @@ -605,7 +606,9 @@ def test_recall_score(session): expected_index = [0, 1, 2] expected_recall = pd.Series(expected_values, index=expected_index) - pd.testing.assert_series_equal(recall, expected_recall, check_index_type=False) + bigframes.testing.assert_series_equal( + recall, expected_recall, check_index_type=False + ) def test_recall_score_matches_sklearn(session): @@ -623,7 +626,9 @@ def 
test_recall_score_matches_sklearn(session): ) expected_index = [0, 1, 2] expected_recall = pd.Series(expected_values, index=expected_index) - pd.testing.assert_series_equal(recall, expected_recall, check_index_type=False) + bigframes.testing.assert_series_equal( + recall, expected_recall, check_index_type=False + ) def test_recall_score_str_matches_sklearn(session): @@ -641,7 +646,9 @@ def test_recall_score_str_matches_sklearn(session): ) expected_index = ["ant", "bird", "cat"] expected_recall = pd.Series(expected_values, index=expected_index) - pd.testing.assert_series_equal(recall, expected_recall, check_index_type=False) + bigframes.testing.assert_series_equal( + recall, expected_recall, check_index_type=False + ) def test_recall_score_series(session): @@ -657,7 +664,9 @@ def test_recall_score_series(session): expected_index = [0, 1, 2] expected_recall = pd.Series(expected_values, index=expected_index) - pd.testing.assert_series_equal(recall, expected_recall, check_index_type=False) + bigframes.testing.assert_series_equal( + recall, expected_recall, check_index_type=False + ) def test_precision_score(session): @@ -675,7 +684,7 @@ def test_precision_score(session): expected_index = [0, 1, 2] expected_precision = pd.Series(expected_values, index=expected_index) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( precision_score, expected_precision, check_index_type=False ) @@ -698,7 +707,7 @@ def test_precision_score_matches_sklearn(session): ) expected_index = [0, 1, 2] expected_precision = pd.Series(expected_values, index=expected_index) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( precision_score, expected_precision, check_index_type=False ) @@ -720,7 +729,7 @@ def test_precision_score_str_matches_sklearn(session): ) expected_index = ["ant", "bird", "cat"] expected_precision = pd.Series(expected_values, index=expected_index) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( 
precision_score, expected_precision, check_index_type=False ) @@ -738,7 +747,7 @@ def test_precision_score_series(session): expected_index = [0, 1, 2] expected_precision = pd.Series(expected_values, index=expected_index) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( precision_score, expected_precision, check_index_type=False ) @@ -823,7 +832,7 @@ def test_f1_score(session): expected_index = [0, 1, 2] expected_f1 = pd.Series(expected_values, index=expected_index) - pd.testing.assert_series_equal(f1_score, expected_f1, check_index_type=False) + bigframes.testing.assert_series_equal(f1_score, expected_f1, check_index_type=False) def test_f1_score_matches_sklearn(session): @@ -841,7 +850,7 @@ def test_f1_score_matches_sklearn(session): ) expected_index = [0, 1, 2] expected_f1 = pd.Series(expected_values, index=expected_index) - pd.testing.assert_series_equal(f1_score, expected_f1, check_index_type=False) + bigframes.testing.assert_series_equal(f1_score, expected_f1, check_index_type=False) def test_f1_score_str_matches_sklearn(session): @@ -859,7 +868,7 @@ def test_f1_score_str_matches_sklearn(session): ) expected_index = ["ant", "bird", "cat"] expected_f1 = pd.Series(expected_values, index=expected_index) - pd.testing.assert_series_equal(f1_score, expected_f1, check_index_type=False) + bigframes.testing.assert_series_equal(f1_score, expected_f1, check_index_type=False) def test_f1_score_series(session): @@ -875,7 +884,7 @@ def test_f1_score_series(session): expected_index = [0, 1, 2] expected_f1 = pd.Series(expected_values, index=expected_index) - pd.testing.assert_series_equal(f1_score, expected_f1, check_index_type=False) + bigframes.testing.assert_series_equal(f1_score, expected_f1, check_index_type=False) def test_mean_squared_error(session: bigframes.Session): diff --git a/tests/system/small/operations/test_datetimes.py b/tests/system/small/operations/test_datetimes.py index 0e023189d5..94b696d09d 100644 --- 
a/tests/system/small/operations/test_datetimes.py +++ b/tests/system/small/operations/test_datetimes.py @@ -17,13 +17,12 @@ import numpy from packaging import version -from pandas import testing import pandas as pd import pytest import bigframes.pandas as bpd import bigframes.series -from bigframes.testing.utils import assert_series_equal +from bigframes.testing.utils import assert_frame_equal, assert_series_equal DATETIME_COL_NAMES = [("datetime_col",), ("timestamp_col",)] DATE_COLUMNS = [ @@ -304,7 +303,7 @@ def test_dt_isocalendar(session): actual_result = bf_s.dt.isocalendar().to_pandas() expected_result = pd_s.dt.isocalendar() - testing.assert_frame_equal( + assert_frame_equal( actual_result, expected_result, check_dtype=False, check_index_type=False ) @@ -353,7 +352,7 @@ def test_dt_strftime(scalars_df_index, scalars_pandas_df_index, column, date_for pytest.importorskip("pandas", minversion="2.0.0") bf_result = scalars_df_index[column].dt.strftime(date_format).to_pandas() pd_result = scalars_pandas_df_index[column].dt.strftime(date_format) - pd.testing.assert_series_equal(bf_result, pd_result, check_dtype=False) + assert_series_equal(bf_result, pd_result, check_dtype=False) assert bf_result.dtype == "string[pyarrow]" @@ -365,7 +364,7 @@ def test_dt_strftime_date(): expected_result = pd.Series(["08/15/2014", "08/15/2215", "02/29/2016"]) bf_result = bf_series.dt.strftime("%m/%d/%Y").to_pandas() - pd.testing.assert_series_equal( + assert_series_equal( bf_result, expected_result, check_index_type=False, check_dtype=False ) assert bf_result.dtype == "string[pyarrow]" @@ -381,7 +380,7 @@ def test_dt_strftime_time(): ) bf_result = bf_series.dt.strftime("%X").to_pandas() - pd.testing.assert_series_equal( + assert_series_equal( bf_result, expected_result, check_index_type=False, check_dtype=False ) assert bf_result.dtype == "string[pyarrow]" @@ -521,7 +520,7 @@ def test_timestamp_diff_two_dataframes(scalars_dfs): actual_result = (bf_df - bf_df).to_pandas() 
expected_result = pd_df - pd_df - testing.assert_frame_equal(actual_result, expected_result) + assert_frame_equal(actual_result, expected_result) def test_timestamp_diff_two_series_with_different_types_raise_error(scalars_dfs): @@ -575,7 +574,12 @@ def test_timestamp_series_diff_agg(scalars_dfs, column): actual_result = bf_series.diff().to_pandas() - expected_result = pd_series.diff() + # overflows for no good reason + # related? https://github.com/apache/arrow/issues/43031 + expected_result = pd_series.ffill().diff() + expected_result = expected_result.mask( + pd_series.isnull() | pd_series.shift(1).isnull() + ) assert_series_equal(actual_result, expected_result) @@ -630,6 +634,6 @@ def test_to_datetime(scalars_dfs, col): ).to_pandas() expected_result = pd.Series(pd.to_datetime(pd_df[col])) - testing.assert_series_equal( + assert_series_equal( actual_result, expected_result, check_dtype=False, check_index_type=False ) diff --git a/tests/system/small/operations/test_timedeltas.py b/tests/system/small/operations/test_timedeltas.py index 18c88db8eb..0eebd56996 100644 --- a/tests/system/small/operations/test_timedeltas.py +++ b/tests/system/small/operations/test_timedeltas.py @@ -19,49 +19,62 @@ import numpy as np from packaging import version import pandas as pd -import pandas.testing import pyarrow as pa import pytest from bigframes import dtypes +import bigframes.testing @pytest.fixture(scope="module") def temporal_dfs(session): pandas_df = pd.DataFrame( { - "datetime_col": [ - pd.Timestamp("2025-02-01 01:00:01"), - pd.Timestamp("2019-01-02 02:00:00"), - pd.Timestamp("1997-01-01 19:00:00"), - ], - "timestamp_col": [ - pd.Timestamp("2023-01-01 01:00:01", tz="UTC"), - pd.Timestamp("2024-01-02 02:00:00", tz="UTC"), - pd.Timestamp("2005-03-05 02:00:00", tz="UTC"), - ], + "datetime_col": pd.Series( + [ + pd.Timestamp("2025-02-01 01:00:01"), + pd.Timestamp("2019-01-02 02:00:00"), + pd.Timestamp("1997-01-01 19:00:00"), + ], + dtype=dtypes.DATETIME_DTYPE, + ), + 
"timestamp_col": pd.Series( + [ + pd.Timestamp("2023-01-01 01:00:01", tz="UTC"), + pd.Timestamp("2024-01-02 02:00:00", tz="UTC"), + pd.Timestamp("2005-03-05 02:00:00", tz="UTC"), + ], + dtype=dtypes.TIMESTAMP_DTYPE, + ), "date_col": pd.Series( [ datetime.date(2000, 1, 1), datetime.date(2001, 2, 3), datetime.date(2020, 9, 30), ], - dtype=pd.ArrowDtype(pa.date32()), + dtype=dtypes.DATE_DTYPE, ), - "timedelta_col_1": [ - pd.Timedelta(5, "s"), - pd.Timedelta(-4, "m"), - pd.Timedelta(5, "h"), - ], - "timedelta_col_2": [ - pd.Timedelta(3, "s"), - pd.Timedelta(-4, "m"), - pd.Timedelta(6, "h"), - ], - "float_col": [1.5, 2, -3], - "int_col": [1, 2, -3], - "positive_int_col": [1, 2, 3], - } + "timedelta_col_1": pd.Series( + [ + pd.Timedelta(5, "s"), + pd.Timedelta(-4, "m"), + pd.Timedelta(5, "h"), + ], + dtype=dtypes.TIMEDELTA_DTYPE, + ), + "timedelta_col_2": pd.Series( + [ + pd.Timedelta(3, "s"), + pd.Timedelta(-4, "m"), + pd.Timedelta(6, "h"), + ], + dtype=dtypes.TIMEDELTA_DTYPE, + ), + "float_col": pd.Series([1.5, 2, -3], dtype=dtypes.FLOAT_DTYPE), + "int_col": pd.Series([1, 2, -3], dtype="Int64"), + "positive_int_col": pd.Series([1, 2, 3], dtype="Int64"), + }, + index=pd.Index(range(3), dtype="Int64"), ) bigframes_df = session.read_pandas(pandas_df) @@ -71,20 +84,11 @@ def temporal_dfs(session): def _assert_series_equal(actual: pd.Series, expected: pd.Series): """Helper function specifically for timedelta testsing. 
Don't use it outside of this module.""" - if actual.dtype == dtypes.FLOAT_DTYPE: - pandas.testing.assert_series_equal( - actual, expected.astype("Float64"), check_index_type=False - ) - elif actual.dtype == dtypes.INT_DTYPE: - pandas.testing.assert_series_equal( - actual, expected.astype("Int64"), check_index_type=False - ) - else: - pandas.testing.assert_series_equal( - actual.astype("timedelta64[ns]"), - expected.dt.floor("us"), # in BF the precision is microsecond - check_index_type=False, - ) + bigframes.testing.assert_series_equal( + actual, + expected, + check_index_type=False, + ) @pytest.mark.parametrize( @@ -176,12 +180,10 @@ def test_timedelta_unary_ops(temporal_dfs, op): def test_timestamp_add__ts_series_plus_td_series(temporal_dfs, column, pd_dtype): bf_df, pd_df = temporal_dfs - actual_result = ( - (bf_df[column] + bf_df["timedelta_col_1"]).to_pandas().astype(pd_dtype) - ) + actual_result = (bf_df[column] + bf_df["timedelta_col_1"]).to_pandas() expected_result = pd_df[column] + pd_df["timedelta_col_1"] - pandas.testing.assert_series_equal( + bigframes.testing.assert_series_equal( actual_result, expected_result, check_index_type=False ) @@ -207,12 +209,10 @@ def test_timestamp_add__ts_series_plus_td_series__explicit_cast(temporal_dfs, co def test_timestamp_add__ts_series_plus_td_literal(temporal_dfs, literal): bf_df, pd_df = temporal_dfs - actual_result = ( - (bf_df["timestamp_col"] + literal).to_pandas().astype("datetime64[ns, UTC]") - ) + actual_result = (bf_df["timestamp_col"] + literal).to_pandas() expected_result = pd_df["timestamp_col"] + literal - pandas.testing.assert_series_equal( + bigframes.testing.assert_series_equal( actual_result, expected_result, check_index_type=False ) @@ -227,12 +227,10 @@ def test_timestamp_add__ts_series_plus_td_literal(temporal_dfs, literal): def test_timestamp_add__td_series_plus_ts_series(temporal_dfs, column, pd_dtype): bf_df, pd_df = temporal_dfs - actual_result = ( - (bf_df["timedelta_col_1"] + 
bf_df[column]).to_pandas().astype(pd_dtype) - ) + actual_result = (bf_df["timedelta_col_1"] + bf_df[column]).to_pandas() expected_result = pd_df["timedelta_col_1"] + pd_df[column] - pandas.testing.assert_series_equal( + bigframes.testing.assert_series_equal( actual_result, expected_result, check_index_type=False ) @@ -241,10 +239,10 @@ def test_timestamp_add__td_literal_plus_ts_series(temporal_dfs): bf_df, pd_df = temporal_dfs timedelta = pd.Timedelta(1, unit="s") - actual_result = (timedelta + bf_df["datetime_col"]).to_pandas().astype(" pd.Timedelta(1, "h"))] - .to_pandas() - .astype(" pd.Timedelta(1, "h")) + ].to_pandas() expected_result = pd_series[(pd_series - timestamp) > pd.Timedelta(1, "h")] - pandas.testing.assert_series_equal( + bigframes.testing.assert_series_equal( actual_result, expected_result, check_index_type=False ) @@ -554,15 +538,10 @@ def test_timedelta_ordering(session): ) bf_df = session.read_pandas(pd_df) - actual_result = ( - (bf_df["col_2"] - bf_df["col_1"]) - .sort_values() - .to_pandas() - .astype("timedelta64[ns]") - ) + actual_result = (bf_df["col_2"] - bf_df["col_1"]).sort_values().to_pandas() expected_result = (pd_df["col_2"] - pd_df["col_1"]).sort_values() - pandas.testing.assert_series_equal( + bigframes.testing.assert_series_equal( actual_result, expected_result, check_index_type=False ) @@ -629,6 +608,6 @@ def test_timestamp_diff_after_type_casting(temporal_dfs): expected_result = pd_df["timestamp_col"] - pd_df["positive_int_col"].astype( "datetime64[us, UTC]" ) - pandas.testing.assert_series_equal( + bigframes.testing.assert_series_equal( actual_result, expected_result, check_index_type=False, check_dtype=False ) diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index fa82cce605..5b28156e73 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -5968,6 +5968,8 @@ def test_resample_with_column( pd_result = scalars_pandas_df_index.resample(rule=rule, 
on=on, origin=origin)[ ["int64_col", "int64_too"] ].max() + # TODO: (b/484364312) + pd_result.index.names = bf_result.index.names pd.testing.assert_frame_equal( bf_result, pd_result, check_dtype=False, check_index_type=False ) @@ -6023,6 +6025,8 @@ def test_resample_with_index( .resample(rule=rule, level=level, closed=closed, origin=origin, label=label) .min() ) + # TODO: (b/484364312) + pd_result.index.names = bf_result.index.names assert_frame_equal(bf_result, pd_result) @@ -6076,6 +6080,8 @@ def test_resample_start_time(rule, origin, data): pd_result = scalars_pandas_df_index.resample(rule=rule, origin=origin).min() + # TODO: (b/484364312) + pd_result.index.names = bf_result.index.names pd.testing.assert_frame_equal( bf_result, pd_result, check_dtype=False, check_index_type=False ) diff --git a/tests/system/small/test_numpy.py b/tests/system/small/test_numpy.py index 490f927114..d04fb81a0a 100644 --- a/tests/system/small/test_numpy.py +++ b/tests/system/small/test_numpy.py @@ -16,6 +16,8 @@ import pandas as pd import pytest +import bigframes.testing + @pytest.mark.parametrize( ("opname",), @@ -45,7 +47,7 @@ def test_series_ufuncs(floats_pd, floats_bf, opname): bf_result = getattr(np, opname)(floats_bf).to_pandas() pd_result = getattr(np, opname)(floats_pd) - pd.testing.assert_series_equal(bf_result, pd_result) + bigframes.testing.assert_series_equal(bf_result, pd_result, nulls_are_nan=True) @pytest.mark.parametrize( @@ -79,7 +81,7 @@ def test_df_ufuncs(scalars_dfs, opname): ): pd_result["int64_col"] = pd_result["int64_col"].astype(pd.Float64Dtype()) - pd.testing.assert_frame_equal(bf_result, pd_result) + bigframes.testing.assert_frame_equal(bf_result, pd_result, nulls_are_nan=True) @pytest.mark.parametrize( @@ -99,7 +101,7 @@ def test_df_binary_ufuncs(scalars_dfs, opname): bf_result = op(scalars_df[["float64_col", "int64_col"]], 5.1).to_pandas() pd_result = op(scalars_pandas_df[["float64_col", "int64_col"]], 5.1) - pd.testing.assert_frame_equal(bf_result, 
pd_result) + bigframes.testing.assert_frame_equal(bf_result, pd_result, nulls_are_nan=True) # Operations tested here don't work on full dataframe in numpy+pandas @@ -131,7 +133,7 @@ def test_series_binary_ufuncs(scalars_dfs, x, y, opname): bf_result = op(scalars_df[x], scalars_df[y]).to_pandas() pd_result = op(scalars_pandas_df[x], scalars_pandas_df[y]) - pd.testing.assert_series_equal(bf_result, pd_result) + bigframes.testing.assert_series_equal(bf_result, pd_result, nulls_are_nan=True) def test_series_binary_ufuncs_reverse(scalars_dfs): @@ -141,7 +143,7 @@ def test_series_binary_ufuncs_reverse(scalars_dfs): bf_result = np.subtract(5.1, scalars_df["int64_col"]).to_pandas() pd_result = np.subtract(5.1, scalars_pandas_df["int64_col"]) - pd.testing.assert_series_equal(bf_result, pd_result) + bigframes.testing.assert_series_equal(bf_result, pd_result, nulls_are_nan=True) def test_df_binary_ufuncs_reverse(scalars_dfs): @@ -154,4 +156,4 @@ def test_df_binary_ufuncs_reverse(scalars_dfs): scalars_pandas_df[["float64_col", "int64_col"]], ) - pd.testing.assert_frame_equal(bf_result, pd_result) + bigframes.testing.assert_frame_equal(bf_result, pd_result, nulls_are_nan=True) diff --git a/tests/system/small/test_pandas.py b/tests/system/small/test_pandas.py index a1c0dc9851..b9e75a9d95 100644 --- a/tests/system/small/test_pandas.py +++ b/tests/system/small/test_pandas.py @@ -21,7 +21,8 @@ import pytz import bigframes.pandas as bpd -from bigframes.testing.utils import assert_frame_equal +import bigframes.testing +from bigframes.testing.utils import assert_frame_equal, assert_series_equal @pytest.mark.parametrize( @@ -64,7 +65,7 @@ def test_concat_series(scalars_dfs): ] ) - pd.testing.assert_series_equal(bf_result, pd_result) + bigframes.testing.assert_series_equal(bf_result, pd_result) @pytest.mark.parametrize( @@ -548,9 +549,9 @@ def _convert_pandas_category(pd_s: pd.Series): right_key = "right_inclusive" subtype = pd_s.cat.categories.dtype.subtype # type: ignore - if 
pd.api.types.is_float_dtype(subtype): + if pd.api.types.is_float_dtype(subtype): # type: ignore interval_dtype = pa.float64() - elif pd.api.types.is_integer_dtype(subtype): + elif pd.api.types.is_integer_dtype(subtype): # type: ignore interval_dtype = pa.int64() else: raise ValueError(f"Unknown category type: {subtype}") @@ -591,7 +592,7 @@ def test_cut_for_array(): bf_result = bpd.cut(sc, x) pd_result = _convert_pandas_category(pd_result) - pd.testing.assert_series_equal(bf_result.to_pandas(), pd_result) + bigframes.testing.assert_series_equal(bf_result.to_pandas(), pd_result) @pytest.mark.parametrize( @@ -610,7 +611,7 @@ def test_cut_by_int_bins(scalars_dfs, labels, right): bf_result = bpd.cut(scalars_df["float64_col"], 5, labels=labels, right=right) pd_result = _convert_pandas_category(pd_result) - pd.testing.assert_series_equal(bf_result.to_pandas(), pd_result) + bigframes.testing.assert_series_equal(bf_result.to_pandas(), pd_result) def test_cut_by_int_bins_w_labels(scalars_dfs): @@ -621,7 +622,7 @@ def test_cut_by_int_bins_w_labels(scalars_dfs): bf_result = bpd.cut(scalars_df["float64_col"], 5, labels=labels) pd_result = _convert_pandas_category(pd_result) - pd.testing.assert_series_equal(bf_result.to_pandas(), pd_result) + bigframes.testing.assert_series_equal(bf_result.to_pandas(), pd_result) @pytest.mark.parametrize( @@ -664,7 +665,7 @@ def test_cut_by_numeric_breaks(scalars_dfs, breaks, right, labels): ).to_pandas() pd_result_converted = _convert_pandas_category(pd_result) - pd.testing.assert_series_equal(bf_result, pd_result_converted) + bigframes.testing.assert_series_equal(bf_result, pd_result_converted) def test_cut_by_numeric_breaks_w_labels(scalars_dfs): @@ -676,7 +677,7 @@ def test_cut_by_numeric_breaks_w_labels(scalars_dfs): bf_result = bpd.cut(scalars_df["float64_col"], bins, labels=labels) pd_result = _convert_pandas_category(pd_result) - pd.testing.assert_series_equal(bf_result.to_pandas(), pd_result) + 
bigframes.testing.assert_series_equal(bf_result.to_pandas(), pd_result) @pytest.mark.parametrize( @@ -716,7 +717,7 @@ def test_cut_by_interval_bins(scalars_dfs, bins, right, labels): pd_result = pd.cut(scalars_pandas_df["int64_too"], bins, labels=labels, right=right) pd_result_converted = _convert_pandas_category(pd_result) - pd.testing.assert_series_equal(bf_result, pd_result_converted) + bigframes.testing.assert_series_equal(bf_result, pd_result_converted) def test_cut_by_interval_bins_w_labels(scalars_dfs): @@ -728,7 +729,7 @@ def test_cut_by_interval_bins_w_labels(scalars_dfs): bf_result = bpd.cut(scalars_df["float64_col"], bins, labels=labels) pd_result = _convert_pandas_category(pd_result) - pd.testing.assert_series_equal(bf_result.to_pandas(), pd_result) + bigframes.testing.assert_series_equal(bf_result.to_pandas(), pd_result) @pytest.mark.parametrize( @@ -745,7 +746,7 @@ def test_cut_by_edge_cases_bins(scalars_dfs, bins, labels): pd_result = pd.cut(scalars_pandas_df["int64_too"], bins, labels=labels) pd_result_converted = _convert_pandas_category(pd_result) - pd.testing.assert_series_equal(bf_result, pd_result_converted) + bigframes.testing.assert_series_equal(bf_result, pd_result_converted) def test_cut_empty_array_raises_error(): @@ -774,7 +775,7 @@ def test_qcut(scalars_dfs, q): bf_result = bpd.qcut(scalars_df["float64_col"], q, labels=False, duplicates="drop") pd_result = pd_result.astype("Int64") - pd.testing.assert_series_equal(bf_result.to_pandas(), pd_result) + bigframes.testing.assert_series_equal(bf_result.to_pandas(), pd_result) @pytest.mark.parametrize( @@ -819,7 +820,7 @@ def test_to_datetime_iterable(arg, utc, unit, format): pd_result = pd.Series( pd.to_datetime(arg, utc=utc, unit=unit, format=format) ).dt.floor("us") - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result, pd_result, check_index_type=False, check_names=False ) @@ -831,7 +832,7 @@ def test_to_datetime_series(scalars_dfs): 
bpd.to_datetime(scalars_df[col], unit="s").to_pandas().astype("datetime64[s]") ) pd_result = pd.Series(pd.to_datetime(scalars_pandas_df[col], unit="s")) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result, pd_result, check_index_type=False, check_names=False ) @@ -853,7 +854,7 @@ def test_to_datetime_series(scalars_dfs): def test_to_datetime_unit_param(arg, unit): bf_result = bpd.to_datetime(arg, unit=unit).to_pandas().astype("datetime64[ns]") pd_result = pd.Series(pd.to_datetime(arg, unit=unit)).dt.floor("us") - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result, pd_result, check_index_type=False, check_names=False ) @@ -874,7 +875,7 @@ def test_to_datetime_format_param(arg, utc, format): .astype("datetime64[ns, UTC]" if utc else "datetime64[ns]") ) pd_result = pd.Series(pd.to_datetime(arg, utc=utc, format=format)).dt.floor("us") - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result, pd_result, check_index_type=False, check_names=False ) @@ -927,7 +928,7 @@ def test_to_datetime_string_inputs(arg, utc, output_in_utc, format): .astype("datetime64[ns, UTC]" if output_in_utc else "datetime64[ns]") ) pd_result = pd.Series(pd.to_datetime(arg, utc=utc, format=format)).dt.floor("us") - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result, pd_result, check_index_type=False, check_names=False ) @@ -970,7 +971,7 @@ def test_to_datetime_timestamp_inputs(arg, utc, output_in_utc): .astype("datetime64[ns, UTC]" if output_in_utc else "datetime64[ns]") ) pd_result = pd.Series(pd.to_datetime(arg, utc=utc)).dt.floor("us") - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result, pd_result, check_index_type=False, check_names=False ) @@ -1019,10 +1020,8 @@ def test_to_timedelta_with_bf_integer_series(session, unit): .astype("timedelta64[ns]") ) - expected_result = pd.to_timedelta(pd_series, unit) - pd.testing.assert_series_equal( - 
actual_result, expected_result, check_index_type=False - ) + expected_result = pd.to_timedelta(pd_series, unit).astype("timedelta64[ns]") + assert_series_equal(actual_result, expected_result, check_index_type=False) def test_to_timedelta_with_bf_float_series_value_rounded_down(session): @@ -1034,8 +1033,10 @@ def test_to_timedelta_with_bf_float_series_value_rounded_down(session): .astype("timedelta64[ns]") ) - expected_result = pd.Series([pd.Timedelta(1, "us"), pd.Timedelta(2, "us")]) - pd.testing.assert_series_equal( + expected_result = pd.Series([pd.Timedelta(1, "us"), pd.Timedelta(2, "us")]).astype( + "timedelta64[ns]" + ) + bigframes.testing.assert_series_equal( actual_result, expected_result, check_index_type=False ) @@ -1055,8 +1056,8 @@ def test_to_timedelta_with_list_like_input(session, input): .astype("timedelta64[ns]") ) - expected_result = pd.Series(pd.to_timedelta(input, "s")) - pd.testing.assert_series_equal( + expected_result = pd.Series(pd.to_timedelta(input, "s")).astype("timedelta64[ns]") + bigframes.testing.assert_series_equal( actual_result, expected_result, check_index_type=False ) @@ -1087,6 +1088,6 @@ def test_to_timedelta_on_timedelta_series__should_be_no_op(scalars_dfs): ) expected_result = pd.to_timedelta(pd_series, unit="s") - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( actual_result, expected_result, check_index_type=False ) diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py index f5408dc323..faa3928f91 100644 --- a/tests/system/small/test_series.py +++ b/tests/system/small/test_series.py @@ -32,6 +32,7 @@ import bigframes.features import bigframes.pandas import bigframes.series as series +import bigframes.testing from bigframes.testing.utils import ( assert_frame_equal, assert_series_equal, @@ -47,7 +48,7 @@ def test_series_construct_copy(scalars_dfs): pd_result = pd.Series( scalars_pandas_df["int64_col"], name="test_series", dtype="Float64" ) - 
pd.testing.assert_series_equal(bf_result, pd_result) + bigframes.testing.assert_series_equal(bf_result, pd_result) def test_series_construct_nullable_ints(): @@ -62,7 +63,7 @@ def test_series_construct_nullable_ints(): ) expected = pd.Series([1, 3, pd.NA], dtype=pd.Int64Dtype(), index=expected_index) - pd.testing.assert_series_equal(bf_result, expected) + bigframes.testing.assert_series_equal(bf_result, expected) def test_series_construct_timestamps(): @@ -74,7 +75,7 @@ def test_series_construct_timestamps(): bf_result = series.Series(datetimes).to_pandas() pd_result = pd.Series(datetimes, dtype=pd.ArrowDtype(pa.timestamp("us"))) - pd.testing.assert_series_equal(bf_result, pd_result, check_index_type=False) + bigframes.testing.assert_series_equal(bf_result, pd_result, check_index_type=False) def test_series_construct_copy_with_index(scalars_dfs): @@ -91,7 +92,7 @@ def test_series_construct_copy_with_index(scalars_dfs): dtype="Float64", index=scalars_pandas_df["int64_too"], ) - pd.testing.assert_series_equal(bf_result, pd_result) + bigframes.testing.assert_series_equal(bf_result, pd_result) def test_series_construct_copy_index(scalars_dfs): @@ -108,7 +109,7 @@ def test_series_construct_copy_index(scalars_dfs): dtype="Float64", index=scalars_pandas_df["int64_too"], ) - pd.testing.assert_series_equal(bf_result, pd_result) + bigframes.testing.assert_series_equal(bf_result, pd_result) def test_series_construct_pandas(scalars_dfs): @@ -120,7 +121,7 @@ def test_series_construct_pandas(scalars_dfs): scalars_pandas_df["int64_col"], name="test_series", dtype="Float64" ) assert bf_result.shape == pd_result.shape - pd.testing.assert_series_equal(bf_result.to_pandas(), pd_result) + bigframes.testing.assert_series_equal(bf_result.to_pandas(), pd_result) def test_series_construct_from_list(): @@ -130,7 +131,7 @@ def test_series_construct_from_list(): # BigQuery DataFrame default indices use nullable Int64 always pd_result.index = pd_result.index.astype("Int64") - 
pd.testing.assert_series_equal(bf_result, pd_result) + bigframes.testing.assert_series_equal(bf_result, pd_result) def test_series_construct_reindex(): @@ -141,7 +142,7 @@ def test_series_construct_reindex(): # BigQuery DataFrame default indices use nullable Int64 always pd_result.index = pd_result.index.astype("Int64") - pd.testing.assert_series_equal(bf_result, pd_result) + bigframes.testing.assert_series_equal(bf_result, pd_result) def test_series_construct_from_list_w_index(): @@ -155,7 +156,7 @@ def test_series_construct_from_list_w_index(): # BigQuery DataFrame default indices use nullable Int64 always pd_result.index = pd_result.index.astype("Int64") - pd.testing.assert_series_equal(bf_result, pd_result) + bigframes.testing.assert_series_equal(bf_result, pd_result) def test_series_construct_empty(session: bigframes.Session): @@ -176,7 +177,7 @@ def test_series_construct_scalar_no_index(): # BigQuery DataFrame default indices use nullable Int64 always pd_result.index = pd_result.index.astype("Int64") - pd.testing.assert_series_equal(bf_result, pd_result) + bigframes.testing.assert_series_equal(bf_result, pd_result) def test_series_construct_scalar_w_index(): @@ -188,7 +189,7 @@ def test_series_construct_scalar_w_index(): # BigQuery DataFrame default indices use nullable Int64 always pd_result.index = pd_result.index.astype("Int64") - pd.testing.assert_series_equal(bf_result, pd_result) + bigframes.testing.assert_series_equal(bf_result, pd_result) def test_series_construct_nan(): @@ -198,7 +199,7 @@ def test_series_construct_nan(): pd_result.index = pd_result.index.astype("Int64") pd_result = pd_result.astype("Float64") - pd.testing.assert_series_equal(bf_result, pd_result) + bigframes.testing.assert_series_equal(bf_result, pd_result) def test_series_construct_scalar_w_bf_index(): @@ -209,7 +210,7 @@ def test_series_construct_scalar_w_bf_index(): pd_result = pd_result.astype("string[pyarrow]") - pd.testing.assert_series_equal(bf_result, pd_result) + 
bigframes.testing.assert_series_equal(bf_result, pd_result) def test_series_construct_from_list_escaped_strings(): @@ -225,7 +226,7 @@ def test_series_construct_from_list_escaped_strings(): # BigQuery DataFrame default indices use nullable Int64 always pd_result.index = pd_result.index.astype("Int64") - pd.testing.assert_series_equal(bf_result.to_pandas(), pd_result) + bigframes.testing.assert_series_equal(bf_result.to_pandas(), pd_result) def test_series_construct_geodata(): @@ -240,7 +241,7 @@ def test_series_construct_geodata(): series = bigframes.pandas.Series(pd_series) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( pd_series, series.to_pandas(), check_index_type=False ) @@ -258,7 +259,7 @@ def test_series_construct_w_dtype(dtype): expected = pd.Series(data, dtype=dtype) expected.index = expected.index.astype("Int64") series = bigframes.pandas.Series(data, dtype=dtype) - pd.testing.assert_series_equal(series.to_pandas(), expected) + bigframes.testing.assert_series_equal(series.to_pandas(), expected) def test_series_construct_w_dtype_for_struct(): @@ -275,7 +276,7 @@ def test_series_construct_w_dtype_for_struct(): series = bigframes.pandas.Series(data, dtype=dtype) expected = pd.Series(data, dtype=dtype) expected.index = expected.index.astype("Int64") - pd.testing.assert_series_equal(series.to_pandas(), expected) + bigframes.testing.assert_series_equal(series.to_pandas(), expected) def test_series_construct_w_dtype_for_array_string(): @@ -293,7 +294,7 @@ def test_series_construct_w_dtype_for_array_string(): else: check_dtype = False - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( series.to_pandas(), expected, check_dtype=check_dtype ) @@ -313,7 +314,7 @@ def test_series_construct_w_dtype_for_array_struct(): else: check_dtype = False - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( series.to_pandas(), expected, check_dtype=check_dtype ) @@ -323,7 +324,7 @@ def 
test_series_construct_local_unordered_has_sequential_index(unordered_session ["Sun", "Mon", "Tues", "Wed", "Thurs", "Fri", "Sat"], session=unordered_session ) expected: pd.Index = pd.Index([0, 1, 2, 3, 4, 5, 6], dtype=pd.Int64Dtype()) - pd.testing.assert_index_equal(series.index.to_pandas(), expected) + bigframes.testing.assert_index_equal(series.index.to_pandas(), expected) @pytest.mark.parametrize( @@ -385,14 +386,14 @@ def test_series_construct_w_nested_json_dtype(): ), ) - pd.testing.assert_series_equal(s.to_pandas(), s2.to_pandas()) + bigframes.testing.assert_series_equal(s.to_pandas(), s2.to_pandas()) def test_series_keys(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs bf_result = scalars_df["int64_col"].keys().to_pandas() pd_result = scalars_pandas_df["int64_col"].keys() - pd.testing.assert_index_equal(bf_result, pd_result) + bigframes.testing.assert_index_equal(bf_result, pd_result) @pytest.mark.parametrize( @@ -537,7 +538,7 @@ def test_series___getitem__(scalars_dfs, index_col, key): scalars_pandas_df = scalars_pandas_df.set_index(index_col, drop=False) bf_result = scalars_df[col_name][key] pd_result = scalars_pandas_df[col_name][key] - pd.testing.assert_series_equal(bf_result.to_pandas(), pd_result) + bigframes.testing.assert_series_equal(bf_result.to_pandas(), pd_result) @pytest.mark.parametrize( @@ -589,7 +590,7 @@ def test_series___setitem__(scalars_dfs, index_col, key, value): bf_series[key] = value pd_series[key] = value - pd.testing.assert_series_equal(bf_series.to_pandas(), pd_series) + bigframes.testing.assert_series_equal(bf_series.to_pandas(), pd_series) @pytest.mark.parametrize( @@ -614,7 +615,7 @@ def test_series___setitem___with_int_key_numeric(scalars_dfs, key, value): bf_series[key] = value pd_series[key] = value - pd.testing.assert_series_equal(bf_series.to_pandas(), pd_series) + bigframes.testing.assert_series_equal(bf_series.to_pandas(), pd_series) def test_series___setitem___with_default_index(scalars_dfs): @@ -711,7 +712,7 @@ 
def test_series_replace_scalar_scalar(scalars_dfs): ) pd_result = scalars_pandas_df[col_name].replace("Hello, World!", "Howdy, Planet!") - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( pd_result, bf_result, ) @@ -727,7 +728,7 @@ def test_series_replace_regex_scalar(scalars_dfs): "^H.l", "Howdy, Planet!", regex=True ) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( pd_result, bf_result, ) @@ -745,7 +746,7 @@ def test_series_replace_list_scalar(scalars_dfs): ["Hello, World!", "T"], "Howdy, Planet!" ) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( pd_result, bf_result, ) @@ -757,7 +758,7 @@ def test_series_replace_nans_with_pd_na(scalars_dfs): bf_result = scalars_df[col_name].replace({pd.NA: "UNKNOWN"}).to_pandas() pd_result = scalars_pandas_df[col_name].replace({pd.NA: "UNKNOWN"}) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( pd_result, bf_result, ) @@ -782,7 +783,7 @@ def test_series_replace_dict(scalars_dfs, replacement_dict): bf_result = scalars_df[col_name].replace(replacement_dict).to_pandas() pd_result = scalars_pandas_df[col_name].replace(replacement_dict) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( pd_result, bf_result, ) @@ -838,7 +839,7 @@ def test_series_dropna(scalars_dfs, ignore_index): col_name = "string_col" bf_result = scalars_df[col_name].dropna(ignore_index=ignore_index).to_pandas() pd_result = scalars_pandas_df[col_name].dropna(ignore_index=ignore_index) - pd.testing.assert_series_equal(pd_result, bf_result, check_index_type=False) + bigframes.testing.assert_series_equal(pd_result, bf_result, check_index_type=False) @pytest.mark.parametrize( @@ -874,7 +875,7 @@ def test_series_agg_multi_string(scalars_dfs): # Pandas may produce narrower numeric types, but bigframes always produces Float64 pd_result = pd_result.astype("Float64") - pd.testing.assert_series_equal(pd_result, bf_result, check_index_type=False) + 
bigframes.testing.assert_series_equal(pd_result, bf_result, check_index_type=False) @pytest.mark.parametrize( @@ -991,7 +992,7 @@ def test_mode_stat(scalars_df_index, scalars_pandas_df_index, col_name): ## Mode implicitly resets index, and bigframes default indices use nullable Int64 pd_result.index = pd_result.index.astype("Int64") - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result, pd_result, ) @@ -1159,7 +1160,7 @@ def test_mods(scalars_dfs, col_x, col_y, method): else: bf_result = bf_series.astype("Float64").to_pandas() pd_result = getattr(scalars_pandas_df[col_x], method)(scalars_pandas_df[col_y]) - pd.testing.assert_series_equal(pd_result, bf_result) + bigframes.testing.assert_series_equal(pd_result, bf_result) # We work around a pandas bug that doesn't handle correlating nullable dtypes by doing this @@ -1224,16 +1225,16 @@ def test_divmods_series(scalars_dfs, col_x, col_y, method): ) # BigQuery's mod functions return NUMERIC values for non-INT64 inputs. if bf_div_result.dtype == pd.Int64Dtype(): - pd.testing.assert_series_equal(pd_div_result, bf_div_result.to_pandas()) + bigframes.testing.assert_series_equal(pd_div_result, bf_div_result.to_pandas()) else: - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( pd_div_result, bf_div_result.astype("Float64").to_pandas() ) if bf_mod_result.dtype == pd.Int64Dtype(): - pd.testing.assert_series_equal(pd_mod_result, bf_mod_result.to_pandas()) + bigframes.testing.assert_series_equal(pd_mod_result, bf_mod_result.to_pandas()) else: - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( pd_mod_result, bf_mod_result.astype("Float64").to_pandas() ) @@ -1265,16 +1266,16 @@ def test_divmods_scalars(scalars_dfs, col_x, other, method): pd_div_result, pd_mod_result = getattr(scalars_pandas_df[col_x], method)(other) # BigQuery's mod functions return NUMERIC values for non-INT64 inputs. 
if bf_div_result.dtype == pd.Int64Dtype(): - pd.testing.assert_series_equal(pd_div_result, bf_div_result.to_pandas()) + bigframes.testing.assert_series_equal(pd_div_result, bf_div_result.to_pandas()) else: - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( pd_div_result, bf_div_result.astype("Float64").to_pandas() ) if bf_mod_result.dtype == pd.Int64Dtype(): - pd.testing.assert_series_equal(pd_mod_result, bf_mod_result.to_pandas()) + bigframes.testing.assert_series_equal(pd_mod_result, bf_mod_result.to_pandas()) else: - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( pd_mod_result, bf_mod_result.astype("Float64").to_pandas() ) @@ -1347,7 +1348,9 @@ def test_series_add_different_table_default_index( + scalars_df_2_default_index["float64_col"].to_pandas() ) # TODO(swast): Can remove sort_index() when there's default ordering. - pd.testing.assert_series_equal(bf_result.sort_index(), pd_result.sort_index()) + bigframes.testing.assert_series_equal( + bf_result.sort_index(), pd_result.sort_index() + ) def test_series_add_different_table_with_index( @@ -1358,7 +1361,7 @@ def test_series_add_different_table_with_index( # When index values are unique, we can emulate with values from the same # DataFrame. 
pd_result = scalars_pandas_df["float64_col"] + scalars_pandas_df["int64_col"] - pd.testing.assert_series_equal(bf_result.to_pandas(), pd_result) + bigframes.testing.assert_series_equal(bf_result.to_pandas(), pd_result) def test_reset_index_drop(scalars_df_index, scalars_pandas_df_index): @@ -1377,7 +1380,7 @@ def test_reset_index_drop(scalars_df_index, scalars_pandas_df_index): # BigQuery DataFrames default indices use nullable Int64 always pd_result.index = pd_result.index.astype("Int64") - pd.testing.assert_series_equal(bf_result.to_pandas(), pd_result) + bigframes.testing.assert_series_equal(bf_result.to_pandas(), pd_result) def test_series_reset_index_allow_duplicates(scalars_df_index, scalars_pandas_df_index): @@ -1396,7 +1399,7 @@ def test_series_reset_index_allow_duplicates(scalars_df_index, scalars_pandas_df pd_result.index = pd_result.index.astype(pd.Int64Dtype()) # reset_index should maintain the original ordering. - pd.testing.assert_frame_equal(bf_result, pd_result) + bigframes.testing.assert_frame_equal(bf_result, pd_result) def test_series_reset_index_duplicates_error(scalars_df_index): @@ -1415,7 +1418,7 @@ def test_series_reset_index_inplace(scalars_df_index, scalars_pandas_df_index): # BigQuery DataFrames default indices use nullable Int64 always pd_result.index = pd_result.index.astype("Int64") - pd.testing.assert_series_equal(bf_result.to_pandas(), pd_result) + bigframes.testing.assert_series_equal(bf_result.to_pandas(), pd_result) @pytest.mark.parametrize( @@ -1442,7 +1445,7 @@ def test_reset_index_no_drop(scalars_df_index, scalars_pandas_df_index, name): # BigQuery DataFrames default indices use nullable Int64 always pd_result.index = pd_result.index.astype("Int64") - pd.testing.assert_frame_equal(bf_result.to_pandas(), pd_result) + bigframes.testing.assert_frame_equal(bf_result.to_pandas(), pd_result) def test_copy(scalars_df_index, scalars_pandas_df_index): @@ -1459,7 +1462,7 @@ def test_copy(scalars_df_index, scalars_pandas_df_index): 
pd_series.loc[0] = 3.4 assert bf_copy.to_pandas().loc[0] != bf_series.to_pandas().loc[0] - pd.testing.assert_series_equal(bf_copy.to_pandas(), pd_copy) + bigframes.testing.assert_series_equal(bf_copy.to_pandas(), pd_copy) def test_isin_raise_error(scalars_df_index, scalars_pandas_df_index): @@ -1500,7 +1503,7 @@ def test_isin(scalars_dfs, col_name, test_set): scalars_df, scalars_pandas_df = scalars_dfs bf_result = scalars_df[col_name].isin(test_set).to_pandas() pd_result = scalars_pandas_df[col_name].isin(test_set).astype("boolean") - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( pd_result, bf_result, ) @@ -1540,7 +1543,7 @@ def test_isin_bigframes_values(scalars_dfs, col_name, test_set, session): scalars_df[col_name].isin(series.Series(test_set, session=session)).to_pandas() ) pd_result = scalars_pandas_df[col_name].isin(test_set).astype("boolean") - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( pd_result, bf_result, ) @@ -1558,7 +1561,7 @@ def test_isin_bigframes_index(scalars_dfs, session): .isin(pd.Index(["Hello, World!", "Hi", "こんにちは"])) .astype("boolean") ) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( pd_result, bf_result, ) @@ -1603,7 +1606,7 @@ def test_isin_bigframes_values_as_predicate( pd_predicate = scalars_pandas_df[col_name].isin(test_set) pd_result = scalars_pandas_df[pd_predicate] - pd.testing.assert_frame_equal( + bigframes.testing.assert_frame_equal( pd_result.reset_index(), bf_result.reset_index(), ) @@ -1704,10 +1707,10 @@ def test_loc_setitem_cell(scalars_df_index, scalars_pandas_df_index): pd_series.loc[2] = "This value isn't in the test data." bf_result = bf_series.to_pandas() pd_result = pd_series - pd.testing.assert_series_equal(bf_result, pd_result) + bigframes.testing.assert_series_equal(bf_result, pd_result) # Per Copy-on-Write semantics, other references to the original DataFrame # should remain unchanged. 
- pd.testing.assert_series_equal(bf_original.to_pandas(), pd_original) + bigframes.testing.assert_series_equal(bf_original.to_pandas(), pd_original) def test_at_setitem_row_label_scalar(scalars_dfs): @@ -1718,7 +1721,7 @@ def test_at_setitem_row_label_scalar(scalars_dfs): pd_series.at[1] = 1000 bf_result = bf_series.to_pandas() pd_result = pd_series.astype("Int64") - pd.testing.assert_series_equal(bf_result, pd_result) + bigframes.testing.assert_series_equal(bf_result, pd_result) def test_ne_obj_series(scalars_dfs): @@ -1998,7 +2001,7 @@ def test_series_quantile(scalars_dfs): pd_result = pd_series.quantile([0.0, 0.4, 0.6, 1.0]) bf_result = bf_series.quantile([0.0, 0.4, 0.6, 1.0]) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( pd_result, bf_result.to_pandas(), check_dtype=False, check_index_type=False ) @@ -2047,7 +2050,7 @@ def test_cumprod(scalars_dfs): col_name = "float64_col" bf_result = scalars_df[col_name].cumprod() pd_result = scalars_pandas_df[col_name].cumprod() - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( pd_result, bf_result.to_pandas(), ) @@ -2148,7 +2151,7 @@ def test_groupby_level_sum(scalars_dfs): bf_series = scalars_df[col_name].groupby(level=0).sum() pd_series = scalars_pandas_df[col_name].groupby(level=0).sum() # TODO(swast): Update groupby to use index based on group by key(s). - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( pd_series.sort_index(), bf_series.to_pandas().sort_index(), ) @@ -2162,7 +2165,7 @@ def test_groupby_level_list_sum(scalars_dfs): bf_series = scalars_df[col_name].groupby(level=["rowindex"]).sum() pd_series = scalars_pandas_df[col_name].groupby(level=["rowindex"]).sum() # TODO(swast): Update groupby to use index based on group by key(s). 
- pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( pd_series.sort_index(), bf_series.to_pandas().sort_index(), ) @@ -2279,7 +2282,7 @@ def test_groupby_window_ops(scalars_df_index, scalars_pandas_df_index, operator) scalars_pandas_df_index[col_name].groupby(scalars_pandas_df_index[group_key]) ).astype(bf_series.dtype) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( pd_series, bf_series, ) @@ -2295,7 +2298,7 @@ def test_groupby_window_ops(scalars_df_index, scalars_pandas_df_index, operator) def test_drop_label(scalars_df_index, scalars_pandas_df_index, label, col_name): bf_series = scalars_df_index[col_name].drop(label).to_pandas() pd_series = scalars_pandas_df_index[col_name].drop(label) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( pd_series, bf_series, ) @@ -2305,7 +2308,7 @@ def test_drop_label_list(scalars_df_index, scalars_pandas_df_index): col_name = "int64_col" bf_series = scalars_df_index[col_name].drop([1, 3]).to_pandas() pd_series = scalars_pandas_df_index[col_name].drop([1, 3]) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( pd_series, bf_series, ) @@ -2329,7 +2332,7 @@ def test_drop_label_list(scalars_df_index, scalars_pandas_df_index): def test_drop_duplicates(scalars_df_index, scalars_pandas_df_index, keep, col_name): bf_series = scalars_df_index[col_name].drop_duplicates(keep=keep).to_pandas() pd_series = scalars_pandas_df_index[col_name].drop_duplicates(keep=keep) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( pd_series, bf_series, ) @@ -2366,7 +2369,7 @@ def test_unique(scalars_df_index, scalars_pandas_df_index, col_name): def test_duplicated(scalars_df_index, scalars_pandas_df_index, keep, col_name): bf_series = scalars_df_index[col_name].duplicated(keep=keep).to_pandas() pd_series = scalars_pandas_df_index[col_name].duplicated(keep=keep) - pd.testing.assert_series_equal(pd_series, bf_series, check_dtype=False) + 
bigframes.testing.assert_series_equal(pd_series, bf_series, check_dtype=False) def test_shape(scalars_dfs): @@ -2500,7 +2503,7 @@ def test_head_then_scalar_operation(scalars_dfs): bf_result = (scalars_df["float64_col"].head(1) + 4).to_pandas() pd_result = scalars_pandas_df["float64_col"].head(1) + 4 - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result, pd_result, ) @@ -2516,7 +2519,7 @@ def test_head_then_series_operation(scalars_dfs): "float64_col" ].head(2) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result, pd_result, ) @@ -2527,7 +2530,7 @@ def test_series_peek(scalars_dfs): peek_result = scalars_df["float64_col"].peek(n=3, force=False) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( peek_result, scalars_pandas_df["float64_col"].reindex_like(peek_result), ) @@ -2546,7 +2549,7 @@ def test_series_peek_with_large_results_not_allowed(scalars_dfs): # The metrics won't be fully updated when we call query_and_wait. 
print(session.slot_millis_sum - slot_millis_sum) assert session.slot_millis_sum - slot_millis_sum < 500 - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( peek_result, scalars_pandas_df["float64_col"].reindex_like(peek_result), ) @@ -2560,7 +2563,7 @@ def test_series_peek_multi_index(scalars_dfs): pd_series = scalars_pandas_df.set_index(["string_col", "bool_col"])["float64_col"] pd_series.name = ("2-part", "name") peek_result = bf_series.peek(n=3, force=False) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( peek_result, pd_series.reindex_like(peek_result), ) @@ -2572,7 +2575,7 @@ def test_series_peek_filtered(scalars_dfs): n=3, force=False ) pd_result = scalars_pandas_df[scalars_pandas_df.int64_col > 0]["float64_col"] - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( peek_result, pd_result.reindex_like(peek_result), ) @@ -2588,7 +2591,7 @@ def test_series_peek_force(scalars_dfs): peek_result = df_filtered.peek(n=3, force=True) pd_cumsum_df = scalars_pandas_df[["int64_col", "int64_too"]].cumsum() pd_result = pd_cumsum_df[pd_cumsum_df.int64_col > 0]["int64_too"] - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( peek_result, pd_result.reindex_like(peek_result), ) @@ -2604,7 +2607,7 @@ def test_series_peek_force_float(scalars_dfs): peek_result = df_filtered.peek(n=3, force=True) pd_cumsum_df = scalars_pandas_df[["int64_col", "float64_col"]].cumsum() pd_result = pd_cumsum_df[pd_cumsum_df.float64_col > 0]["float64_col"] - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( peek_result, pd_result.reindex_like(peek_result), ) @@ -2616,7 +2619,7 @@ def test_shift(scalars_df_index, scalars_pandas_df_index): # cumsum does not behave well on nullable ints in pandas, produces object type and never ignores NA pd_result = scalars_pandas_df_index[col_name].shift().astype(pd.Int64Dtype()) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( 
bf_result, pd_result, ) @@ -2627,7 +2630,7 @@ def test_series_ffill(scalars_df_index, scalars_pandas_df_index): bf_result = scalars_df_index[col_name].ffill(limit=1).to_pandas() pd_result = scalars_pandas_df_index[col_name].ffill(limit=1) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result, pd_result, ) @@ -2638,7 +2641,7 @@ def test_series_bfill(scalars_df_index, scalars_pandas_df_index): bf_result = scalars_df_index[col_name].bfill(limit=2).to_pandas() pd_result = scalars_pandas_df_index[col_name].bfill(limit=2) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result, pd_result, ) @@ -2653,7 +2656,7 @@ def test_cumsum_int(scalars_df_index, scalars_pandas_df_index): # cumsum does not behave well on nullable ints in pandas, produces object type and never ignores NA pd_result = scalars_pandas_df_index[col_name].cumsum().astype(pd.Int64Dtype()) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result, pd_result, ) @@ -2674,7 +2677,7 @@ def test_cumsum_int_ordered(scalars_df_index, scalars_pandas_df_index): .astype(pd.Int64Dtype()) ) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result, pd_result, ) @@ -2693,7 +2696,7 @@ def test_series_nlargest(scalars_df_index, scalars_pandas_df_index, keep): bf_result = scalars_df_index[col_name].nlargest(4, keep=keep).to_pandas() pd_result = scalars_pandas_df_index[col_name].nlargest(4, keep=keep) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result, pd_result, ) @@ -2716,7 +2719,7 @@ def test_diff(scalars_df_index, scalars_pandas_df_index, periods): .astype(pd.Int64Dtype()) ) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result, pd_result, ) @@ -2735,7 +2738,7 @@ def test_series_pct_change(scalars_df_index, scalars_pandas_df_index, periods): # cumsum does not behave well on nullable ints in pandas, produces object type and never ignores NA 
pd_result = scalars_pandas_df_index["int64_col"].ffill().pct_change(periods=periods) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result, pd_result, ) @@ -2754,7 +2757,7 @@ def test_series_nsmallest(scalars_df_index, scalars_pandas_df_index, keep): bf_result = scalars_df_index[col_name].nsmallest(2, keep=keep).to_pandas() pd_result = scalars_pandas_df_index[col_name].nsmallest(2, keep=keep) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result, pd_result, ) @@ -2803,7 +2806,7 @@ def test_series_rank( .astype(pd.Float64Dtype()) ) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result, pd_result, ) @@ -2815,7 +2818,7 @@ def test_cast_float_to_int(scalars_df_index, scalars_pandas_df_index): # cumsum does not behave well on nullable floats in pandas, produces object type and never ignores NA pd_result = scalars_pandas_df_index[col_name].astype(pd.Int64Dtype()) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result, pd_result, ) @@ -2827,7 +2830,7 @@ def test_cast_float_to_bool(scalars_df_index, scalars_pandas_df_index): # cumsum does not behave well on nullable floats in pandas, produces object type and never ignores NA pd_result = scalars_pandas_df_index[col_name].astype(pd.BooleanDtype()) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result, pd_result, ) @@ -2845,7 +2848,7 @@ def test_cumsum_nested(scalars_df_index, scalars_pandas_df_index): .astype(pd.Float64Dtype()) ) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result, pd_result, ) @@ -2874,7 +2877,7 @@ def test_nested_analytic_ops_align(scalars_df_index, scalars_pandas_df_index): + pd_series.expanding().max() ) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result, pd_result, ) @@ -2890,7 +2893,7 @@ def test_cumsum_int_filtered(scalars_df_index, scalars_pandas_df_index): # cumsum does not behave 
well on nullable ints in pandas, produces object type and never ignores NA pd_result = pd_col[pd_col > -2].cumsum().astype(pd.Int64Dtype()) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result, pd_result, ) @@ -2902,7 +2905,7 @@ def test_cumsum_float(scalars_df_index, scalars_pandas_df_index): # cumsum does not behave well on nullable floats in pandas, produces object type and never ignores NA pd_result = scalars_pandas_df_index[col_name].cumsum().astype(pd.Float64Dtype()) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result, pd_result, ) @@ -2913,7 +2916,7 @@ def test_cummin_int(scalars_df_index, scalars_pandas_df_index): bf_result = scalars_df_index[col_name].cummin().to_pandas() pd_result = scalars_pandas_df_index[col_name].cummin() - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result, pd_result, ) @@ -2924,7 +2927,7 @@ def test_cummax_int(scalars_df_index, scalars_pandas_df_index): bf_result = scalars_df_index[col_name].cummax().to_pandas() pd_result = scalars_pandas_df_index[col_name].cummax() - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result, pd_result, ) @@ -2957,7 +2960,7 @@ def test_value_counts(scalars_dfs, kwargs): bf_result = s.value_counts(**kwargs).to_pandas() pd_result = pd_s.value_counts(**kwargs) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result, pd_result, ) @@ -2995,7 +2998,7 @@ def test_value_counts_w_cut(scalars_dfs): pd_result = pd_cut.value_counts() pd_result.index = pd_result.index.astype(pd.Int64Dtype()) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result, pd_result.astype(pd.Int64Dtype()), ) @@ -3006,7 +3009,7 @@ def test_iloc_nested(scalars_df_index, scalars_pandas_df_index): bf_result = scalars_df_index["string_col"].iloc[1:].iloc[1:].to_pandas() pd_result = scalars_pandas_df_index["string_col"].iloc[1:].iloc[1:] - 
pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result, pd_result, ) @@ -3035,7 +3038,7 @@ def test_iloc_nested(scalars_df_index, scalars_pandas_df_index): def test_series_iloc(scalars_df_index, scalars_pandas_df_index, start, stop, step): bf_result = scalars_df_index["string_col"].iloc[start:stop:step].to_pandas() pd_result = scalars_pandas_df_index["string_col"].iloc[start:stop:step] - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result, pd_result, ) @@ -3071,7 +3074,7 @@ def test_series_add_prefix(scalars_df_index, scalars_pandas_df_index): pd_result = scalars_pandas_df_index["int64_too"].add_prefix("prefix_") # Index will be object type in pandas, string type in bigframes, but same values - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result, pd_result, check_index_type=False, @@ -3084,7 +3087,7 @@ def test_series_add_suffix(scalars_df_index, scalars_pandas_df_index): pd_result = scalars_pandas_df_index["int64_too"].add_suffix("_suffix") # Index will be object type in pandas, string type in bigframes, but same values - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result, pd_result, check_index_type=False, @@ -3112,7 +3115,7 @@ def test_series_filter_like(scalars_df_index, scalars_pandas_df_index): pd_result = scalars_pandas_df_index["float64_col"].filter(like="ello") - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result, pd_result, ) @@ -3126,7 +3129,7 @@ def test_series_filter_regex(scalars_df_index, scalars_pandas_df_index): pd_result = scalars_pandas_df_index["float64_col"].filter(regex="^[GH].*") - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result, pd_result, ) @@ -3141,7 +3144,7 @@ def test_series_reindex(scalars_df_index, scalars_pandas_df_index): # Pandas uses int64 instead of Int64 (nullable) dtype. 
pd_result.index = pd_result.index.astype(pd.Int64Dtype()) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result, pd_result, ) @@ -3168,7 +3171,7 @@ def test_series_reindex_like(scalars_df_index, scalars_pandas_df_index): # Pandas uses int64 instead of Int64 (nullable) dtype. pd_result.index = pd_result.index.astype(pd.Int64Dtype()) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result, pd_result, ) @@ -3184,7 +3187,7 @@ def test_where_with_series(scalars_df_index, scalars_pandas_df_index): scalars_pandas_df_index["bool_col"], scalars_pandas_df_index["int64_too"] ) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result, pd_result, ) @@ -3209,7 +3212,7 @@ def test_where_with_different_indices(scalars_df_index, scalars_pandas_df_index) ) ) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result, pd_result, ) @@ -3223,7 +3226,7 @@ def test_where_with_default(scalars_df_index, scalars_pandas_df_index): scalars_pandas_df_index["bool_col"] ) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result, pd_result, ) @@ -3243,7 +3246,7 @@ def _is_positive(x): cond=_is_positive, other=lambda x: x * 10 ) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result, pd_result, ) @@ -3292,7 +3295,7 @@ def test_clip_filtered_two_sided(scalars_df_index, scalars_pandas_df_index): upper_pd = scalars_pandas_df_index["int64_too"].iloc[:5] + 1 pd_result = col_pd.clip(lower_pd, upper_pd) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result, pd_result, ) @@ -3307,7 +3310,7 @@ def test_clip_filtered_one_sided(scalars_df_index, scalars_pandas_df_index): lower_pd = scalars_pandas_df_index["int64_too"].iloc[2:] - 1 pd_result = col_pd.clip(lower_pd, None) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result, pd_result, ) @@ -3337,7 +3340,7 @@ def 
test_between(scalars_df_index, scalars_pandas_df_index, left, right, inclusi ) pd_result = scalars_pandas_df_index["int64_col"].between(left, right, inclusive) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result, pd_result.astype(pd.BooleanDtype()), ) @@ -3375,7 +3378,7 @@ def test_series_case_when(scalars_dfs_maybe_ordered): bf_result = bf_series.case_when(bf_conditions).to_pandas() pd_result = pd_series.case_when(pd_conditions) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result, pd_result.astype(pd.Int64Dtype()), ) @@ -3411,7 +3414,7 @@ def test_series_case_when_change_type(scalars_dfs_maybe_ordered): bf_result = bf_series.case_when(bf_conditions).to_pandas() pd_result = pd_series.case_when(pd_conditions) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result, pd_result.astype("string[pyarrow]"), ) @@ -3440,7 +3443,7 @@ def test_to_json(gcs_folder, scalars_df_index, scalars_pandas_df_index): scalars_df_index["int64_col"].to_json(path, lines=True, orient="records") gcs_df = pd.read_json(get_first_file_from_wildcard(path), lines=True) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( gcs_df["int64_col"].astype(pd.Int64Dtype()), scalars_pandas_df_index["int64_col"], check_dtype=False, @@ -3453,7 +3456,7 @@ def test_to_csv(gcs_folder, scalars_df_index, scalars_pandas_df_index): scalars_df_index["int64_col"].to_csv(path) gcs_df = pd.read_csv(get_first_file_from_wildcard(path)) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( gcs_df["int64_col"].astype(pd.Int64Dtype()), scalars_pandas_df_index["int64_col"], check_dtype=False, @@ -3582,7 +3585,7 @@ def test_series_values(scalars_df_index, scalars_pandas_df_index): pd_result = scalars_pandas_df_index["int64_too"].values # Numpy isn't equipped to compare non-numeric objects, so convert back to dataframe - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( 
pd.Series(bf_result), pd.Series(pd_result), check_dtype=False ) @@ -3615,7 +3618,7 @@ def test_sort_values(scalars_df_index, scalars_pandas_df_index, ascending, na_po ascending=ascending, na_position=na_position ) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result, pd_result, ) @@ -3628,7 +3631,7 @@ def test_series_sort_values_inplace(scalars_df_index, scalars_pandas_df_index): bf_result = bf_series.to_pandas() pd_result = scalars_pandas_df_index["int64_col"].sort_values(ascending=False) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result, pd_result, ) @@ -3647,7 +3650,7 @@ def test_sort_index(scalars_df_index, scalars_pandas_df_index, ascending): ) pd_result = scalars_pandas_df_index["int64_too"].sort_index(ascending=ascending) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result, pd_result, ) @@ -3659,7 +3662,7 @@ def test_series_sort_index_inplace(scalars_df_index, scalars_pandas_df_index): bf_result = bf_series.to_pandas() pd_result = scalars_pandas_df_index["int64_too"].sort_index(ascending=False) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result, pd_result, ) @@ -3711,7 +3714,7 @@ def _ten_times(x): cond=lambda x: x > 0, other=_ten_times ) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result, pd_result, ) @@ -3820,7 +3823,7 @@ def test_astype(scalars_df_index, scalars_pandas_df_index, column, to_type, erro pytest.importorskip("pandas", minversion="2.0.0") bf_result = scalars_df_index[column].astype(to_type, errors=errors).to_pandas() pd_result = scalars_pandas_df_index[column].astype(to_type) - pd.testing.assert_series_equal(bf_result, pd_result) + bigframes.testing.assert_series_equal(bf_result, pd_result) def test_series_astype_python(session): @@ -3831,7 +3834,7 @@ def test_series_astype_python(session): index=pd.Index([0, 1, 2, 3], dtype="Int64"), ) result = 
session.read_pandas(input).astype(float, errors="null").to_pandas() - pd.testing.assert_series_equal(result, exepcted) + bigframes.testing.assert_series_equal(result, exepcted) def test_astype_safe(session): @@ -3842,7 +3845,7 @@ def test_astype_safe(session): index=pd.Index([0, 1, 2, 3], dtype="Int64"), ) result = session.read_pandas(input).astype("Float64", errors="null").to_pandas() - pd.testing.assert_series_equal(result, exepcted) + bigframes.testing.assert_series_equal(result, exepcted) def test_series_astype_w_invalid_error(session): @@ -3863,7 +3866,7 @@ def test_astype_numeric_to_int(scalars_df_index, scalars_pandas_df_index): .apply(lambda x: None if pd.isna(x) else math.trunc(x)) .astype(to_type) ) - pd.testing.assert_series_equal(bf_result, pd_result) + bigframes.testing.assert_series_equal(bf_result, pd_result) @pytest.mark.parametrize( @@ -3881,7 +3884,7 @@ def test_date_time_astype_int( pytest.importorskip("pandas", minversion="2.0.0") bf_result = scalars_df_index[column].astype(to_type).to_pandas() pd_result = scalars_pandas_df_index[column].astype(to_type) - pd.testing.assert_series_equal(bf_result, pd_result, check_dtype=False) + bigframes.testing.assert_series_equal(bf_result, pd_result, check_dtype=False) assert bf_result.dtype == "Int64" @@ -3892,7 +3895,7 @@ def test_string_astype_int(session): pd_result = pd_series.astype("Int64") bf_result = bf_series.astype("Int64").to_pandas() - pd.testing.assert_series_equal(bf_result, pd_result, check_index_type=False) + bigframes.testing.assert_series_equal(bf_result, pd_result, check_index_type=False) def test_string_astype_float(session): @@ -3905,7 +3908,7 @@ def test_string_astype_float(session): pd_result = pd_series.astype("Float64") bf_result = bf_series.astype("Float64").to_pandas() - pd.testing.assert_series_equal(bf_result, pd_result, check_index_type=False) + bigframes.testing.assert_series_equal(bf_result, pd_result, check_index_type=False) def test_string_astype_date(session): @@ -3925,7 
+3928,7 @@ def test_string_astype_date(session): pd_result = pd_series.astype("date32[day][pyarrow]") # type: ignore bf_result = bf_series.astype("date32[day][pyarrow]").to_pandas() - pd.testing.assert_series_equal(bf_result, pd_result, check_index_type=False) + bigframes.testing.assert_series_equal(bf_result, pd_result, check_index_type=False) def test_string_astype_datetime(session): @@ -3938,7 +3941,7 @@ def test_string_astype_datetime(session): pd_result = pd_series.astype(pd.ArrowDtype(pa.timestamp("us"))) bf_result = bf_series.astype(pd.ArrowDtype(pa.timestamp("us"))).to_pandas() - pd.testing.assert_series_equal(bf_result, pd_result, check_index_type=False) + bigframes.testing.assert_series_equal(bf_result, pd_result, check_index_type=False) def test_string_astype_timestamp(session): @@ -3957,7 +3960,7 @@ def test_string_astype_timestamp(session): pd.ArrowDtype(pa.timestamp("us", tz="UTC")) ).to_pandas() - pd.testing.assert_series_equal(bf_result, pd_result, check_index_type=False) + bigframes.testing.assert_series_equal(bf_result, pd_result, check_index_type=False) def test_timestamp_astype_string(session): @@ -3979,7 +3982,7 @@ def test_timestamp_astype_string(session): ) bf_result = bf_series.astype(pa.string()).to_pandas() - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result, expected_result, check_index_type=False, check_dtype=False ) assert bf_result.dtype == "string[pyarrow]" @@ -3995,7 +3998,7 @@ def test_float_astype_json(errors, session): expected_result = pd.Series(data, dtype=dtypes.JSON_DTYPE) expected_result.index = expected_result.index.astype("Int64") - pd.testing.assert_series_equal(bf_result.to_pandas(), expected_result) + bigframes.testing.assert_series_equal(bf_result.to_pandas(), expected_result) def test_float_astype_json_str(session): @@ -4007,7 +4010,7 @@ def test_float_astype_json_str(session): expected_result = pd.Series(data, dtype=dtypes.JSON_DTYPE) expected_result.index = 
expected_result.index.astype("Int64") - pd.testing.assert_series_equal(bf_result.to_pandas(), expected_result) + bigframes.testing.assert_series_equal(bf_result.to_pandas(), expected_result) @pytest.mark.parametrize("errors", ["raise", "null"]) @@ -4024,7 +4027,7 @@ def test_string_astype_json(errors, session): assert bf_result.dtype == dtypes.JSON_DTYPE pd_result = bf_series.to_pandas().astype(dtypes.JSON_DTYPE) - pd.testing.assert_series_equal(bf_result.to_pandas(), pd_result) + bigframes.testing.assert_series_equal(bf_result.to_pandas(), pd_result) def test_string_astype_json_in_safe_mode(session): @@ -4035,7 +4038,7 @@ def test_string_astype_json_in_safe_mode(session): expected = pd.Series([None], dtype=dtypes.JSON_DTYPE) expected.index = expected.index.astype("Int64") - pd.testing.assert_series_equal(bf_result.to_pandas(), expected) + bigframes.testing.assert_series_equal(bf_result.to_pandas(), expected) def test_string_astype_json_raise_error(session): @@ -4073,7 +4076,7 @@ def test_json_astype_others(data, to_type, errors, session): load_data = [json.loads(item) if item is not None else None for item in data] expected = pd.Series(load_data, dtype=to_type) expected.index = expected.index.astype("Int64") - pd.testing.assert_series_equal(bf_result.to_pandas(), expected) + bigframes.testing.assert_series_equal(bf_result.to_pandas(), expected) @pytest.mark.parametrize( @@ -4107,7 +4110,7 @@ def test_json_astype_others_in_safe_mode(data, to_type, session): expected = pd.Series([None, None], dtype=to_type) expected.index = expected.index.astype("Int64") - pd.testing.assert_series_equal(bf_result.to_pandas(), expected) + bigframes.testing.assert_series_equal(bf_result.to_pandas(), expected) @pytest.mark.parametrize( @@ -4130,7 +4133,7 @@ def test_loc_bool_series_explicit_index(scalars_df_index, scalars_pandas_df_inde bf_result = scalars_df_index.string_col.loc[scalars_df_index.bool_col].to_pandas() pd_result = 
scalars_pandas_df_index.string_col.loc[scalars_pandas_df_index.bool_col] - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result, pd_result, ) @@ -4191,7 +4194,7 @@ def test_rename(scalars_df_index, scalars_pandas_df_index): bf_result = scalars_df_index.string_col.rename("newname") pd_result = scalars_pandas_df_index.string_col.rename("newname") - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result.to_pandas(), pd_result, ) @@ -4201,7 +4204,7 @@ def test_rename_nonstring(scalars_df_index, scalars_pandas_df_index): bf_result = scalars_df_index.string_col.rename((4, 2)) pd_result = scalars_pandas_df_index.string_col.rename((4, 2)) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result.to_pandas(), pd_result, ) @@ -4213,7 +4216,7 @@ def test_rename_dict_same_type(scalars_df_index, scalars_pandas_df_index): pd_result.index = pd_result.index.astype("Int64") - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result.to_pandas(), pd_result, ) @@ -4223,7 +4226,7 @@ def test_rename_axis(scalars_df_index, scalars_pandas_df_index): bf_result = scalars_df_index.string_col.rename_axis("newindexname") pd_result = scalars_pandas_df_index.string_col.rename_axis("newindexname") - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result.to_pandas(), pd_result, ) @@ -4240,7 +4243,7 @@ def test_loc_list_string_index(scalars_df_index, scalars_pandas_df_index): bf_result = scalars_df_index.string_col.loc[index_list] pd_result = scalars_pandas_df_index.string_col.loc[index_list] - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result.to_pandas(), pd_result, ) @@ -4252,7 +4255,7 @@ def test_loc_list_integer_index(scalars_df_index, scalars_pandas_df_index): bf_result = scalars_df_index.bool_col.loc[index_list] pd_result = scalars_pandas_df_index.bool_col.loc[index_list] - pd.testing.assert_series_equal( + 
bigframes.testing.assert_series_equal( bf_result.to_pandas(), pd_result, ) @@ -4268,7 +4271,7 @@ def test_loc_list_multiindex(scalars_df_index, scalars_pandas_df_index): bf_result = scalars_df_multiindex.int64_too.loc[index_list] pd_result = scalars_pandas_df_multiindex.int64_too.loc[index_list] - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result.to_pandas(), pd_result, ) @@ -4280,7 +4283,7 @@ def test_iloc_list(scalars_df_index, scalars_pandas_df_index): bf_result = scalars_df_index.string_col.iloc[index_list] pd_result = scalars_pandas_df_index.string_col.iloc[index_list] - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result.to_pandas(), pd_result, ) @@ -4294,7 +4297,7 @@ def test_iloc_list_nameless(scalars_df_index, scalars_pandas_df_index): pd_series = scalars_pandas_df_index.string_col.rename(None) pd_result = pd_series.iloc[index_list] - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result.to_pandas(), pd_result, ) @@ -4309,7 +4312,7 @@ def test_loc_list_nameless(scalars_df_index, scalars_pandas_df_index): pd_series = scalars_pandas_df_index.string_col.rename(None) pd_result = pd_series.loc[index_list] - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result.to_pandas(), pd_result, ) @@ -4325,7 +4328,7 @@ def test_loc_bf_series_string_index(scalars_df_index, scalars_pandas_df_index): bf_result = scalars_df_index.date_col.loc[bf_string_series] pd_result = scalars_pandas_df_index.date_col.loc[pd_string_series] - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result.to_pandas(), pd_result, ) @@ -4343,7 +4346,7 @@ def test_loc_bf_series_multiindex(scalars_df_index, scalars_pandas_df_index): bf_result = scalars_df_multiindex.int64_too.loc[bf_string_series] pd_result = scalars_pandas_df_multiindex.int64_too.loc[pd_string_series] - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( 
bf_result.to_pandas(), pd_result, ) @@ -4356,7 +4359,7 @@ def test_loc_bf_index_integer_index(scalars_df_index, scalars_pandas_df_index): bf_result = scalars_df_index.date_col.loc[bf_index] pd_result = scalars_pandas_df_index.date_col.loc[pd_index] - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result.to_pandas(), pd_result, ) @@ -4370,7 +4373,7 @@ def test_loc_single_index_with_duplicate(scalars_df_index, scalars_pandas_df_ind index = "Hello, World!" bf_result = scalars_df_index.date_col.loc[index] pd_result = scalars_pandas_df_index.date_col.loc[index] - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result.to_pandas(), pd_result, ) @@ -4455,7 +4458,7 @@ def test_map_dict_input(scalars_dfs): pd_result = pd_result.astype("Int64") # pandas type differences bf_result = scalars_df.string_col.map(local_map) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result.to_pandas(), pd_result, ) @@ -4474,7 +4477,7 @@ def test_map_series_input(scalars_dfs): pd_result = scalars_pandas_df.int64_too.map(pd_map_series) bf_result = scalars_df.int64_too.map(bf_map_series) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_result.to_pandas(), pd_result, ) @@ -4735,7 +4738,7 @@ def foo(x: int, y: int, df): def test_series_explode(data): s = bigframes.pandas.Series(data) pd_s = s.to_pandas() - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( s.explode().to_pandas(), pd_s.explode(), check_index_type=False, @@ -4781,7 +4784,7 @@ def test_series_explode_w_index(index, ignore_index): s = bigframes.pandas.Series(data, index=index) pd_s = pd.Series(data, index=index) # TODO(b/340885567): fix type error - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( s.explode(ignore_index=ignore_index).to_pandas(), # type: ignore pd_s.explode(ignore_index=ignore_index).astype(pd.Float64Dtype()), # type: ignore check_index_type=False, @@ -4806,7 
+4809,7 @@ def test_series_explode_reserve_order(ignore_index, ordered): # TODO(b/340885567): fix type error pd_res = pd_s.explode(ignore_index=ignore_index).astype(pd.Int64Dtype()) # type: ignore pd_res.index = pd_res.index.astype(pd.Int64Dtype()) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( res if ordered else res.sort_index(), pd_res, ) @@ -4828,7 +4831,7 @@ def test_series_construct_empty_array(): dtype=pd.ArrowDtype(pa.list_(pa.float64())), index=pd.Index([0], dtype=pd.Int64Dtype()), ) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( expected, s.to_pandas(), ) @@ -4845,7 +4848,7 @@ def test_series_construct_empty_array(): ) def test_series_explode_null(data): s = bigframes.pandas.Series(data) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( s.explode().to_pandas(), s.to_pandas().explode(), check_dtype=False, @@ -4870,7 +4873,7 @@ def test_resample(scalars_df_index, scalars_pandas_df_index, append, level, col, ] bf_result = scalars_df_index.resample(rule=rule, level=level).min().to_pandas() pd_result = scalars_pandas_df_index.resample(rule=rule, level=level).min() - pd.testing.assert_series_equal(bf_result, pd_result) + bigframes.testing.assert_series_equal(bf_result, pd_result) def test_series_struct_get_field_by_attribute( @@ -4882,13 +4885,13 @@ def test_series_struct_get_field_by_attribute( bf_series = nested_structs_df["person"] df_series = nested_structs_pandas_df["person"] - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_series.address.city.to_pandas(), df_series.struct.field("address").struct.field("city"), check_dtype=False, check_index=False, ) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( bf_series.address.country.to_pandas(), df_series.struct.field("address").struct.field("country"), check_dtype=False, diff --git a/tests/system/small/test_session.py b/tests/system/small/test_session.py index 
0501df3f8c..9d37f23f18 100644 --- a/tests/system/small/test_session.py +++ b/tests/system/small/test_session.py @@ -37,6 +37,7 @@ import bigframes.dtypes import bigframes.ml.linear_model import bigframes.session.execution_spec +import bigframes.testing from bigframes.testing import utils all_write_engines = pytest.mark.parametrize( @@ -326,7 +327,7 @@ def test_read_gbq_w_anonymous_query_results_table(session: bigframes.Session): df = session.read_gbq(destination, index_col="name") result = df.to_pandas() expected.index = expected.index.astype(result.index.dtype) - pd.testing.assert_frame_equal(result, expected, check_dtype=False) + bigframes.testing.assert_frame_equal(result, expected, check_dtype=False) def test_read_gbq_w_primary_keys_table( @@ -349,7 +350,7 @@ def test_read_gbq_w_primary_keys_table( # Verify that the DataFrame is already sorted by primary keys. sorted_result = result.sort_values(primary_keys) - pd.testing.assert_frame_equal(result, sorted_result) + bigframes.testing.assert_frame_equal(result, sorted_result) # Verify that we're working from a snapshot rather than a copy of the table. assert "FOR SYSTEM_TIME AS OF" in df.sql @@ -388,7 +389,7 @@ def test_read_gbq_w_primary_keys_table_and_filters( # Verify that the DataFrame is already sorted by primary keys. 
sorted_result = result.sort_values(primary_keys) - pd.testing.assert_frame_equal(result, sorted_result) + bigframes.testing.assert_frame_equal(result, sorted_result) @pytest.mark.parametrize( @@ -533,7 +534,9 @@ def test_read_gbq_w_ambigous_name( .to_pandas() ) pd_df = pd.DataFrame({"x": [2, 1], "ambiguous_name": [20, 10]}) - pd.testing.assert_frame_equal(df, pd_df, check_dtype=False, check_index_type=False) + bigframes.testing.assert_frame_equal( + df, pd_df, check_dtype=False, check_index_type=False + ) def test_read_gbq_table_clustered_with_filter(session: bigframes.Session): @@ -768,8 +771,8 @@ def test_read_gbq_w_json_and_compare_w_pandas_json(session): dtype=pd.ArrowDtype(db_dtypes.JSONArrowType()), ) pd_df.index = pd_df.index.astype("Int64") - pd.testing.assert_series_equal(df.dtypes, pd_df.dtypes) - pd.testing.assert_series_equal(df["json_col"].to_pandas(), pd_df["json_col"]) + bigframes.testing.assert_series_equal(df.dtypes, pd_df.dtypes) + bigframes.testing.assert_series_equal(df["json_col"].to_pandas(), pd_df["json_col"]) def test_read_gbq_w_json_in_struct(session): @@ -867,7 +870,7 @@ def test_read_pandas(session, scalars_dfs): result = df.to_pandas() expected = scalars_pandas_df - pd.testing.assert_frame_equal(result, expected) + bigframes.testing.assert_frame_equal(result, expected) def test_read_pandas_series(session): @@ -876,7 +879,7 @@ def test_read_pandas_series(session): pd_series = pd.Series([3, 1, 4, 1, 5], dtype=pd.Int64Dtype(), index=idx) bf_series = session.read_pandas(pd_series) - pd.testing.assert_series_equal(bf_series.to_pandas(), pd_series) + bigframes.testing.assert_series_equal(bf_series.to_pandas(), pd_series) def test_read_pandas_index(session): @@ -884,7 +887,7 @@ def test_read_pandas_index(session): pd_idx: pd.Index = pd.Index([2, 7, 1, 2, 8], dtype=pd.Int64Dtype()) bf_idx = session.read_pandas(pd_idx) - pd.testing.assert_index_equal(bf_idx.to_pandas(), pd_idx) + bigframes.testing.assert_index_equal(bf_idx.to_pandas(), pd_idx) 
def test_read_pandas_w_unsupported_mixed_dtype(session): @@ -914,7 +917,7 @@ def test_read_pandas_col_label_w_space(session: bigframes.Session): ) result = session.read_pandas(expected).to_pandas() - pd.testing.assert_frame_equal( + bigframes.testing.assert_frame_equal( result, expected, check_index_type=False, check_dtype=False ) @@ -922,7 +925,7 @@ def test_read_pandas_col_label_w_space(session: bigframes.Session): def test_read_pandas_multi_index(session, scalars_pandas_df_multi_index): df = session.read_pandas(scalars_pandas_df_multi_index) result = df.to_pandas() - pd.testing.assert_frame_equal(result, scalars_pandas_df_multi_index) + bigframes.testing.assert_frame_equal(result, scalars_pandas_df_multi_index) def test_read_pandas_rowid_exists_adds_suffix(session, scalars_pandas_df_default_index): @@ -930,7 +933,7 @@ def test_read_pandas_rowid_exists_adds_suffix(session, scalars_pandas_df_default pandas_df["rowid"] = np.arange(pandas_df.shape[0]) df_roundtrip = session.read_pandas(pandas_df).to_pandas() - pd.testing.assert_frame_equal(df_roundtrip, pandas_df, check_dtype=False) + bigframes.testing.assert_frame_equal(df_roundtrip, pandas_df, check_dtype=False) def test_read_pandas_tokyo( @@ -969,12 +972,14 @@ def test_read_pandas_timedelta_dataframes(session, write_engine): expected_result = pandas_df.astype(bigframes.dtypes.TIMEDELTA_DTYPE) expected_result.index = expected_result.index.astype(bigframes.dtypes.INT_DTYPE) - pd.testing.assert_frame_equal(actual_result, expected_result) + bigframes.testing.assert_frame_equal(actual_result, expected_result) @all_write_engines def test_read_pandas_timedelta_series(session, write_engine): - expected_series = pd.Series(pd.to_timedelta([1, 2, 3], unit="d")) + expected_series = pd.Series(pd.to_timedelta([1, 2, 3], unit="d")).astype( + "timedelta64[ns]" + ) actual_result = ( session.read_pandas(expected_series, write_engine=write_engine) @@ -982,15 +987,15 @@ def test_read_pandas_timedelta_series(session, write_engine): 
.astype("timedelta64[ns]") ) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( actual_result, expected_series, check_index_type=False ) @all_write_engines def test_read_pandas_timedelta_index(session, write_engine): - expected_index = pd.to_timedelta( - [1, 2, 3], unit="d" + expected_index = pd.to_timedelta([1, 2, 3], unit="d").astype( + "timedelta64[ns]" ) # to_timedelta returns an index actual_result = ( @@ -999,7 +1004,7 @@ def test_read_pandas_timedelta_index(session, write_engine): .astype("timedelta64[ns]") ) - pd.testing.assert_index_equal(actual_result, expected_index) + bigframes.testing.assert_index_equal(actual_result, expected_index) @all_write_engines @@ -1018,7 +1023,9 @@ def test_read_pandas_json_dataframes(session, write_engine): expected_df, write_engine=write_engine ).to_pandas() - pd.testing.assert_frame_equal(actual_result, expected_df, check_index_type=False) + bigframes.testing.assert_frame_equal( + actual_result, expected_df, check_index_type=False + ) @all_write_engines @@ -1034,7 +1041,7 @@ def test_read_pandas_json_series(session, write_engine): actual_result = session.read_pandas( expected_series, write_engine=write_engine ).to_pandas() - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( actual_result, expected_series, check_index_type=False ) @@ -1062,7 +1069,7 @@ def test_read_pandas_json_index(session, write_engine): actual_result = session.read_pandas( expected_index, write_engine=write_engine ).to_pandas() - pd.testing.assert_index_equal(actual_result, expected_index) + bigframes.testing.assert_index_equal(actual_result, expected_index) @pytest.mark.parametrize( @@ -1121,7 +1128,7 @@ def test_read_pandas_w_nested_json(session, write_engine): .to_pandas() .reset_index(drop=True) ) - pd.testing.assert_series_equal(bq_s, pd_s) + bigframes.testing.assert_series_equal(bq_s, pd_s) @pytest.mark.parametrize( @@ -1203,7 +1210,7 @@ def test_read_pandas_w_nested_json_index(session, write_engine): 
), ) bq_idx = session.read_pandas(pd_idx, write_engine=write_engine).to_pandas() - pd.testing.assert_index_equal(bq_idx, pd_idx) + bigframes.testing.assert_index_equal(bq_idx, pd_idx) @all_write_engines @@ -1217,13 +1224,13 @@ def test_read_csv_for_gcs_file_w_write_engine(session, df_and_gcs_csv, write_eng write_engine=write_engine, dtype=scalars_df.dtypes.to_dict(), ) - pd.testing.assert_frame_equal(pd_df.to_pandas(), scalars_df.to_pandas()) + bigframes.testing.assert_frame_equal(pd_df.to_pandas(), scalars_df.to_pandas()) if write_engine in ("default", "bigquery_load"): bf_df = session.read_csv( path, engine="bigquery", index_col="rowindex", write_engine=write_engine ) - pd.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) + bigframes.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) @pytest.mark.parametrize( @@ -1251,8 +1258,8 @@ def test_read_csv_for_local_file_w_sep(session, df_and_local_csv, sep): pd_df = session.read_csv( buffer, index_col="rowindex", sep=sep, dtype=scalars_df.dtypes.to_dict() ) - pd.testing.assert_frame_equal(bf_df.to_pandas(), scalars_df.to_pandas()) - pd.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) + bigframes.testing.assert_frame_equal(bf_df.to_pandas(), scalars_df.to_pandas()) + bigframes.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) @pytest.mark.parametrize( @@ -1284,7 +1291,7 @@ def test_read_csv_for_index_col_w_false(session, df_and_local_csv, index_col): # (b/280889935) or guarantee row ordering. 
bf_df = bf_df.set_index("rowindex").sort_index() pd_df = pd_df.set_index("rowindex") - pd.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) + bigframes.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) @pytest.mark.parametrize( @@ -1307,7 +1314,7 @@ def test_read_csv_for_index_col(session, df_and_gcs_csv, index_col): ) assert bf_df.shape == pd_df.shape - pd.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) + bigframes.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) @pytest.mark.parametrize( @@ -1360,7 +1367,7 @@ def test_read_csv_for_gcs_wildcard_path(session, df_and_gcs_csv): assert bf_df.shape == pd_df.shape assert bf_df.columns.tolist() == pd_df.columns.tolist() - pd.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) + bigframes.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) def test_read_csv_for_names(session, df_and_gcs_csv_for_two_columns): @@ -1379,7 +1386,7 @@ def test_read_csv_for_names(session, df_and_gcs_csv_for_two_columns): # (b/280889935) or guarantee row ordering. bf_df = bf_df.set_index(names[0]).sort_index() pd_df = pd_df.set_index(names[0]) - pd.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) + bigframes.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) def test_read_csv_for_names_more_than_columns_can_raise_error( @@ -1408,7 +1415,7 @@ def test_read_csv_for_names_less_than_columns(session, df_and_gcs_csv_for_two_co # Pandas's index name is None, while BigFrames's index name is "rowindex". 
pd_df.index.name = "rowindex" - pd.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) + bigframes.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) def test_read_csv_for_names_less_than_columns_raise_error_when_index_col_set( @@ -1446,7 +1453,7 @@ def test_read_csv_for_names_and_index_col( assert bf_df.shape == pd_df.shape assert bf_df.columns.tolist() == pd_df.columns.tolist() - pd.testing.assert_frame_equal( + bigframes.testing.assert_frame_equal( bf_df.to_pandas(), pd_df.to_pandas(), check_index_type=False ) @@ -1478,7 +1485,7 @@ def test_read_csv_for_names_and_usecols( # (b/280889935) or guarantee row ordering. bf_df = bf_df.set_index(names[0]).sort_index() pd_df = pd_df.set_index(names[0]) - pd.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) + bigframes.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) def test_read_csv_for_names_and_invalid_usecols( @@ -1525,7 +1532,7 @@ def test_read_csv_for_names_and_usecols_and_indexcol( assert bf_df.shape == pd_df.shape assert bf_df.columns.tolist() == pd_df.columns.tolist() - pd.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) + bigframes.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) def test_read_csv_for_names_less_than_columns_and_same_usecols( @@ -1548,7 +1555,7 @@ def test_read_csv_for_names_less_than_columns_and_same_usecols( # (b/280889935) or guarantee row ordering. bf_df = bf_df.set_index(names[0]).sort_index() pd_df = pd_df.set_index(names[0]) - pd.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) + bigframes.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) def test_read_csv_for_names_less_than_columns_and_mismatched_usecols( @@ -1593,7 +1600,7 @@ def test_read_csv_for_dtype(session, df_and_gcs_csv_for_two_columns): # (b/280889935) or guarantee row ordering. 
bf_df = bf_df.set_index("rowindex").sort_index() pd_df = pd_df.set_index("rowindex") - pd.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) + bigframes.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) def test_read_csv_for_dtype_w_names(session, df_and_gcs_csv_for_two_columns): @@ -1613,7 +1620,7 @@ def test_read_csv_for_dtype_w_names(session, df_and_gcs_csv_for_two_columns): # (b/280889935) or guarantee row ordering. bf_df = bf_df.set_index("a").sort_index() pd_df = pd_df.set_index("a") - pd.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) + bigframes.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) @pytest.mark.parametrize( @@ -1680,8 +1687,8 @@ def test_read_csv_for_gcs_file_w_header(session, df_and_gcs_csv, header): # (b/280889935) or guarantee row ordering. bf_df = bf_df.set_index("rowindex").sort_index() pd_df = pd_df.set_index("rowindex") - pd.testing.assert_frame_equal(bf_df.to_pandas(), scalars_df.to_pandas()) - pd.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) + bigframes.testing.assert_frame_equal(bf_df.to_pandas(), scalars_df.to_pandas()) + bigframes.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) def test_read_csv_w_usecols(session, df_and_local_csv): @@ -1709,7 +1716,7 @@ def test_read_csv_w_usecols(session, df_and_local_csv): # (b/280889935) or guarantee row ordering. 
bf_df = bf_df.set_index("rowindex").sort_index() pd_df = pd_df.set_index("rowindex") - pd.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) + bigframes.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) def test_read_csv_w_usecols_and_indexcol(session, df_and_local_csv): @@ -1735,7 +1742,7 @@ def test_read_csv_w_usecols_and_indexcol(session, df_and_local_csv): assert bf_df.shape == pd_df.shape assert bf_df.columns.tolist() == pd_df.columns.tolist() - pd.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) + bigframes.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) def test_read_csv_w_indexcol_not_in_usecols(session, df_and_local_csv): @@ -1790,10 +1797,10 @@ def test_read_csv_local_w_encoding(session, penguins_pandas_df_default_index): bf_df = session.read_csv( path, engine="bigquery", index_col="rowindex", encoding="ISO-8859-1" ) - pd.testing.assert_frame_equal( + bigframes.testing.assert_frame_equal( bf_df.to_pandas(), penguins_pandas_df_default_index ) - pd.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) + bigframes.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) def test_read_pickle_local(session, penguins_pandas_df_default_index, tmp_path): @@ -1802,7 +1809,9 @@ def test_read_pickle_local(session, penguins_pandas_df_default_index, tmp_path): penguins_pandas_df_default_index.to_pickle(path) df = session.read_pickle(path) - pd.testing.assert_frame_equal(penguins_pandas_df_default_index, df.to_pandas()) + bigframes.testing.assert_frame_equal( + penguins_pandas_df_default_index, df.to_pandas() + ) def test_read_pickle_buffer(session, penguins_pandas_df_default_index): @@ -1811,7 +1820,9 @@ def test_read_pickle_buffer(session, penguins_pandas_df_default_index): buffer.seek(0) df = session.read_pickle(buffer) - pd.testing.assert_frame_equal(penguins_pandas_df_default_index, df.to_pandas()) + bigframes.testing.assert_frame_equal( + penguins_pandas_df_default_index, 
df.to_pandas() + ) def test_read_pickle_series_buffer(session): @@ -1830,7 +1841,9 @@ def test_read_pickle_gcs(session, penguins_pandas_df_default_index, gcs_folder): penguins_pandas_df_default_index.to_pickle(path) df = session.read_pickle(path) - pd.testing.assert_frame_equal(penguins_pandas_df_default_index, df.to_pandas()) + bigframes.testing.assert_frame_equal( + penguins_pandas_df_default_index, df.to_pandas() + ) @pytest.mark.parametrize( @@ -1903,7 +1916,7 @@ def test_read_parquet_gcs( assert df_out.size != 0 pd_df_in = df_in.to_pandas() pd_df_out = df_out.to_pandas() - pd.testing.assert_frame_equal(pd_df_in, pd_df_out) + bigframes.testing.assert_frame_equal(pd_df_in, pd_df_out) @pytest.mark.parametrize( @@ -1953,7 +1966,7 @@ def test_read_parquet_gcs_compressed( assert df_out.size != 0 pd_df_in = df_in.to_pandas() pd_df_out = df_out.to_pandas() - pd.testing.assert_frame_equal(pd_df_in, pd_df_out) + bigframes.testing.assert_frame_equal(pd_df_in, pd_df_out) @pytest.mark.parametrize( @@ -1998,7 +2011,7 @@ def test_read_json_gcs_bq_engine(session, scalars_dfs, gcs_folder): df = session.read_json(read_path, lines=True, orient="records", engine="bigquery") # The auto detects of BigQuery load job does not preserve any ordering of columns for json. 
- pd.testing.assert_index_equal( + bigframes.testing.assert_index_equal( df.columns.sort_values(), scalars_df.columns.sort_values() ) @@ -2023,7 +2036,7 @@ def test_read_json_gcs_bq_engine(session, scalars_dfs, gcs_folder): ] ) assert df.shape[0] == scalars_df.shape[0] - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( df.dtypes.sort_index(), scalars_df.dtypes.sort_index() ) @@ -2049,7 +2062,7 @@ def test_read_json_gcs_default_engine(session, scalars_dfs, gcs_folder): orient="records", ) - pd.testing.assert_index_equal(df.columns, scalars_df.columns) + bigframes.testing.assert_index_equal(df.columns, scalars_df.columns) # The auto detects of BigQuery load job have restrictions to detect the bytes, # numeric and geometry types, so they're skipped here. @@ -2063,7 +2076,7 @@ def test_read_json_gcs_default_engine(session, scalars_dfs, gcs_folder): scalars_df = scalars_df.drop(columns=["date_col", "datetime_col", "time_col"]) assert df.shape[0] == scalars_df.shape[0] - pd.testing.assert_series_equal(df.dtypes, scalars_df.dtypes) + bigframes.testing.assert_series_equal(df.dtypes, scalars_df.dtypes) @pytest.mark.parametrize( @@ -2211,7 +2224,7 @@ def _assert_query_dry_run_stats_are_valid(result: pd.Series): ] ) - pd.testing.assert_index_equal(result.index, expected_index) + bigframes.testing.assert_index_equal(result.index, expected_index) assert result["columnCount"] + result["indexLevel"] > 0 @@ -2231,5 +2244,5 @@ def _assert_table_dry_run_stats_are_valid(result: pd.Series): ] ) - pd.testing.assert_index_equal(result.index, expected_index) + bigframes.testing.assert_index_equal(result.index, expected_index) assert result["columnCount"] == len(result["columnDtypes"]) diff --git a/tests/system/small/test_unordered.py b/tests/system/small/test_unordered.py index c7ff0ca1dd..b024b9eb72 100644 --- a/tests/system/small/test_unordered.py +++ b/tests/system/small/test_unordered.py @@ -261,6 +261,8 @@ def test_resample_with_index(unordered_session, rule, 
origin, data): assert isinstance(bf_result.index, bpd.DatetimeIndex) assert isinstance(pd_result.index, pd.DatetimeIndex) + # TODO: (b/484364312) + pd_result.index.name = bf_result.index.name pd.testing.assert_frame_equal( bf_result.to_pandas(), pd_result, diff --git a/tests/system/small/test_window.py b/tests/system/small/test_window.py index 29ab581f76..e5d6d44c13 100644 --- a/tests/system/small/test_window.py +++ b/tests/system/small/test_window.py @@ -19,6 +19,7 @@ import pytest from bigframes import dtypes +import bigframes.testing @pytest.fixture(scope="module") @@ -61,7 +62,9 @@ def test_dataframe_rolling_closed_param(rows_rolling_dfs, closed): actual_result = bf_df.rolling(window=3, closed=closed).sum().to_pandas() expected_result = pd_df.rolling(window=3, closed=closed).sum() - pd.testing.assert_frame_equal(actual_result, expected_result, check_dtype=False) + bigframes.testing.assert_frame_equal( + actual_result, expected_result, check_dtype=False + ) @pytest.mark.parametrize("closed", ["left", "right", "both", "neither"]) @@ -80,7 +83,7 @@ def test_dataframe_groupby_rolling_closed_param(rows_rolling_dfs, closed): expected_result = ( pd_df.groupby(pd_df["int64_too"] % 2).rolling(window=3, closed=closed).sum() ) - pd.testing.assert_frame_equal( + bigframes.testing.assert_frame_equal( actual_result[check_columns], expected_result, check_dtype=False ) @@ -91,7 +94,9 @@ def test_dataframe_rolling_on(rows_rolling_dfs): actual_result = bf_df.rolling(window=3, on="int64_too").sum().to_pandas() expected_result = pd_df.rolling(window=3, on="int64_too").sum() - pd.testing.assert_frame_equal(actual_result, expected_result, check_dtype=False) + bigframes.testing.assert_frame_equal( + actual_result, expected_result, check_dtype=False + ) def test_dataframe_rolling_on_invalid_column_raise_error(rows_rolling_dfs): @@ -116,7 +121,7 @@ def test_dataframe_groupby_rolling_on(rows_rolling_dfs): expected_result = ( pd_df.groupby(pd_df["int64_too"] % 2).rolling(window=3, 
on="float64_col").sum() ) - pd.testing.assert_frame_equal( + bigframes.testing.assert_frame_equal( actual_result[check_columns], expected_result, check_dtype=False ) @@ -135,7 +140,9 @@ def test_series_rolling_closed_param(rows_rolling_series, closed): actual_result = bf_series.rolling(window=3, closed=closed).sum().to_pandas() expected_result = df_series.rolling(window=3, closed=closed).sum() - pd.testing.assert_series_equal(actual_result, expected_result, check_dtype=False) + bigframes.testing.assert_series_equal( + actual_result, expected_result, check_dtype=False + ) @pytest.mark.parametrize("closed", ["left", "right", "both", "neither"]) @@ -152,7 +159,9 @@ def test_series_groupby_rolling_closed_param(rows_rolling_series, closed): expected_result = ( df_series.groupby(df_series % 2).rolling(window=3, closed=closed).sum() ) - pd.testing.assert_series_equal(actual_result, expected_result, check_dtype=False) + bigframes.testing.assert_series_equal( + actual_result, expected_result, check_dtype=False + ) @pytest.mark.parametrize( @@ -186,7 +195,9 @@ def test_series_window_agg_ops(rows_rolling_series, windowing, agg_op): actual_result = agg_op(windowing(bf_series)).to_pandas() expected_result = agg_op(windowing(pd_series)) - pd.testing.assert_series_equal(expected_result, actual_result, check_dtype=False) + bigframes.testing.assert_series_equal( + expected_result, actual_result, check_dtype=False + ) @pytest.mark.parametrize( @@ -225,7 +236,7 @@ def test_dataframe_window_agg_ops(scalars_dfs, windowing, agg_op): bf_result = agg_op(windowing(bf_df)).to_pandas() pd_result = agg_op(windowing(pd_df)) - pd.testing.assert_frame_equal(pd_result, bf_result, check_dtype=False) + bigframes.testing.assert_frame_equal(pd_result, bf_result, check_dtype=False) @pytest.mark.parametrize( @@ -268,7 +279,7 @@ def test_dataframe_window_agg_func(scalars_dfs, windowing, func): pd_result = windowing(pd_df).agg(func) - pd.testing.assert_frame_equal(pd_result, bf_result, check_dtype=False) 
+ bigframes.testing.assert_frame_equal(pd_result, bf_result, check_dtype=False) def test_series_window_agg_single_func(scalars_dfs): @@ -281,7 +292,7 @@ def test_series_window_agg_single_func(scalars_dfs): pd_result = pd_series.expanding().agg("sum") - pd.testing.assert_series_equal(pd_result, bf_result, check_dtype=False) + bigframes.testing.assert_series_equal(pd_result, bf_result, check_dtype=False) def test_series_window_agg_multi_func(scalars_dfs): @@ -294,7 +305,7 @@ def test_series_window_agg_multi_func(scalars_dfs): pd_result = pd_series.expanding().agg(["sum", np.mean]) - pd.testing.assert_frame_equal(pd_result, bf_result, check_dtype=False) + bigframes.testing.assert_frame_equal(pd_result, bf_result, check_dtype=False) @pytest.mark.parametrize("closed", ["left", "right", "both", "neither"]) @@ -320,7 +331,7 @@ def test_series_range_rolling(range_rolling_dfs, window, closed, ascending): .rolling(window=window, closed=closed) .min() ) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( actual_result, expected_result, check_dtype=False, check_index=False ) @@ -341,7 +352,7 @@ def test_series_groupby_range_rolling(range_rolling_dfs): expected_result = ( pd_series.sort_index().groupby(pd_series % 2 == 0).rolling(window="3s").min() ) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( actual_result, expected_result, check_dtype=False, check_index=False ) @@ -372,7 +383,7 @@ def test_dataframe_range_rolling(range_rolling_dfs, window, closed, ascending): # Need to cast Pandas index type. 
Otherwise it uses DatetimeIndex that # does not exist in BigFrame expected_result.index = expected_result.index.astype(dtypes.TIMESTAMP_DTYPE) - pd.testing.assert_frame_equal( + bigframes.testing.assert_frame_equal( actual_result, expected_result, check_dtype=False, @@ -389,7 +400,7 @@ def test_dataframe_range_rolling_on(range_rolling_dfs): # Need to specify the column order because Pandas (seemingly) # re-arranges columns alphabetically cols = ["ts_col", "int_col", "float_col"] - pd.testing.assert_frame_equal( + bigframes.testing.assert_frame_equal( actual_result[cols], expected_result[cols], check_dtype=False, @@ -413,7 +424,7 @@ def test_dataframe_groupby_range_rolling(range_rolling_dfs): pd_df.sort_values(on).groupby("int_col").rolling(window="3s", on=on).min() ) expected_result.index = expected_result.index.set_names("index", level=1) - pd.testing.assert_frame_equal( + bigframes.testing.assert_frame_equal( actual_result, expected_result, check_dtype=False, @@ -440,7 +451,7 @@ def test_range_rolling_order_info_lookup(range_rolling_dfs): .rolling(window="3s") .count() ) - pd.testing.assert_series_equal( + bigframes.testing.assert_series_equal( actual_result, expected_result, check_dtype=False, check_index=False )