diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index 1b1eb241a..dc67f9674 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -480,3 +480,18 @@ class SourceColumnMatch(str, enum.Enum): NAME = "NAME" """Matches by name. This reads the header row as column names and reorders columns to match the field names in the schema.""" + + +class TimestampPrecision(enum.Enum): + """Precision (maximum number of total digits in base 10) for seconds of + TIMESTAMP type.""" + + MICROSECOND = None + """ + Default, for TIMESTAMP type with microsecond precision. + """ + + PICOSECOND = 12 + """ + For TIMESTAMP type with picosecond precision. + """ diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index 456730b00..1809df21f 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -196,6 +196,14 @@ class SchemaField(object): Only valid for top-level schema fields (not nested fields). If the type is FOREIGN, this field is required. + + timestamp_precision: Optional[enums.TimestampPrecision] + Precision (maximum number of total digits in base 10) for seconds + of TIMESTAMP type. + + Defaults to `enums.TimestampPrecision.MICROSECOND` (`None`) for + microsecond precision. Use `enums.TimestampPrecision.PICOSECOND` + (`12`) for picosecond precision. """ def __init__( @@ -213,6 +221,7 @@ def __init__( range_element_type: Union[FieldElementType, str, None] = None, rounding_mode: Union[enums.RoundingMode, str, None] = None, foreign_type_definition: Optional[str] = None, + timestamp_precision: Optional[enums.TimestampPrecision] = None, ): self._properties: Dict[str, Any] = { "name": name, @@ -237,6 +246,13 @@ def __init__( if isinstance(policy_tags, PolicyTagList) else None ) + if isinstance(timestamp_precision, enums.TimestampPrecision): + self._properties["timestampPrecision"] = timestamp_precision.value + elif timestamp_precision is not None: + raise ValueError( + "timestamp_precision must be class enums.TimestampPrecision " + f"or None, got {type(timestamp_precision)} instead." + ) if isinstance(range_element_type, str): self._properties["rangeElementType"] = {"type": range_element_type} if isinstance(range_element_type, FieldElementType): @@ -254,15 +270,22 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField": """Return a ``SchemaField`` object deserialized from a dictionary. Args: - api_repr (Mapping[str, str]): The serialized representation - of the SchemaField, such as what is output by - :meth:`to_api_repr`. + api_repr (dict): The serialized representation of the SchemaField, + such as what is output by :meth:`to_api_repr`. Returns: google.cloud.bigquery.schema.SchemaField: The ``SchemaField`` object. """ placeholder = cls("this_will_be_replaced", "PLACEHOLDER") + # The API would return a string despite we send an integer. To ensure + # success of resending received schema, we convert string to integer + # to ensure consistency. + try: + api_repr["timestampPrecision"] = int(api_repr["timestampPrecision"]) + except (TypeError, KeyError): + pass + # Note: we don't make a copy of api_repr because this can cause # unnecessary slowdowns, especially on deeply nested STRUCT / RECORD # fields. See https://github.com/googleapis/python-bigquery/issues/6 @@ -374,6 +397,16 @@ def policy_tags(self): resource = self._properties.get("policyTags") return PolicyTagList.from_api_repr(resource) if resource is not None else None + @property + def timestamp_precision(self) -> enums.TimestampPrecision: + """Precision (maximum number of total digits in base 10) for seconds of + TIMESTAMP type. + + Returns: + enums.TimestampPrecision: value of TimestampPrecision. + """ + return enums.TimestampPrecision(self._properties.get("timestampPrecision")) + def to_api_repr(self) -> dict: """Return a dictionary representing this schema field. @@ -408,6 +441,8 @@ def _key(self): None if self.policy_tags is None else tuple(sorted(self.policy_tags.names)) ) + timestamp_precision = self._properties.get("timestampPrecision") + return ( self.name, field_type, @@ -417,6 +452,7 @@ def _key(self): self.description, self.fields, policy_tags, + timestamp_precision, ) def to_standard_sql(self) -> standard_sql.StandardSqlField: @@ -467,10 +503,9 @@ def __hash__(self): return hash(self._key()) def __repr__(self): - key = self._key() - policy_tags = key[-1] + *initial_tags, policy_tags, timestamp_precision_tag = self._key() policy_tags_inst = None if policy_tags is None else PolicyTagList(policy_tags) - adjusted_key = key[:-1] + (policy_tags_inst,) + adjusted_key = (*initial_tags, policy_tags_inst, timestamp_precision_tag) return f"{self.__class__.__name__}{adjusted_key}" @@ -530,9 +565,11 @@ def _to_schema_fields(schema): if isinstance(schema, Sequence): # Input is a Sequence (e.g. a list): Process and return a list of SchemaFields return [ - field - if isinstance(field, SchemaField) - else SchemaField.from_api_repr(field) + ( + field + if isinstance(field, SchemaField) + else SchemaField.from_api_repr(field) + ) for field in schema ] diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 6584ca03c..3d32a3634 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -74,6 +74,16 @@ bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), ] +SCHEMA_PICOSECOND = [ + bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), + bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), + bigquery.SchemaField( + "time_pico", + "TIMESTAMP", + mode="REQUIRED", + timestamp_precision=enums.TimestampPrecision.PICOSECOND, + ), +] CLUSTERING_SCHEMA = [ bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), @@ -631,6 +641,19 @@ def test_create_table_w_time_partitioning_w_clustering_fields(self): self.assertEqual(time_partitioning.field, "transaction_time") self.assertEqual(table.clustering_fields, ["user_email", "store_code"]) + def test_create_table_w_picosecond_timestamp(self): + dataset = self.temp_dataset(_make_dataset_id("create_table")) + table_id = "test_table" + table_arg = Table(dataset.table(table_id), schema=SCHEMA_PICOSECOND) + self.assertFalse(_table_exists(table_arg)) + + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) + self.to_delete.insert(0, table) + + self.assertTrue(_table_exists(table)) + self.assertEqual(table.table_id, table_id) + self.assertEqual(table.schema, SCHEMA_PICOSECOND) + def test_delete_dataset_with_string(self): dataset_id = _make_dataset_id("delete_table_true_with_string") project = Config.CLIENT.project diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index c63a8312c..f61b22035 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -52,6 +52,9 @@ def test_constructor_defaults(self): self.assertIsNone(field.default_value_expression) self.assertEqual(field.rounding_mode, None) self.assertEqual(field.foreign_type_definition, None) + self.assertEqual( + field.timestamp_precision, enums.TimestampPrecision.MICROSECOND + ) def test_constructor_explicit(self): FIELD_DEFAULT_VALUE_EXPRESSION = "This is the default value for this field" @@ -69,6 +72,7 @@ def test_constructor_explicit(self): default_value_expression=FIELD_DEFAULT_VALUE_EXPRESSION, rounding_mode=enums.RoundingMode.ROUNDING_MODE_UNSPECIFIED, foreign_type_definition="INTEGER", + timestamp_precision=enums.TimestampPrecision.PICOSECOND, ) self.assertEqual(field.name, "test") self.assertEqual(field.field_type, "STRING") @@ -87,6 +91,10 @@ def test_constructor_explicit(self): ) self.assertEqual(field.rounding_mode, "ROUNDING_MODE_UNSPECIFIED") self.assertEqual(field.foreign_type_definition, "INTEGER") + self.assertEqual( + field.timestamp_precision, + enums.TimestampPrecision.PICOSECOND, + ) def test_constructor_explicit_none(self): field = self._make_one("test", "STRING", description=None, policy_tags=None) @@ -189,6 +197,23 @@ def test_to_api_repr_with_subfield(self): }, ) + def test_to_api_repr_w_timestamp_precision(self): + field = self._make_one( + "foo", + "TIMESTAMP", + "NULLABLE", + timestamp_precision=enums.TimestampPrecision.PICOSECOND, + ) + self.assertEqual( + field.to_api_repr(), + { + "mode": "NULLABLE", + "name": "foo", + "type": "TIMESTAMP", + "timestampPrecision": 12, + }, + ) + def test_from_api_repr(self): field = self._get_target_class().from_api_repr( { @@ -198,6 +223,7 @@ def test_from_api_repr(self): "name": "foo", "type": "record", "roundingMode": "ROUNDING_MODE_UNSPECIFIED", + "timestampPrecision": 12, } ) self.assertEqual(field.name, "foo") @@ -210,6 +236,10 @@ def test_from_api_repr(self): self.assertEqual(field.fields[0].mode, "NULLABLE") self.assertEqual(field.range_element_type, None) self.assertEqual(field.rounding_mode, "ROUNDING_MODE_UNSPECIFIED") + self.assertEqual( + field.timestamp_precision, + enums.TimestampPrecision.PICOSECOND, + ) def test_from_api_repr_policy(self): field = self._get_target_class().from_api_repr( @@ -264,6 +294,17 @@ def test_from_api_repr_defaults(self): self.assertNotIn("policyTags", field._properties) self.assertNotIn("rangeElementType", field._properties) + def test_from_api_repr_timestamp_precision_str(self): + # The backend would return timestampPrecision field as a string, even + # if we send over an integer. This test verifies we manually converted + # it into integer to ensure resending could succeed. + field = self._get_target_class().from_api_repr( + { + "timestampPrecision": "12", + } + ) + self.assertEqual(field._properties["timestampPrecision"], 12) + def test_name_property(self): name = "lemon-ness" schema_field = self._make_one(name, "INTEGER") @@ -323,6 +364,22 @@ def test_foreign_type_definition_property_str(self): schema_field._properties["foreignTypeDefinition"] = FOREIGN_TYPE_DEFINITION self.assertEqual(schema_field.foreign_type_definition, FOREIGN_TYPE_DEFINITION) + def test_timestamp_precision_unsupported_type(self): + with pytest.raises(ValueError) as e: + self._make_one("test", "TIMESTAMP", timestamp_precision=12) + + assert "timestamp_precision must be class enums.TimestampPrecision" in str( + e.value + ) + + def test_timestamp_precision_property(self): + TIMESTAMP_PRECISION = enums.TimestampPrecision.PICOSECOND + schema_field = self._make_one("test", "TIMESTAMP") + schema_field._properties[ + "timestampPrecision" + ] = enums.TimestampPrecision.PICOSECOND.value + self.assertEqual(schema_field.timestamp_precision, TIMESTAMP_PRECISION) + def test_to_standard_sql_simple_type(self): examples = ( # a few legacy types @@ -637,7 +694,9 @@ def test___hash__not_equals(self): def test___repr__(self): field1 = self._make_one("field1", "STRING") - expected = "SchemaField('field1', 'STRING', 'NULLABLE', None, None, (), None)" + expected = ( + "SchemaField('field1', 'STRING', 'NULLABLE', None, None, (), None, None)" + ) self.assertEqual(repr(field1), expected) def test___repr__evaluable_no_policy_tags(self):