diff --git a/src/iceberg/avro/avro_data_util.cc b/src/iceberg/avro/avro_data_util.cc index 5ac565f61..17bbb394b 100644 --- a/src/iceberg/avro/avro_data_util.cc +++ b/src/iceberg/avro/avro_data_util.cc @@ -431,6 +431,19 @@ Status AppendPrimitiveValueToBuilder(const ::avro::NodePtr& avro_node, return {}; } + case TypeId::kTimestampNs: + case TypeId::kTimestampTzNs: { + if (avro_node->type() != ::avro::AVRO_LONG || + avro_node->logicalType().type() != ::avro::LogicalType::TIMESTAMP_NANOS) { + return InvalidArgument( + "Expected Avro long with TIMESTAMP_NANOS for timestamp field, got: {}", + ToString(avro_node)); + } + auto* builder = internal::checked_cast<::arrow::TimestampBuilder*>(array_builder); + ICEBERG_ARROW_RETURN_NOT_OK(builder->Append(avro_datum.value())); + return {}; + } + default: return InvalidArgument("Unsupported primitive type {} to append avro node {}", projected_field.type()->ToString(), ToString(avro_node)); diff --git a/src/iceberg/avro/avro_direct_decoder.cc b/src/iceberg/avro/avro_direct_decoder.cc index 335b6064e..cb4e869cc 100644 --- a/src/iceberg/avro/avro_direct_decoder.cc +++ b/src/iceberg/avro/avro_direct_decoder.cc @@ -562,6 +562,20 @@ Status DecodePrimitiveValueToBuilder(const ::avro::NodePtr& avro_node, return {}; } + case TypeId::kTimestampNs: + case TypeId::kTimestampTzNs: { + if (avro_node->type() != ::avro::AVRO_LONG || + avro_node->logicalType().type() != ::avro::LogicalType::TIMESTAMP_NANOS) { + return InvalidArgument( + "Expected Avro long with TIMESTAMP_NANOS for timestamp field, got: {}", + ToString(avro_node)); + } + auto* builder = internal::checked_cast<::arrow::TimestampBuilder*>(array_builder); + int64_t value = decoder.decodeLong(); + ICEBERG_ARROW_RETURN_NOT_OK(builder->Append(value)); + return {}; + } + default: return InvalidArgument("Unsupported primitive type {} to decode from avro node {}", projected_field.type()->ToString(), ToString(avro_node)); diff --git a/src/iceberg/avro/avro_schema_util.cc b/src/iceberg/avro/avro_schema_util.cc index 75db6d8d9..3d61d283f 100644 --- a/src/iceberg/avro/avro_schema_util.cc +++ b/src/iceberg/avro/avro_schema_util.cc @@ -194,6 +194,24 @@ Status ToAvroNodeVisitor::Visit(const TimestampTzType& type, ::avro::NodePtr* no return {}; } +Status ToAvroNodeVisitor::Visit(const TimestampNsType& type, ::avro::NodePtr* node) { + *node = std::make_shared<::avro::NodePrimitive>(::avro::AVRO_LONG); + (*node)->setLogicalType(::avro::LogicalType{::avro::LogicalType::TIMESTAMP_NANOS}); + ::avro::CustomAttributes attributes; + attributes.addAttribute(std::string(kAdjustToUtcProp), "false", /*addQuotes=*/false); + (*node)->addCustomAttributesForField(attributes); + return {}; +} + +Status ToAvroNodeVisitor::Visit(const TimestampTzNsType& type, ::avro::NodePtr* node) { + *node = std::make_shared<::avro::NodePrimitive>(::avro::AVRO_LONG); + (*node)->setLogicalType(::avro::LogicalType{::avro::LogicalType::TIMESTAMP_NANOS}); + ::avro::CustomAttributes attributes; + attributes.addAttribute(std::string(kAdjustToUtcProp), "true", /*addQuotes=*/false); + (*node)->addCustomAttributesForField(attributes); + return {}; +} + Status ToAvroNodeVisitor::Visit(const StringType& type, ::avro::NodePtr* node) { *node = std::make_shared<::avro::NodePrimitive>(::avro::AVRO_STRING); return {}; @@ -548,6 +566,20 @@ Status ValidateAvroSchemaEvolution(const Type& expected_type, return {}; } break; + case TypeId::kTimestampNs: + if (avro_node->type() == ::avro::AVRO_LONG && + HasLogicalType(avro_node, ::avro::LogicalType::TIMESTAMP_NANOS) && + GetAdjustToUtc(avro_node).value_or("false") == "false") { + return {}; + } + break; + case TypeId::kTimestampTzNs: + if (avro_node->type() == ::avro::AVRO_LONG && + HasLogicalType(avro_node, ::avro::LogicalType::TIMESTAMP_NANOS) && + GetAdjustToUtc(avro_node).value_or("false") == "true") { + return {}; + } + break; case TypeId::kString: if (avro_node->type() == ::avro::AVRO_STRING) { return {}; diff --git a/src/iceberg/avro/avro_schema_util_internal.h b/src/iceberg/avro/avro_schema_util_internal.h index bdfbf135a..e3b7a7ffd 100644 --- a/src/iceberg/avro/avro_schema_util_internal.h +++ b/src/iceberg/avro/avro_schema_util_internal.h @@ -52,6 +52,8 @@ class ToAvroNodeVisitor { Status Visit(const TimeType& type, ::avro::NodePtr* node); Status Visit(const TimestampType& type, ::avro::NodePtr* node); Status Visit(const TimestampTzType& type, ::avro::NodePtr* node); + Status Visit(const TimestampNsType& type, ::avro::NodePtr* node); + Status Visit(const TimestampTzNsType& type, ::avro::NodePtr* node); Status Visit(const StringType& type, ::avro::NodePtr* node); Status Visit(const UuidType& type, ::avro::NodePtr* node); Status Visit(const FixedType& type, ::avro::NodePtr* node); diff --git a/src/iceberg/expression/json_serde.cc b/src/iceberg/expression/json_serde.cc index 38e7a8e2f..065f41cf2 100644 --- a/src/iceberg/expression/json_serde.cc +++ b/src/iceberg/expression/json_serde.cc @@ -272,6 +272,11 @@ Result ToJson(const Literal& literal) { case TypeId::kTimestampTz: return nlohmann::json( TransformUtil::HumanTimestampWithZone(std::get(value))); + case TypeId::kTimestampNs: + return nlohmann::json(TransformUtil::HumanTimestampNs(std::get(value))); + case TypeId::kTimestampTzNs: + return nlohmann::json( + TransformUtil::HumanTimestampNsWithZone(std::get(value))); case TypeId::kFloat: return nlohmann::json(std::get(value)); case TypeId::kDouble: @@ -390,6 +395,26 @@ Result LiteralFromJson(const nlohmann::json& json, const Type* type) { return Literal::TimestampTz(micros); } + case TypeId::kTimestampNs: { + if (!json.is_string()) [[unlikely]] { + return JsonParseError("Cannot parse {} as a timestamp_ns value", + SafeDumpJson(json)); + } + ICEBERG_ASSIGN_OR_RAISE(auto nanos, + TransformUtil::ParseTimestampNs(json.get())); + return Literal::TimestampNs(nanos); + } + + case TypeId::kTimestampTzNs: { + if (!json.is_string()) [[unlikely]] { + return JsonParseError("Cannot parse {} as a timestamptz_ns value", + SafeDumpJson(json)); + } + ICEBERG_ASSIGN_OR_RAISE( + auto nanos, TransformUtil::ParseTimestampNsWithZone(json.get())); + return Literal::TimestampTzNs(nanos); + } + case TypeId::kUuid: { if (!json.is_string()) [[unlikely]] { return JsonParseError("Cannot parse {} as a uuid value", SafeDumpJson(json)); diff --git a/src/iceberg/expression/literal.cc b/src/iceberg/expression/literal.cc index 9b8060a19..a514e0e55 100644 --- a/src/iceberg/expression/literal.cc +++ b/src/iceberg/expression/literal.cc @@ -150,6 +150,10 @@ Result LiteralCaster::CastFromLong( return Literal::Timestamp(long_val); case TypeId::kTimestampTz: return Literal::TimestampTz(long_val); + case TypeId::kTimestampNs: + return Literal::TimestampNs(long_val); + case TypeId::kTimestampTzNs: + return Literal::TimestampTzNs(long_val); default: return NotSupported("Cast from Long to {} is not supported", target_type->ToString()); @@ -215,6 +219,15 @@ Result LiteralCaster::CastFromString( TransformUtil::ParseTimestampWithZone(str_val)); return Literal::TimestampTz(micros); } + case TypeId::kTimestampNs: { + ICEBERG_ASSIGN_OR_RAISE(auto nanos, TransformUtil::ParseTimestampNs(str_val)); + return Literal::TimestampNs(nanos); + } + case TypeId::kTimestampTzNs: { + ICEBERG_ASSIGN_OR_RAISE(auto nanos, + TransformUtil::ParseTimestampNsWithZone(str_val)); + return Literal::TimestampTzNs(nanos); + } case TypeId::kBinary: { ICEBERG_ASSIGN_OR_RAISE(auto bytes, StringUtils::HexStringToBytes(str_val)); return Literal::Binary(std::move(bytes)); @@ -250,14 +263,27 @@ Result LiteralCaster::CastFromString( Result LiteralCaster::CastFromTimestamp( const Literal& literal, const std::shared_ptr& target_type) { auto timestamp_val = std::get(literal.value_); + const auto& source_timestamp = + internal::checked_cast(*literal.type()); + const bool source_is_nanos = source_timestamp.time_unit() == TimeUnit::kNanosecond; switch (target_type->type_id()) { case TypeId::kDate: { ICEBERG_ASSIGN_OR_RAISE(auto days, TemporalUtils::ExtractDay(literal)); return Literal::Date(std::get(days.value())); } + case TypeId::kTimestamp: + return source_is_nanos ? Literal::Timestamp(timestamp_val / 1000) + : Literal::Timestamp(timestamp_val); case TypeId::kTimestampTz: - return Literal::TimestampTz(timestamp_val); + return source_is_nanos ? Literal::TimestampTz(timestamp_val / 1000) + : Literal::TimestampTz(timestamp_val); + case TypeId::kTimestampNs: + return source_is_nanos ? Literal::TimestampNs(timestamp_val) + : Literal::TimestampNs(timestamp_val * 1000); + case TypeId::kTimestampTzNs: + return source_is_nanos ? Literal::TimestampTzNs(timestamp_val) + : Literal::TimestampTzNs(timestamp_val * 1000); default: return NotSupported("Cast from Timestamp to {} is not supported", target_type->ToString()); @@ -266,15 +292,28 @@ Result LiteralCaster::CastFromTimestamp( Result LiteralCaster::CastFromTimestampTz( const Literal& literal, const std::shared_ptr& target_type) { - auto micros = std::get(literal.value_); + auto timestamp_val = std::get(literal.value_); + const auto& source_timestamp = + internal::checked_cast(*literal.type()); + const bool source_is_nanos = source_timestamp.time_unit() == TimeUnit::kNanosecond; switch (target_type->type_id()) { case TypeId::kDate: { ICEBERG_ASSIGN_OR_RAISE(auto days, TemporalUtils::ExtractDay(literal)); return Literal::Date(std::get(days.value())); } + case TypeId::kTimestampTz: + return source_is_nanos ? Literal::TimestampTz(timestamp_val / 1000) + : Literal::TimestampTz(timestamp_val); case TypeId::kTimestamp: - return Literal::Timestamp(micros); + return source_is_nanos ? Literal::Timestamp(timestamp_val / 1000) + : Literal::Timestamp(timestamp_val); + case TypeId::kTimestampNs: + return source_is_nanos ? Literal::TimestampNs(timestamp_val) + : Literal::TimestampNs(timestamp_val * 1000); + case TypeId::kTimestampTzNs: + return source_is_nanos ? Literal::TimestampTzNs(timestamp_val) + : Literal::TimestampTzNs(timestamp_val * 1000); default: return NotSupported("Cast from TimestampTz to {} is not supported", target_type->ToString()); @@ -329,6 +368,10 @@ Literal Literal::Timestamp(int64_t value) { return {Value{value}, timestamp()}; Literal Literal::TimestampTz(int64_t value) { return {Value{value}, timestamp_tz()}; } +Literal Literal::TimestampNs(int64_t value) { return {Value{value}, timestamp_ns()}; } + +Literal Literal::TimestampTzNs(int64_t value) { return {Value{value}, timestamptz_ns()}; } + Literal Literal::Float(float value) { return {Value{value}, float32()}; } Literal Literal::Double(double value) { return {Value{value}, float64()}; } @@ -395,8 +438,11 @@ bool Comparable(TypeId lhs, TypeId rhs) { case TypeId::kLong: case TypeId::kTimestamp: case TypeId::kTimestampTz: + case TypeId::kTimestampNs: + case TypeId::kTimestampTzNs: return rhs == TypeId::kLong || rhs == TypeId::kTimestamp || - rhs == TypeId::kTimestampTz; + rhs == TypeId::kTimestampTz || rhs == TypeId::kTimestampNs || + rhs == TypeId::kTimestampTzNs; default: return lhs == rhs; } @@ -439,7 +485,9 @@ std::partial_ordering Literal::operator<=>(const Literal& other) const { case TypeId::kLong: case TypeId::kTime: case TypeId::kTimestamp: - case TypeId::kTimestampTz: { + case TypeId::kTimestampTz: + case TypeId::kTimestampNs: + case TypeId::kTimestampTzNs: { auto this_val = std::get(value_); auto other_val = std::get(other.value_); return this_val <=> other_val; @@ -548,7 +596,9 @@ std::string Literal::ToString() const { } case TypeId::kTime: case TypeId::kTimestamp: - case TypeId::kTimestampTz: { + case TypeId::kTimestampTz: + case TypeId::kTimestampNs: + case TypeId::kTimestampTzNs: { return std::to_string(std::get(value_)); } case TypeId::kDate: { @@ -613,6 +663,10 @@ Result LiteralCaster::CastTo(const Literal& literal, return CastFromTimestamp(literal, target_type); case TypeId::kTimestampTz: return CastFromTimestampTz(literal, target_type); + case TypeId::kTimestampNs: + return CastFromTimestamp(literal, target_type); + case TypeId::kTimestampTzNs: + return CastFromTimestampTz(literal, target_type); default: break; } diff --git a/src/iceberg/expression/literal.h b/src/iceberg/expression/literal.h index b07aaa5e0..dbcabb521 100644 --- a/src/iceberg/expression/literal.h +++ b/src/iceberg/expression/literal.h @@ -73,6 +73,8 @@ class ICEBERG_EXPORT Literal : public util::Formattable { static Literal Time(int64_t value); static Literal Timestamp(int64_t value); static Literal TimestampTz(int64_t value); + static Literal TimestampNs(int64_t value); + static Literal TimestampTzNs(int64_t value); static Literal Float(float value); static Literal Double(double value); static Literal String(std::string value); @@ -199,6 +201,8 @@ DEFINE_LITERAL_TRAIT(kLong, int64_t) DEFINE_LITERAL_TRAIT(kTime, int64_t) DEFINE_LITERAL_TRAIT(kTimestamp, int64_t) DEFINE_LITERAL_TRAIT(kTimestampTz, int64_t) +DEFINE_LITERAL_TRAIT(kTimestampNs, int64_t) +DEFINE_LITERAL_TRAIT(kTimestampTzNs, int64_t) DEFINE_LITERAL_TRAIT(kFloat, float) DEFINE_LITERAL_TRAIT(kDouble, double) DEFINE_LITERAL_TRAIT(kDecimal, Decimal) diff --git a/src/iceberg/expression/predicate.cc b/src/iceberg/expression/predicate.cc index 3c92c2fcb..307c3c609 100644 --- a/src/iceberg/expression/predicate.cc +++ b/src/iceberg/expression/predicate.cc @@ -494,10 +494,10 @@ bool BoundLiteralPredicate::Equals(const Expression& other) const { } } - // TODO(gangwu): add TypeId::kTimestampNano static const std::unordered_set kIntegralTypes = { - TypeId::kInt, TypeId::kLong, TypeId::kDate, - TypeId::kTime, TypeId::kTimestamp, TypeId::kTimestampTz}; + TypeId::kInt, TypeId::kLong, TypeId::kDate, + TypeId::kTime, TypeId::kTimestamp, TypeId::kTimestampTz, + TypeId::kTimestampNs, TypeId::kTimestampTzNs}; if (kIntegralTypes.contains(term_->type()->type_id()) && term_->Equals(*other_pred->term())) { diff --git a/src/iceberg/json_serde.cc b/src/iceberg/json_serde.cc index 2d8c22255..3944e510c 100644 --- a/src/iceberg/json_serde.cc +++ b/src/iceberg/json_serde.cc @@ -363,6 +363,10 @@ nlohmann::json ToJson(const Type& type) { return "timestamp"; case TypeId::kTimestampTz: return "timestamptz"; + case TypeId::kTimestampNs: + return "timestamp_ns"; + case TypeId::kTimestampTzNs: + return "timestamptz_ns"; case TypeId::kString: return "string"; case TypeId::kBinary: @@ -488,6 +492,10 @@ Result> TypeFromJson(const nlohmann::json& json) { return std::make_unique(); } else if (type_str == "timestamptz") { return std::make_unique(); + } else if (type_str == "timestamp_ns") { + return std::make_unique(); + } else if (type_str == "timestamptz_ns") { + return std::make_unique(); } else if (type_str == "string") { return std::make_unique(); } else if (type_str == "binary") { diff --git a/src/iceberg/manifest/manifest_adapter.cc b/src/iceberg/manifest/manifest_adapter.cc index cf0a0515b..b37d82702 100644 --- a/src/iceberg/manifest/manifest_adapter.cc +++ b/src/iceberg/manifest/manifest_adapter.cc @@ -222,6 +222,8 @@ Status ManifestEntryAdapter::AppendPartitionValues( case TypeId::kTime: case TypeId::kTimestamp: case TypeId::kTimestampTz: + case TypeId::kTimestampNs: + case TypeId::kTimestampTzNs: ICEBERG_RETURN_UNEXPECTED( AppendField(child_array, std::get(partition_value.value()))); break; diff --git a/src/iceberg/parquet/parquet_schema_util.cc b/src/iceberg/parquet/parquet_schema_util.cc index e9574a48c..849bbd1f8 100644 --- a/src/iceberg/parquet/parquet_schema_util.cc +++ b/src/iceberg/parquet/parquet_schema_util.cc @@ -125,6 +125,26 @@ Status ValidateParquetSchemaEvolution( } } break; + case TypeId::kTimestampNs: + if (arrow_type->id() == ::arrow::Type::TIMESTAMP) { + const auto& timestamp_type = + internal::checked_cast(*arrow_type); + if (timestamp_type.unit() == ::arrow::TimeUnit::NANO && + timestamp_type.timezone().empty()) { + return {}; + } + } + break; + case TypeId::kTimestampTzNs: + if (arrow_type->id() == ::arrow::Type::TIMESTAMP) { + const auto& timestamp_type = + internal::checked_cast(*arrow_type); + if (timestamp_type.unit() == ::arrow::TimeUnit::NANO && + !timestamp_type.timezone().empty()) { + return {}; + } + } + break; case TypeId::kString: if (arrow_type->id() == ::arrow::Type::STRING) { return {}; diff --git a/src/iceberg/row/partition_values.cc b/src/iceberg/row/partition_values.cc index 712c801ad..fcd4691d0 100644 --- a/src/iceberg/row/partition_values.cc +++ b/src/iceberg/row/partition_values.cc @@ -57,6 +57,8 @@ Result PartitionValues::GetField(size_t pos) const { case TypeId::kTime: case TypeId::kTimestamp: case TypeId::kTimestampTz: + case TypeId::kTimestampNs: + case TypeId::kTimestampTzNs: return Scalar{std::get(literal.value())}; case TypeId::kFloat: return Scalar{std::get(literal.value())}; diff --git a/src/iceberg/row/struct_like.cc b/src/iceberg/row/struct_like.cc index 5b814204d..355af84c5 100644 --- a/src/iceberg/row/struct_like.cc +++ b/src/iceberg/row/struct_like.cc @@ -45,6 +45,8 @@ Result LiteralToScalar(const Literal& literal) { case TypeId::kTime: case TypeId::kTimestamp: case TypeId::kTimestampTz: + case TypeId::kTimestampNs: + case TypeId::kTimestampTzNs: return Scalar{std::get(literal.value())}; case TypeId::kFloat: return Scalar{std::get(literal.value())}; @@ -152,6 +154,10 @@ Result StructLikeAccessor::GetLiteral(const StructLike& struct_like) co return Literal::Timestamp(std::get(scalar)); case TypeId::kTimestampTz: return Literal::TimestampTz(std::get(scalar)); + case TypeId::kTimestampNs: + return Literal::TimestampNs(std::get(scalar)); + case TypeId::kTimestampTzNs: + return Literal::TimestampTzNs(std::get(scalar)); case TypeId::kFixed: { const auto& fixed_data = std::get(scalar); return Literal::Fixed(std::vector(fixed_data.cbegin(), fixed_data.cend())); diff --git a/src/iceberg/schema_internal.cc b/src/iceberg/schema_internal.cc index dedb603e2..bdd5b859f 100644 --- a/src/iceberg/schema_internal.cc +++ b/src/iceberg/schema_internal.cc @@ -123,6 +123,15 @@ ArrowErrorCode ToArrowSchema(const Type& type, bool optional, std::string_view n NANOARROW_RETURN_NOT_OK(ArrowSchemaSetTypeDateTime( schema, NANOARROW_TYPE_TIMESTAMP, NANOARROW_TIME_UNIT_MICRO, "UTC")); } break; + case TypeId::kTimestampNs: { + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetTypeDateTime(schema, NANOARROW_TYPE_TIMESTAMP, + NANOARROW_TIME_UNIT_NANO, + /*timezone=*/nullptr)); + } break; + case TypeId::kTimestampTzNs: { + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetTypeDateTime( + schema, NANOARROW_TYPE_TIMESTAMP, NANOARROW_TIME_UNIT_NANO, "UTC")); + } break; case TypeId::kString: NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_STRING)); break; @@ -270,10 +279,18 @@ Result> FromArrowSchema(const ArrowSchema& schema) { case NANOARROW_TYPE_TIMESTAMP: { bool with_timezone = schema_view.timezone != nullptr && std::strlen(schema_view.timezone) > 0; - if (schema_view.time_unit != NANOARROW_TIME_UNIT_MICRO) { + if (schema_view.time_unit != NANOARROW_TIME_UNIT_MICRO && + schema_view.time_unit != NANOARROW_TIME_UNIT_NANO) { return InvalidSchema("Unsupported time unit for Arrow timestamp type: {}", static_cast(schema_view.time_unit)); } + if (schema_view.time_unit == NANOARROW_TIME_UNIT_NANO) { + if (with_timezone) { + return iceberg::timestamptz_ns(); + } else { + return iceberg::timestamp_ns(); + } + } if (with_timezone) { return iceberg::timestamp_tz(); } else { diff --git a/src/iceberg/test/arrow_test.cc b/src/iceberg/test/arrow_test.cc index 12039280e..dcfdb6b56 100644 --- a/src/iceberg/test/arrow_test.cc +++ b/src/iceberg/test/arrow_test.cc @@ -92,8 +92,14 @@ INSTANTIATE_TEST_SUITE_P( .arrow_type = ::arrow::time64(arrow::TimeUnit::MICRO)}, ToArrowSchemaParam{.iceberg_type = iceberg::timestamp(), .arrow_type = ::arrow::timestamp(arrow::TimeUnit::MICRO)}, - ToArrowSchemaParam{.iceberg_type = iceberg::timestamp(), - .arrow_type = ::arrow::timestamp(arrow::TimeUnit::MICRO)}, + ToArrowSchemaParam{ + .iceberg_type = iceberg::timestamp_tz(), + .arrow_type = ::arrow::timestamp(arrow::TimeUnit::MICRO, "UTC")}, + ToArrowSchemaParam{.iceberg_type = iceberg::timestamp_ns(), + .arrow_type = ::arrow::timestamp(arrow::TimeUnit::NANO)}, + ToArrowSchemaParam{ + .iceberg_type = iceberg::timestamptz_ns(), + .arrow_type = ::arrow::timestamp(arrow::TimeUnit::NANO, "UTC")}, ToArrowSchemaParam{.iceberg_type = iceberg::string(), .arrow_type = ::arrow::utf8()}, ToArrowSchemaParam{.iceberg_type = iceberg::binary(), @@ -289,6 +295,11 @@ INSTANTIATE_TEST_SUITE_P( FromArrowSchemaParam{ .arrow_type = ::arrow::timestamp(arrow::TimeUnit::MICRO, "UTC"), .iceberg_type = std::make_shared()}, + FromArrowSchemaParam{.arrow_type = ::arrow::timestamp(arrow::TimeUnit::NANO), + .iceberg_type = iceberg::timestamp_ns()}, + FromArrowSchemaParam{ + .arrow_type = ::arrow::timestamp(arrow::TimeUnit::NANO, "UTC"), + .iceberg_type = iceberg::timestamptz_ns()}, FromArrowSchemaParam{.arrow_type = ::arrow::utf8(), .iceberg_type = iceberg::string()}, FromArrowSchemaParam{.arrow_type = ::arrow::binary(), diff --git a/src/iceberg/test/avro_data_test.cc b/src/iceberg/test/avro_data_test.cc index c0e42f67b..2979ad9bd 100644 --- a/src/iceberg/test/avro_data_test.cc +++ b/src/iceberg/test/avro_data_test.cc @@ -298,6 +298,30 @@ const std::vector kPrimitiveTestCases = { .expected_json = R"([{"a": 1672531200000000}, {"a": 1672531201000000}, {"a": 1672531202000000}])", }, + { + .name = "TimestampNs", + .projected_type = iceberg::timestamp_ns(), + .source_type = iceberg::timestamp_ns(), + .value_setter = + [](::avro::GenericDatum& datum, int i) { + datum.value<::avro::GenericRecord>().fieldAt(0).value() = + 1672531200000000000LL + i * 1000000000LL + i; + }, + .expected_json = + R"([{"a": 1672531200000000000}, {"a": 1672531201000000001}, {"a": 1672531202000000002}])", + }, + { + .name = "TimestampTzNs", + .projected_type = iceberg::timestamptz_ns(), + .source_type = iceberg::timestamptz_ns(), + .value_setter = + [](::avro::GenericDatum& datum, int i) { + datum.value<::avro::GenericRecord>().fieldAt(0).value() = + 1672531200000000000LL + i * 1000000000LL + i; + }, + .expected_json = + R"([{"a": 1672531200000000000}, {"a": 1672531201000000001}, {"a": 1672531202000000002}])", + }, { .name = "IntToLongPromotion", .projected_type = iceberg::int64(), @@ -948,6 +972,30 @@ const std::vector kExtractDatumTestCases = { 1672531200000000LL + i * 1000000LL); }, }, + { + .name = "TimestampNs", + .iceberg_type = timestamp_ns(), + .arrow_json = + R"([{"a": 1672531200000000000}, {"a": 1672531201000000001}, {"a": 1672531202000000002}])", + .value_verifier = + [](const ::avro::GenericDatum& datum, int i) { + const auto& record = datum.value<::avro::GenericRecord>(); + EXPECT_EQ(record.fieldAt(0).value(), + 1672531200000000000LL + i * 1000000000LL + i); + }, + }, + { + .name = "TimestampTzNs", + .iceberg_type = timestamptz_ns(), + .arrow_json = + R"([{"a": 1672531200000000000}, {"a": 1672531201000000001}, {"a": 1672531202000000002}])", + .value_verifier = + [](const ::avro::GenericDatum& datum, int i) { + const auto& record = datum.value<::avro::GenericRecord>(); + EXPECT_EQ(record.fieldAt(0).value(), + 1672531200000000000LL + i * 1000000000LL + i); + }, + }, }; INSTANTIATE_TEST_SUITE_P(AllPrimitiveTypes, ExtractDatumFromArrayTest, diff --git a/src/iceberg/test/avro_schema_test.cc b/src/iceberg/test/avro_schema_test.cc index 2c1ee8a96..dc2cb0a51 100644 --- a/src/iceberg/test/avro_schema_test.cc +++ b/src/iceberg/test/avro_schema_test.cc @@ -199,6 +199,26 @@ TEST(ToAvroNodeVisitorTest, TimestampTzType) { EXPECT_EQ(node->customAttributesAt(0).getAttribute("adjust-to-utc"), "true"); } +TEST(ToAvroNodeVisitorTest, TimestampNsType) { + ::avro::NodePtr node; + EXPECT_THAT(ToAvroNodeVisitor{}.Visit(TimestampNsType{}, &node), IsOk()); + EXPECT_EQ(node->type(), ::avro::AVRO_LONG); + EXPECT_EQ(node->logicalType().type(), ::avro::LogicalType::TIMESTAMP_NANOS); + + ASSERT_EQ(node->customAttributes(), 1); + EXPECT_EQ(node->customAttributesAt(0).getAttribute("adjust-to-utc"), "false"); +} + +TEST(ToAvroNodeVisitorTest, TimestampTzNsType) { + ::avro::NodePtr node; + EXPECT_THAT(ToAvroNodeVisitor{}.Visit(TimestampTzNsType{}, &node), IsOk()); + EXPECT_EQ(node->type(), ::avro::AVRO_LONG); + EXPECT_EQ(node->logicalType().type(), ::avro::LogicalType::TIMESTAMP_NANOS); + + ASSERT_EQ(node->customAttributes(), 1); + EXPECT_EQ(node->customAttributesAt(0).getAttribute("adjust-to-utc"), "true"); +} + TEST(ToAvroNodeVisitorTest, StringType) { ::avro::NodePtr node; EXPECT_THAT(ToAvroNodeVisitor{}.Visit(StringType{}, &node), IsOk()); @@ -1023,6 +1043,30 @@ TEST(AvroSchemaProjectionTest, ProjectMapType) { ASSERT_EQ(projection.fields[0].children.size(), 2); } +TEST(AvroSchemaProjectionTest, RejectTimestampNsFromMicrosType) { + Schema expected_schema({ + SchemaField::MakeRequired(/*field_id=*/1, "ts", iceberg::timestamp_ns()), + }); + + std::string avro_schema_json = R"({ + "type": "record", + "name": "iceberg_schema", + "fields": [ + {"name": "ts", "type": { + "type": "long", + "logicalType": "timestamp-micros", + "adjust-to-utc": false + }, "field-id": 1} + ] + })"; + auto avro_schema = ::avro::compileJsonSchemaFromString(avro_schema_json); + + auto projection_result = + Project(expected_schema, avro_schema.root(), /*prune_source=*/false); + ASSERT_THAT(projection_result, IsError(ErrorKind::kInvalidSchema)); + ASSERT_THAT(projection_result, HasErrorMessage("Cannot read")); +} + TEST(AvroSchemaProjectionTest, ProjectMapTypeWithNonStringKey) { ::iceberg::avro::RegisterLogicalTypes(); diff --git a/src/iceberg/test/bucket_util_test.cc b/src/iceberg/test/bucket_util_test.cc index 9c0d46f00..61bd2f312 100644 --- a/src/iceberg/test/bucket_util_test.cc +++ b/src/iceberg/test/bucket_util_test.cc @@ -23,6 +23,7 @@ #include +#include "iceberg/expression/literal.h" #include "iceberg/test/temporal_test_helper.h" #include "iceberg/util/decimal.h" #include "iceberg/util/uuid.h" @@ -107,4 +108,59 @@ TEST(BucketUtilsTest, HashHelper) { EXPECT_EQ(BucketUtils::HashBytes(fixed), -188683207); } +TEST(BucketUtilsTest, BucketTimestampNanosMatchesMicros) { + constexpr int32_t kNumBuckets = 1000; + const auto ts_micros = TemporalTestHelper::CreateTimestamp({.year = 2017, + .month = 11, + .day = 16, + .hour = 22, + .minute = 31, + .second = 8, + .microsecond = 1}); + const auto ts_nanos = TemporalTestHelper::CreateTimestampNanos({.year = 2017, + .month = 11, + .day = 16, + .hour = 22, + .minute = 31, + .second = 8, + .nanosecond = 1000}); + + const auto micros_bucket = + BucketUtils::BucketIndex(Literal::Timestamp(ts_micros), kNumBuckets); + const auto nanos_bucket = + BucketUtils::BucketIndex(Literal::TimestampNs(ts_nanos), kNumBuckets); + + ASSERT_TRUE(micros_bucket.has_value()); + ASSERT_TRUE(nanos_bucket.has_value()); + EXPECT_EQ(micros_bucket.value(), nanos_bucket.value()); + + const auto ts_tz_micros = + TemporalTestHelper::CreateTimestampTz({.year = 2017, + .month = 11, + .day = 16, + .hour = 14, + .minute = 31, + .second = 8, + .microsecond = 1, + .tz_offset_minutes = -480}); + const auto ts_tz_nanos = + TemporalTestHelper::CreateTimestampTzNanos({.year = 2017, + .month = 11, + .day = 16, + .hour = 14, + .minute = 31, + .second = 8, + .nanosecond = 1000, + .tz_offset_minutes = -480}); + + const auto tz_micros_bucket = + BucketUtils::BucketIndex(Literal::TimestampTz(ts_tz_micros), kNumBuckets); + const auto tz_nanos_bucket = + BucketUtils::BucketIndex(Literal::TimestampTzNs(ts_tz_nanos), kNumBuckets); + + ASSERT_TRUE(tz_micros_bucket.has_value()); + ASSERT_TRUE(tz_nanos_bucket.has_value()); + EXPECT_EQ(tz_micros_bucket.value(), tz_nanos_bucket.value()); +} + } // namespace iceberg diff --git a/src/iceberg/test/literal_test.cc b/src/iceberg/test/literal_test.cc index 97724aad9..ac4edb413 100644 --- a/src/iceberg/test/literal_test.cc +++ b/src/iceberg/test/literal_test.cc @@ -580,7 +580,15 @@ INSTANTIATE_TEST_SUITE_P( BasicLiteralTestParam{.test_name = "TimestampTz", .literal = Literal::TimestampTz(1684137600000000LL), .expected_type_id = TypeId::kTimestampTz, - .expected_string = "1684137600000000"}), + .expected_string = "1684137600000000"}, + BasicLiteralTestParam{.test_name = "TimestampNs", + .literal = Literal::TimestampNs(1684137600000000001LL), + .expected_type_id = TypeId::kTimestampNs, + .expected_string = "1684137600000000001"}, + BasicLiteralTestParam{.test_name = "TimestampTzNs", + .literal = Literal::TimestampTzNs(1684137600000000001LL), + .expected_type_id = TypeId::kTimestampTzNs, + .expected_string = "1684137600000000001"}), [](const ::testing::TestParamInfo& info) { return info.param.test_name; }); diff --git a/src/iceberg/test/schema_json_test.cc b/src/iceberg/test/schema_json_test.cc index 87388cbb4..c9532eeb6 100644 --- a/src/iceberg/test/schema_json_test.cc +++ b/src/iceberg/test/schema_json_test.cc @@ -71,6 +71,8 @@ INSTANTIATE_TEST_SUITE_P( SchemaJsonParam{.json = "\"timestamp\"", .type = iceberg::timestamp()}, SchemaJsonParam{.json = "\"timestamptz\"", .type = std::make_shared()}, + SchemaJsonParam{.json = "\"timestamp_ns\"", .type = iceberg::timestamp_ns()}, + SchemaJsonParam{.json = "\"timestamptz_ns\"", .type = iceberg::timestamptz_ns()}, SchemaJsonParam{ .json = R"({"element":"string","element-id":3,"element-required":true,"type":"list"})", diff --git a/src/iceberg/test/transform_util_test.cc b/src/iceberg/test/transform_util_test.cc index 54f36cd07..99a5d2d4a 100644 --- a/src/iceberg/test/transform_util_test.cc +++ b/src/iceberg/test/transform_util_test.cc @@ -111,6 +111,8 @@ TEST(TransformUtilTest, HumanTimestamp) { // precision with 1 microsecond EXPECT_EQ("2026-01-01T00:00:01.000001", TransformUtil::HumanTimestamp(1767225601000001L)); + // pre-epoch timestamp with fractional microseconds + EXPECT_EQ("1969-12-31T23:59:59.123456", TransformUtil::HumanTimestamp(-876544)); } TEST(TransformUtilTest, HumanTimestampWithZone) { @@ -132,6 +134,56 @@ TEST(TransformUtilTest, HumanTimestampWithZone) { // precision with 1 microsecond EXPECT_EQ("2026-01-01T00:00:01.000001+00:00", TransformUtil::HumanTimestampWithZone(1767225601000001L)); + // pre-epoch timestamp with fractional microseconds + EXPECT_EQ("1969-12-31T23:59:59.123456+00:00", + TransformUtil::HumanTimestampWithZone(-876544)); +} + +TEST(TransformUtilTest, HumanTimestampNs) { + EXPECT_EQ("1970-01-01T00:00:00.000000001", TransformUtil::HumanTimestampNs(1)); + EXPECT_EQ("2026-01-01T00:00:01.000001001", + TransformUtil::HumanTimestampNs(1767225601000001001L)); + EXPECT_EQ("1969-12-31T23:59:59.123456789", TransformUtil::HumanTimestampNs(-876543211)); +} + +TEST(TransformUtilTest, HumanTimestampNsWithZone) { + EXPECT_EQ("1970-01-01T00:00:00.000000001+00:00", + TransformUtil::HumanTimestampNsWithZone(1)); + EXPECT_EQ("2026-01-01T00:00:01.000001001+00:00", + TransformUtil::HumanTimestampNsWithZone(1767225601000001001L)); + EXPECT_EQ("1969-12-31T23:59:59.123456789+00:00", + TransformUtil::HumanTimestampNsWithZone(-876543211)); +} + +TEST(TransformUtilTest, ParseTimestampNs) { + ICEBERG_UNWRAP_OR_FAIL( + auto nanos, TransformUtil::ParseTimestampNs("2026-01-01T00:00:01.000001001")); + EXPECT_EQ(nanos, 1767225601000001001L); + ICEBERG_UNWRAP_OR_FAIL(auto pre_epoch_nanos, TransformUtil::ParseTimestampNs( + "1969-12-31T23:59:59.123456789")); + EXPECT_EQ(pre_epoch_nanos, -876543211); + EXPECT_EQ(TransformUtil::HumanTimestampNs(pre_epoch_nanos), + "1969-12-31T23:59:59.123456789"); +} + +TEST(TransformUtilTest, ParseTimestampNsRejectsMoreThanNineFractionalDigits) { + EXPECT_THAT(TransformUtil::ParseTimestampNs("2026-01-01T00:00:01.0000010011"), + IsError(ErrorKind::kInvalidArgument)); +} + +TEST(TransformUtilTest, ParseTimestampNsWithZone) { + ICEBERG_UNWRAP_OR_FAIL(auto nanos, TransformUtil::ParseTimestampNsWithZone( + "2026-01-01T00:00:01.000001001+00:00")); + EXPECT_EQ(nanos, 1767225601000001001L); +} + +TEST(TransformUtilTest, ParseTimestampNsWithZoneRejectsOffsetPastPlusMinus1800) { + EXPECT_THAT( + TransformUtil::ParseTimestampNsWithZone("2026-01-01T00:00:01.000001001+18:01"), + IsError(ErrorKind::kInvalidArgument)); + EXPECT_THAT( + TransformUtil::ParseTimestampNsWithZone("2026-01-01T00:00:01.000001001-18:30"), + IsError(ErrorKind::kInvalidArgument)); } TEST(TransformUtilTest, Base64Encode) { diff --git a/src/iceberg/test/type_test.cc b/src/iceberg/test/type_test.cc index 266ff6103..e68843be4 100644 --- a/src/iceberg/test/type_test.cc +++ b/src/iceberg/test/type_test.cc @@ -90,7 +90,7 @@ TEST_P(TypeTest, StdFormat) { ASSERT_EQ(test_case.repr, std::format("{}", *test_case.type)); } -const static std::array kPrimitiveTypes = {{ +const static std::array kPrimitiveTypes = {{ { .name = "boolean", .type = iceberg::boolean(), @@ -168,6 +168,20 @@ const static std::array kPrimitiveTypes = {{ .primitive = true, .repr = "timestamptz", }, + { + .name = "timestamp_ns", + .type = iceberg::timestamp_ns(), + .type_id = iceberg::TypeId::kTimestampNs, + .primitive = true, + .repr = "timestamp_ns", + }, + { + .name = "timestamptz_ns", + .type = iceberg::timestamptz_ns(), + .type_id = iceberg::TypeId::kTimestampTzNs, + .primitive = true, + .repr = "timestamptz_ns", + }, { .name = "binary", .type = iceberg::binary(), diff --git a/src/iceberg/test/visit_type_test.cc b/src/iceberg/test/visit_type_test.cc index 786e1fd2c..7104581f5 100644 --- a/src/iceberg/test/visit_type_test.cc +++ b/src/iceberg/test/visit_type_test.cc @@ -53,7 +53,7 @@ std::string TypeTestCaseToString(const ::testing::TestParamInfo& i return info.param.name; } -const static std::array kPrimitiveTypes = {{ +const static std::array kPrimitiveTypes = {{ { .name = "boolean", .type = iceberg::boolean(), @@ -131,6 +131,20 @@ const static std::array kPrimitiveTypes = {{ .primitive = true, .repr = "timestamptz", }, + { + .name = "timestamp_ns", + .type = iceberg::timestamp_ns(), + .type_id = iceberg::TypeId::kTimestampNs, + .primitive = true, + .repr = "timestamp_ns", + }, + { + .name = "timestamptz_ns", + .type = iceberg::timestamptz_ns(), + .type_id = iceberg::TypeId::kTimestampTzNs, + .primitive = true, + .repr = "timestamptz_ns", + }, { .name = "binary", .type = iceberg::binary(), diff --git a/src/iceberg/transform.cc b/src/iceberg/transform.cc index c210f9ed2..8a7d4b3e1 100644 --- a/src/iceberg/transform.cc +++ b/src/iceberg/transform.cc @@ -152,6 +152,8 @@ bool Transform::CanTransform(const Type& source_type) const { case TypeId::kTime: case TypeId::kTimestamp: case TypeId::kTimestampTz: + case TypeId::kTimestampNs: + case TypeId::kTimestampTzNs: case TypeId::kString: case TypeId::kUuid: case TypeId::kFixed: @@ -177,6 +179,8 @@ bool Transform::CanTransform(const Type& source_type) const { case TypeId::kDate: case TypeId::kTimestamp: case TypeId::kTimestampTz: + case TypeId::kTimestampNs: + case TypeId::kTimestampTzNs: return true; default: return false; @@ -186,6 +190,8 @@ bool Transform::CanTransform(const Type& source_type) const { case TypeId::kDate: case TypeId::kTimestamp: case TypeId::kTimestampTz: + case TypeId::kTimestampNs: + case TypeId::kTimestampTzNs: return true; default: return false; @@ -194,6 +200,8 @@ bool Transform::CanTransform(const Type& source_type) const { switch (source_type.type_id()) { case TypeId::kTimestamp: case TypeId::kTimestampTz: + case TypeId::kTimestampNs: + case TypeId::kTimestampTzNs: return true; default: return false; @@ -420,6 +428,11 @@ Result Transform::ToHumanString(const Literal& value) { return TransformUtil::HumanTimestamp(std::get(value.value())); case TypeId::kTimestampTz: return TransformUtil::HumanTimestampWithZone(std::get(value.value())); + case TypeId::kTimestampNs: + return TransformUtil::HumanTimestampNs(std::get(value.value())); + case TypeId::kTimestampTzNs: + return TransformUtil::HumanTimestampNsWithZone( + std::get(value.value())); case TypeId::kFixed: case TypeId::kBinary: { const auto& binary_data = std::get>(value.value()); diff --git a/src/iceberg/transform_function.cc b/src/iceberg/transform_function.cc index 9213d2ce3..d7c18555d 100644 --- a/src/iceberg/transform_function.cc +++ b/src/iceberg/transform_function.cc @@ -79,6 +79,8 @@ Result> BucketTransform::Make( case TypeId::kTime: case TypeId::kTimestamp: case TypeId::kTimestampTz: + case TypeId::kTimestampNs: + case TypeId::kTimestampTzNs: case TypeId::kString: case TypeId::kUuid: case TypeId::kFixed: @@ -148,6 +150,8 @@ Result> YearTransform::Make( case TypeId::kDate: case TypeId::kTimestamp: case TypeId::kTimestampTz: + case TypeId::kTimestampNs: + case TypeId::kTimestampTzNs: break; default: return NotSupported("{} is not a valid input type for year transform", @@ -176,6 +180,8 @@ Result> MonthTransform::Make( case TypeId::kDate: case TypeId::kTimestamp: case TypeId::kTimestampTz: + case TypeId::kTimestampNs: + case TypeId::kTimestampTzNs: break; default: return NotSupported("{} is not a valid input type for month transform", @@ -204,6 +210,8 @@ Result> DayTransform::Make( case TypeId::kDate: case TypeId::kTimestamp: case TypeId::kTimestampTz: + case TypeId::kTimestampNs: + case TypeId::kTimestampTzNs: break; default: return NotSupported("{} is not a valid input type for day transform", @@ -231,6 +239,8 @@ Result> HourTransform::Make( switch (source_type->type_id()) { case TypeId::kTimestamp: case TypeId::kTimestampTz: + case TypeId::kTimestampNs: + case TypeId::kTimestampTzNs: break; default: return NotSupported("{} is not a valid input type for hour transform", diff --git a/src/iceberg/type.cc b/src/iceberg/type.cc index f008ad908..b5bee37e2 100644 --- a/src/iceberg/type.cc +++ b/src/iceberg/type.cc @@ -326,6 +326,22 @@ bool TimestampTzType::Equals(const Type& other) const { return other.type_id() == kTypeId; } +bool TimestampNsType::is_zoned() const { return false; } +TimeUnit TimestampNsType::time_unit() const { return TimeUnit::kNanosecond; } +TypeId TimestampNsType::type_id() const { return kTypeId; } +std::string TimestampNsType::ToString() const { return "timestamp_ns"; } +bool TimestampNsType::Equals(const Type& other) const { + return other.type_id() == kTypeId; +} + +bool TimestampTzNsType::is_zoned() const { return true; } +TimeUnit TimestampTzNsType::time_unit() const { return TimeUnit::kNanosecond; } +TypeId TimestampTzNsType::type_id() const { return kTypeId; } +std::string TimestampTzNsType::ToString() const { return "timestamptz_ns"; } +bool TimestampTzNsType::Equals(const Type& other) const { + return other.type_id() == kTypeId; +} + TypeId StringType::type_id() const { return kTypeId; } std::string StringType::ToString() const { return "string"; } bool StringType::Equals(const Type& other) const { return other.type_id() == kTypeId; } @@ -371,6 +387,8 @@ TYPE_FACTORY(date, DateType) TYPE_FACTORY(time, TimeType) TYPE_FACTORY(timestamp, TimestampType) TYPE_FACTORY(timestamp_tz, TimestampTzType) +TYPE_FACTORY(timestamp_ns, TimestampNsType) +TYPE_FACTORY(timestamptz_ns, TimestampTzNsType) TYPE_FACTORY(binary, BinaryType) TYPE_FACTORY(string, StringType) TYPE_FACTORY(uuid, UuidType) @@ -425,6 +443,10 @@ std::string_view ToString(TypeId id) { return "timestamp"; case TypeId::kTimestampTz: return "timestamptz"; + case TypeId::kTimestampNs: + return "timestamp_ns"; + case TypeId::kTimestampTzNs: + return "timestamptz_ns"; case TypeId::kString: return "string"; case TypeId::kUuid: diff --git a/src/iceberg/type.h b/src/iceberg/type.h index 1c50135dc..53237cdb5 100644 --- a/src/iceberg/type.h +++ b/src/iceberg/type.h @@ -396,6 +396,44 @@ class ICEBERG_EXPORT TimestampTzType : public TimestampBase { bool Equals(const Type& other) const override; }; +/// \brief A data type representing a timestamp in nanoseconds without +/// reference to a timezone. +class ICEBERG_EXPORT TimestampNsType : public TimestampBase { + public: + constexpr static const TypeId kTypeId = TypeId::kTimestampNs; + + TimestampNsType() = default; + ~TimestampNsType() override = default; + + bool is_zoned() const override; + TimeUnit time_unit() const override; + + TypeId type_id() const override; + std::string ToString() const override; + + protected: + bool Equals(const Type& other) const override; +}; + +/// \brief A data type representing a timestamp as nanoseconds since the +/// epoch in UTC. A time zone or offset is not stored. +class ICEBERG_EXPORT TimestampTzNsType : public TimestampBase { + public: + constexpr static const TypeId kTypeId = TypeId::kTimestampTzNs; + + TimestampTzNsType() = default; + ~TimestampTzNsType() override = default; + + bool is_zoned() const override; + TimeUnit time_unit() const override; + + TypeId type_id() const override; + std::string ToString() const override; + + protected: + bool Equals(const Type& other) const override; +}; + /// \brief A data type representing an arbitrary-length byte sequence. class ICEBERG_EXPORT BinaryType : public PrimitiveType { public: @@ -490,6 +528,10 @@ ICEBERG_EXPORT const std::shared_ptr& time(); ICEBERG_EXPORT const std::shared_ptr& timestamp(); /// \brief Return a TimestampTzType instance. ICEBERG_EXPORT const std::shared_ptr& timestamp_tz(); +/// \brief Return a TimestampNsType instance. +ICEBERG_EXPORT const std::shared_ptr& timestamp_ns(); +/// \brief Return a TimestampTzNsType instance. +ICEBERG_EXPORT const std::shared_ptr& timestamptz_ns(); /// \brief Return a BinaryType instance. ICEBERG_EXPORT const std::shared_ptr& binary(); /// \brief Return a StringType instance. diff --git a/src/iceberg/type_fwd.h b/src/iceberg/type_fwd.h index 3fe199d8a..144a9e33a 100644 --- a/src/iceberg/type_fwd.h +++ b/src/iceberg/type_fwd.h @@ -46,6 +46,8 @@ enum class TypeId { kTime, kTimestamp, kTimestampTz, + kTimestampNs, + kTimestampTzNs, kString, kUuid, kFixed, @@ -55,6 +57,7 @@ enum class TypeId { /// \brief The time unit. In Iceberg V3 nanoseconds are also supported. enum class TimeUnit { kMicrosecond, + kNanosecond, }; /// \brief Data type family. @@ -77,6 +80,8 @@ class TimeType; class TimestampBase; class TimestampType; class TimestampTzType; +class TimestampNsType; +class TimestampTzNsType; class Type; class UuidType; diff --git a/src/iceberg/util/bucket_util.cc b/src/iceberg/util/bucket_util.cc index 88b240de7..a15091a14 100644 --- a/src/iceberg/util/bucket_util.cc +++ b/src/iceberg/util/bucket_util.cc @@ -63,6 +63,16 @@ int32_t HashLiteral(const Literal& literal) { return BucketUtils::HashLong(std::get(literal.value())); } +template <> +int32_t HashLiteral(const Literal& literal) { + return BucketUtils::HashLong(std::get(literal.value()) / 1000); +} + +template <> +int32_t HashLiteral(const Literal& literal) { + return BucketUtils::HashLong(std::get(literal.value()) / 1000); +} + template <> int32_t HashLiteral(const Literal& literal) { const auto& decimal = std::get(literal.value()); @@ -131,6 +141,8 @@ Result BucketUtils::BucketIndex(const Literal& literal, int32_t num_buc DISPATCH_HASH_LITERAL(TypeId::kTime) DISPATCH_HASH_LITERAL(TypeId::kTimestamp) DISPATCH_HASH_LITERAL(TypeId::kTimestampTz) + DISPATCH_HASH_LITERAL(TypeId::kTimestampNs) + DISPATCH_HASH_LITERAL(TypeId::kTimestampTzNs) DISPATCH_HASH_LITERAL(TypeId::kDecimal) DISPATCH_HASH_LITERAL(TypeId::kString) DISPATCH_HASH_LITERAL(TypeId::kUuid) diff --git a/src/iceberg/util/conversions.cc b/src/iceberg/util/conversions.cc index 0cc7c55d8..5c5356103 100644 --- a/src/iceberg/util/conversions.cc +++ b/src/iceberg/util/conversions.cc @@ -109,6 +109,8 @@ Result> Conversions::ToBytes(const PrimitiveType& type, DISPATCH_LITERAL_TO_BYTES(TypeId::kTime) DISPATCH_LITERAL_TO_BYTES(TypeId::kTimestamp) DISPATCH_LITERAL_TO_BYTES(TypeId::kTimestampTz) + DISPATCH_LITERAL_TO_BYTES(TypeId::kTimestampNs) + DISPATCH_LITERAL_TO_BYTES(TypeId::kTimestampTzNs) DISPATCH_LITERAL_TO_BYTES(TypeId::kFloat) DISPATCH_LITERAL_TO_BYTES(TypeId::kDouble) DISPATCH_LITERAL_TO_BYTES(TypeId::kDecimal) @@ -158,7 +160,9 @@ Result Conversions::FromBytes(const PrimitiveType& type, case TypeId::kLong: case TypeId::kTime: case TypeId::kTimestamp: - case TypeId::kTimestampTz: { + case TypeId::kTimestampTz: + case TypeId::kTimestampNs: + case TypeId::kTimestampTzNs: { int64_t value; if (data.size() < 8) { // Type was promoted from int to long diff --git a/src/iceberg/util/projection_util_internal.h b/src/iceberg/util/projection_util_internal.h index df4fe9789..e9cda56ca 100644 --- a/src/iceberg/util/projection_util_internal.h +++ b/src/iceberg/util/projection_util_internal.h @@ -54,6 +54,10 @@ class ProjectionUtil { return Literal::Timestamp(std::get(literal.value()) + adjustment); case TypeId::kTimestampTz: return Literal::TimestampTz(std::get(literal.value()) + adjustment); + case TypeId::kTimestampNs: + return Literal::TimestampNs(std::get(literal.value()) + adjustment); + case TypeId::kTimestampTzNs: + return Literal::TimestampTzNs(std::get(literal.value()) + adjustment); case TypeId::kDecimal: { const auto& decimal_type = internal::checked_cast(*literal.type()); @@ -143,6 +147,8 @@ class ProjectionUtil { case TypeId::kDate: case TypeId::kTimestamp: case TypeId::kTimestampTz: + case TypeId::kTimestampNs: + case TypeId::kTimestampTzNs: break; default: return NotSupported("{} is not a valid input type for numeric transform", @@ -179,6 +185,8 @@ class ProjectionUtil { case TypeId::kDate: case TypeId::kTimestamp: case TypeId::kTimestampTz: + case TypeId::kTimestampNs: + case TypeId::kTimestampTzNs: break; default: return NotSupported("{} is not a valid input type for numeric transform", diff --git a/src/iceberg/util/struct_like_set.cc b/src/iceberg/util/struct_like_set.cc index cc3de5293..433cfa681 100644 --- a/src/iceberg/util/struct_like_set.cc +++ b/src/iceberg/util/struct_like_set.cc @@ -276,6 +276,8 @@ Status ValidateScalarAgainstType(const Scalar& scalar, const Type& type) { case TypeId::kTime: case TypeId::kTimestamp: case TypeId::kTimestampTz: + case TypeId::kTimestampNs: + case TypeId::kTimestampTzNs: ICEBERG_PRECHECK(std::holds_alternative(scalar), "Expected {} but got {}", type.ToString(), ScalarTypeName(scalar)); return {}; diff --git a/src/iceberg/util/temporal_util.cc b/src/iceberg/util/temporal_util.cc index 05aafb961..0e8d64154 100644 --- a/src/iceberg/util/temporal_util.cc +++ b/src/iceberg/util/temporal_util.cc @@ -42,6 +42,10 @@ inline constexpr year_month_day TimestampToYmd(int64_t micros_since_epoch) { return {floor(sys_time(microseconds{micros_since_epoch}))}; } +inline constexpr year_month_day TimestampNsToYmd(int64_t nanos_since_epoch) { + return {floor(sys_time(nanoseconds{nanos_since_epoch}))}; +} + template requires std::is_same_v || std::is_same_v inline constexpr int32_t TimestampToDuration(int64_t micros_since_epoch) { @@ -51,6 +55,15 @@ inline constexpr int32_t TimestampToDuration(int64_t micros_since_epoch) { .count()); } +template + requires std::is_same_v || std::is_same_v +inline constexpr int32_t TimestampNsToDuration(int64_t nanos_since_epoch) { + return static_cast( + floor( + sys_time(nanoseconds{nanos_since_epoch}).time_since_epoch()) + .count()); +} + inline constexpr int32_t MonthsSinceEpoch(const year_month_day& ymd) { auto delta = ymd.year() - kEpochYmd.year(); // Calculate the month as months from 1970-01 @@ -78,11 +91,23 @@ Result ExtractYearImpl(const Literal& literal) { return Literal::Int((ymd.year() - kEpochYmd.year()).count()); } +template <> +Result ExtractYearImpl(const Literal& literal) { + auto value = std::get(literal.value()); + auto ymd = TimestampNsToYmd(value); + return Literal::Int((ymd.year() - kEpochYmd.year()).count()); +} + template <> Result ExtractYearImpl(const Literal& literal) { return ExtractYearImpl(literal); } +template <> +Result ExtractYearImpl(const Literal& literal) { + return ExtractYearImpl(literal); +} + template Result ExtractMonthImpl(const Literal& literal) { std::unreachable(); @@ -102,11 +127,23 @@ Result ExtractMonthImpl(const Literal& literal) { return Literal::Int(MonthsSinceEpoch(ymd)); } +template <> +Result ExtractMonthImpl(const Literal& literal) { + auto value = std::get(literal.value()); + auto ymd = TimestampNsToYmd(value); + return Literal::Int(MonthsSinceEpoch(ymd)); +} + template <> Result ExtractMonthImpl(const Literal& literal) { return ExtractMonthImpl(literal); } +template <> +Result ExtractMonthImpl(const Literal& literal) { + return ExtractMonthImpl(literal); +} + template Result ExtractDayImpl(const Literal& literal) { std::unreachable(); @@ -123,11 +160,22 @@ Result ExtractDayImpl(const Literal& literal) { return Literal::Int(TimestampToDuration(value)); } +template <> +Result ExtractDayImpl(const Literal& literal) { + auto value = std::get(literal.value()); + return Literal::Int(TimestampNsToDuration(value)); +} + template <> Result ExtractDayImpl(const Literal& literal) { return ExtractDayImpl(literal); } +template <> +Result ExtractDayImpl(const Literal& literal) { + return ExtractDayImpl(literal); +} + template Result ExtractHourImpl(const Literal& literal) { std::unreachable(); @@ -139,11 +187,22 @@ Result ExtractHourImpl(const Literal& literal) { return Literal::Int(TimestampToDuration(value)); } +template <> +Result ExtractHourImpl(const Literal& literal) { + auto value = std::get(literal.value()); + return Literal::Int(TimestampNsToDuration(value)); +} + template <> Result ExtractHourImpl(const Literal& literal) { return ExtractHourImpl(literal); } +template <> +Result ExtractHourImpl(const Literal& literal) { + return ExtractHourImpl(literal); +} + } // namespace #define DISPATCH_EXTRACT_YEAR(type_id) \ @@ -163,6 +222,8 @@ Result TemporalUtils::ExtractYear(const Literal& literal) { DISPATCH_EXTRACT_YEAR(TypeId::kDate) DISPATCH_EXTRACT_YEAR(TypeId::kTimestamp) DISPATCH_EXTRACT_YEAR(TypeId::kTimestampTz) + DISPATCH_EXTRACT_YEAR(TypeId::kTimestampNs) + DISPATCH_EXTRACT_YEAR(TypeId::kTimestampTzNs) default: return NotSupported("Extract year from type {} is not supported", literal.type()->ToString()); @@ -186,6 +247,8 @@ Result TemporalUtils::ExtractMonth(const Literal& literal) { DISPATCH_EXTRACT_MONTH(TypeId::kDate) DISPATCH_EXTRACT_MONTH(TypeId::kTimestamp) DISPATCH_EXTRACT_MONTH(TypeId::kTimestampTz) + DISPATCH_EXTRACT_MONTH(TypeId::kTimestampNs) + DISPATCH_EXTRACT_MONTH(TypeId::kTimestampTzNs) default: return NotSupported("Extract month from type {} is not supported", literal.type()->ToString()); @@ -209,6 +272,8 @@ Result TemporalUtils::ExtractDay(const Literal& literal) { DISPATCH_EXTRACT_DAY(TypeId::kDate) DISPATCH_EXTRACT_DAY(TypeId::kTimestamp) DISPATCH_EXTRACT_DAY(TypeId::kTimestampTz) + DISPATCH_EXTRACT_DAY(TypeId::kTimestampNs) + DISPATCH_EXTRACT_DAY(TypeId::kTimestampTzNs) default: return NotSupported("Extract day from type {} is not supported", literal.type()->ToString()); @@ -231,6 +296,8 @@ Result TemporalUtils::ExtractHour(const Literal& literal) { switch (literal.type()->type_id()) { DISPATCH_EXTRACT_HOUR(TypeId::kTimestamp) DISPATCH_EXTRACT_HOUR(TypeId::kTimestampTz) + DISPATCH_EXTRACT_HOUR(TypeId::kTimestampNs) + DISPATCH_EXTRACT_HOUR(TypeId::kTimestampTzNs) default: return NotSupported("Extract hour from type {} is not supported", literal.type()->ToString()); diff --git a/src/iceberg/util/transform_util.cc b/src/iceberg/util/transform_util.cc index a9221310e..ecbab5598 100644 --- a/src/iceberg/util/transform_util.cc +++ b/src/iceberg/util/transform_util.cc @@ -32,6 +32,9 @@ constexpr auto kEpochDate = std::chrono::year{1970} / std::chrono::January / 1; constexpr int64_t kMicrosPerMillis = 1'000; constexpr int64_t kMicrosPerSecond = 1'000'000; constexpr int64_t kMicrosPerDay = 86'400'000'000LL; +constexpr int64_t kNanosPerMillis = 1'000'000; +constexpr int64_t kNanosPerSecond = 1'000'000'000; +constexpr int64_t kNanosPerDay = 86'400'000'000'000LL; /// Parse a timezone offset of the form "+HH:mm" or "-HH:mm" and return the /// offset in microseconds (positive for east of UTC, negative for west). @@ -44,28 +47,107 @@ Result ParseTimezoneOffset(std::string_view offset) { StringUtils::ParseNumber(offset.substr(1, 2))); ICEBERG_ASSIGN_OR_RAISE(auto minutes, StringUtils::ParseNumber(offset.substr(4, 2))); - if (hours > 18 || minutes > 59) { + if (hours > 18 || minutes > 59) [[unlikely]] { return InvalidArgument("Invalid timezone offset: '{}'", offset); } + + if (hours == 18 && minutes != 0) [[unlikely]] { + return InvalidArgument("Timezone offset '{}' not in range [-18:00, +18:00]", offset); + } + auto micros = hours * 3'600 * kMicrosPerSecond + minutes * 60 * kMicrosPerSecond; return negative ? -micros : micros; } +Result> ParseTimestampWithZoneSuffix( + std::string_view str) { + if (str.empty()) [[unlikely]] { + return InvalidArgument("Invalid timestamptz string: '{}'", str); + } + + int64_t offset_micros = 0; + std::string_view timestamp_part; + + if (str.back() == 'Z') { + timestamp_part = str.substr(0, str.size() - 1); + } else if (str.size() >= 6 && + (str[str.size() - 6] == '+' || str[str.size() - 6] == '-')) { + // Parse "+HH:mm" or "-HH:mm" offset suffix + ICEBERG_ASSIGN_OR_RAISE(offset_micros, + ParseTimezoneOffset(str.substr(str.size() - 6))); + timestamp_part = str.substr(0, str.size() - 6); + } else { + return InvalidArgument("Invalid timestamptz string (missing timezone suffix): '{}'", + str); + } + + return std::make_pair(timestamp_part, offset_micros); +} + /// Parse fractional seconds (after '.') and return micros. /// Digits beyond 6 are truncated (nanosecond precision). Result ParseFractionalMicros(std::string_view frac) { - if (frac.empty()) { + if (frac.empty() || frac.size() > 9) [[unlikely]] { return InvalidArgument("Invalid fractional seconds: '{}'", frac); } // Truncate to microsecond precision (6 digits), matching Java ISO_LOCAL_TIME behavior if (frac.size() > 6) frac = frac.substr(0, 6); ICEBERG_ASSIGN_OR_RAISE(auto val, StringUtils::ParseNumber(frac)); - // Right-pad to 6 digits: "500" → 500000, "001" → 1000, "000001" → 1 + // Right-pad to 6 digits: "500" -> 500000, "001" -> 1000, "000001" -> 1000 for (size_t i = frac.size(); i < 6; ++i) { val *= 10; } return static_cast(val); } + +/// Parse fractional seconds (after '.') and return nanos. +Result ParseFractionalNanos(std::string_view frac) { + if (frac.empty() || frac.size() > 9) [[unlikely]] { + return InvalidArgument("Invalid fractional seconds: '{}'", frac); + } + ICEBERG_ASSIGN_OR_RAISE(auto val, StringUtils::ParseNumber(frac)); + // Right-pad to 9 digits: "500" -> 500000000, "001" -> 1000000, "000001" -> 1000 + for (size_t i = frac.size(); i < 9; ++i) { + val *= 10; + } + return static_cast(val); +} + +template +Result ParseTimeWithFraction(std::string_view str, int64_t units_per_second, + TimeScaleParser&& parse_fraction) { + if (str.size() < 5 || str[2] != ':') [[unlikely]] { + return InvalidArgument("Invalid time string: '{}'", str); + } + + ICEBERG_ASSIGN_OR_RAISE(auto hours, + StringUtils::ParseNumber(str.substr(0, 2))); + ICEBERG_ASSIGN_OR_RAISE(auto minutes, + StringUtils::ParseNumber(str.substr(3, 2))); + int64_t seconds = 0; + + int64_t frac_units = 0; + if (str.size() > 5) { + if (str[5] != ':' || str.size() < 8) [[unlikely]] { + return InvalidArgument("Invalid time string: '{}'", str); + } + ICEBERG_ASSIGN_OR_RAISE(seconds, StringUtils::ParseNumber(str.substr(6, 2))); + if (str.size() > 8) { + if (str[8] != '.') [[unlikely]] { + return InvalidArgument("Invalid time string: '{}'", str); + } + ICEBERG_ASSIGN_OR_RAISE(frac_units, parse_fraction(str.substr(9))); + } + } + + if (hours < 0 || hours > 23 || minutes < 0 || minutes > 59 || seconds < 0 || + seconds > 59) [[unlikely]] { + return InvalidArgument("Invalid time string: '{}'", str); + } + + return hours * 3'600 * units_per_second + minutes * 60 * units_per_second + + seconds * units_per_second + frac_units; +} } // namespace std::string TransformUtil::HumanYear(int32_t year_ordinal) { @@ -79,7 +161,7 @@ std::string TransformUtil::HumanMonth(int32_t month_ordinal) { } std::string TransformUtil::HumanDay(int32_t day_ordinal) { - auto ymd = std::chrono::sys_days(kEpochDate) + std::chrono::days{day_ordinal}; + auto ymd = std::chrono::sys_days{kEpochDate} + std::chrono::days{day_ordinal}; return std::format("{:%F}", ymd); } @@ -105,9 +187,14 @@ std::string TransformUtil::HumanTime(int64_t micros_from_midnight) { } std::string TransformUtil::HumanTimestamp(int64_t timestamp_micros) { + const auto micros_since_epoch = std::chrono::microseconds{timestamp_micros}; + const auto seconds_since_epoch = + std::chrono::floor(micros_since_epoch); auto tp = std::chrono::time_point{ - std::chrono::seconds(timestamp_micros / kMicrosPerSecond)}; - auto micros = timestamp_micros % kMicrosPerSecond; + seconds_since_epoch}; + auto micros = std::chrono::duration_cast(micros_since_epoch - + seconds_since_epoch) + .count(); if (micros == 0) { return std::format("{:%FT%T}", tp); } else if (micros % kMicrosPerMillis == 0) { @@ -117,10 +204,35 @@ std::string TransformUtil::HumanTimestamp(int64_t timestamp_micros) { } } +std::string TransformUtil::HumanTimestampNs(int64_t timestamp_nanos) { + const auto nanos_since_epoch = std::chrono::nanoseconds{timestamp_nanos}; + const auto seconds_since_epoch = + std::chrono::floor(nanos_since_epoch); + auto tp = std::chrono::time_point{ + seconds_since_epoch}; + auto nanos = std::chrono::duration_cast(nanos_since_epoch - + seconds_since_epoch) + .count(); + if (nanos == 0) { + return std::format("{:%FT%T}", tp); + } else if (nanos % kNanosPerMillis == 0) { + return std::format("{:%FT%T}.{:03d}", tp, nanos / kNanosPerMillis); + } else if (nanos % kMicrosPerMillis == 0) { + return std::format("{:%FT%T}.{:06d}", tp, nanos / kMicrosPerMillis); + } else { + return std::format("{:%FT%T}.{:09d}", tp, nanos); + } +} + std::string TransformUtil::HumanTimestampWithZone(int64_t timestamp_micros) { + const auto micros_since_epoch = std::chrono::microseconds{timestamp_micros}; + const auto seconds_since_epoch = + std::chrono::floor(micros_since_epoch); auto tp = std::chrono::time_point{ - std::chrono::seconds(timestamp_micros / kMicrosPerSecond)}; - auto micros = timestamp_micros % kMicrosPerSecond; + seconds_since_epoch}; + auto micros = std::chrono::duration_cast(micros_since_epoch - + seconds_since_epoch) + .count(); if (micros == 0) { return std::format("{:%FT%T}+00:00", tp); } else if (micros % kMicrosPerMillis == 0) { @@ -130,6 +242,26 @@ std::string TransformUtil::HumanTimestampWithZone(int64_t timestamp_micros) { } } +std::string TransformUtil::HumanTimestampNsWithZone(int64_t timestamp_nanos) { + const auto nanos_since_epoch = std::chrono::nanoseconds{timestamp_nanos}; + const auto seconds_since_epoch = + std::chrono::floor(nanos_since_epoch); + auto tp = std::chrono::time_point{ + seconds_since_epoch}; + auto nanos = std::chrono::duration_cast(nanos_since_epoch - + seconds_since_epoch) + .count(); + if (nanos == 0) { + return std::format("{:%FT%T}+00:00", tp); + } else if (nanos % kNanosPerMillis == 0) { + return std::format("{:%FT%T}.{:03d}+00:00", tp, nanos / kNanosPerMillis); + } else if (nanos % kMicrosPerMillis == 0) { + return std::format("{:%FT%T}.{:06d}+00:00", tp, nanos / kMicrosPerMillis); + } else { + return std::format("{:%FT%T}.{:09d}+00:00", tp, nanos); + } +} + Result TransformUtil::ParseDay(std::string_view str) { // Expected format: "[+-]yyyy-MM-dd" // Parse year, month, day manually, skipping leading '+' or '-' to find first date dash @@ -156,47 +288,19 @@ Result TransformUtil::ParseDay(std::string_view str) { return InvalidArgument("Invalid date: '{}'", str); } - auto days = std::chrono::sys_days(ymd) - std::chrono::sys_days(kEpochDate); + auto days = std::chrono::sys_days{ymd} - std::chrono::sys_days{kEpochDate}; return static_cast(days.count()); } Result TransformUtil::ParseTime(std::string_view str) { - if (str.size() < 5 || str[2] != ':') [[unlikely]] { - return InvalidArgument("Invalid time string: '{}'", str); - } - - ICEBERG_ASSIGN_OR_RAISE(auto hours, - StringUtils::ParseNumber(str.substr(0, 2))); - ICEBERG_ASSIGN_OR_RAISE(auto minutes, - StringUtils::ParseNumber(str.substr(3, 2))); - int64_t seconds = 0; - - int64_t frac_micros = 0; - if (str.size() > 5) { - if (str[5] != ':' || str.size() < 8) [[unlikely]] { - return InvalidArgument("Invalid time string: '{}'", str); - } - ICEBERG_ASSIGN_OR_RAISE(seconds, StringUtils::ParseNumber(str.substr(6, 2))); - if (str.size() > 8) { - if (str[8] != '.') [[unlikely]] { - return InvalidArgument("Invalid time string: '{}'", str); - } - ICEBERG_ASSIGN_OR_RAISE(frac_micros, ParseFractionalMicros(str.substr(9))); - } - } - - // check that hours, minutes, seconds are in valid ranges - if (hours < 0 || hours > 23 || minutes < 0 || minutes > 59 || seconds < 0 || - seconds > 59) [[unlikely]] { - return InvalidArgument("Invalid time string: '{}'", str); - } + return ParseTimeWithFraction(str, kMicrosPerSecond, ParseFractionalMicros); +} - return hours * 3'600 * kMicrosPerSecond + minutes * 60 * kMicrosPerSecond + - seconds * kMicrosPerSecond + frac_micros; +Result TransformUtil::ParseTimeNs(std::string_view str) { + return ParseTimeWithFraction(str, kNanosPerSecond, ParseFractionalNanos); } Result TransformUtil::ParseTimestamp(std::string_view str) { - // Format: "yyyy-MM-ddTHH:mm:ss[.SSS[SSS]]" auto t_pos = str.find('T'); if (t_pos == std::string_view::npos) [[unlikely]] { return InvalidArgument("Invalid timestamp string (missing 'T'): '{}'", str); @@ -208,32 +312,32 @@ Result TransformUtil::ParseTimestamp(std::string_view str) { return static_cast(days) * kMicrosPerDay + time_micros; } -Result TransformUtil::ParseTimestampWithZone(std::string_view str) { - if (str.empty()) [[unlikely]] { - return InvalidArgument("Invalid timestamptz string: '{}'", str); +Result TransformUtil::ParseTimestampNs(std::string_view str) { + auto t_pos = str.find('T'); + if (t_pos == std::string_view::npos) [[unlikely]] { + return InvalidArgument("Invalid timestamp string (missing 'T'): '{}'", str); } - int64_t offset_micros = 0; - std::string_view timestamp_part; + ICEBERG_ASSIGN_OR_RAISE(auto days, ParseDay(str.substr(0, t_pos))); + ICEBERG_ASSIGN_OR_RAISE(auto time_nanos, ParseTimeNs(str.substr(t_pos + 1))); - if (str.back() == 'Z') { - // "Z" suffix means UTC (offset = 0) - timestamp_part = str.substr(0, str.size() - 1); - } else if (str.size() >= 6 && - (str[str.size() - 6] == '+' || str[str.size() - 6] == '-')) { - // Parse "+HH:mm" or "-HH:mm" offset suffix - ICEBERG_ASSIGN_OR_RAISE(offset_micros, - ParseTimezoneOffset(str.substr(str.size() - 6))); - timestamp_part = str.substr(0, str.size() - 6); - } else { - return InvalidArgument("Invalid timestamptz string (missing timezone suffix): '{}'", - str); - } + return static_cast(days) * kNanosPerDay + time_nanos; +} +Result TransformUtil::ParseTimestampWithZone(std::string_view str) { + ICEBERG_ASSIGN_OR_RAISE(auto timestamp_with_offset, ParseTimestampWithZoneSuffix(str)); + const auto [timestamp_part, offset_micros] = timestamp_with_offset; ICEBERG_ASSIGN_OR_RAISE(auto local_micros, ParseTimestamp(timestamp_part)); return local_micros - offset_micros; } +Result TransformUtil::ParseTimestampNsWithZone(std::string_view str) { + ICEBERG_ASSIGN_OR_RAISE(auto timestamp_with_offset, ParseTimestampWithZoneSuffix(str)); + const auto [timestamp_part, offset_micros] = timestamp_with_offset; + ICEBERG_ASSIGN_OR_RAISE(auto local_nanos, ParseTimestampNs(timestamp_part)); + return local_nanos - offset_micros * 1000; +} + std::string TransformUtil::Base64Encode(std::string_view str_to_encode) { static constexpr std::string_view kBase64Chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; diff --git a/src/iceberg/util/transform_util.h b/src/iceberg/util/transform_util.h index c23d08c8c..b9c692098 100644 --- a/src/iceberg/util/transform_util.h +++ b/src/iceberg/util/transform_util.h @@ -23,6 +23,7 @@ #include "iceberg/iceberg_export.h" #include "iceberg/result.h" +#include "iceberg/type_fwd.h" namespace iceberg { @@ -86,6 +87,19 @@ class ICEBERG_EXPORT TransformUtil { /// \return a string representation of this timestamp. static std::string HumanTimestamp(int64_t timestamp_micros); + /// \brief Returns a string representation of a timestamp in nanoseconds. + /// + /// The output will be one of the following forms, according to the precision of the + /// timestamp: + /// - yyyy-MM-ddTHH:mm:ss + /// - yyyy-MM-ddTHH:mm:ss.SSS + /// - yyyy-MM-ddTHH:mm:ss.SSSSSS + /// - yyyy-MM-ddTHH:mm:ss.SSSSSSSSS + /// + /// \param timestamp_nanos the timestamp in nanoseconds. + /// \return a string representation of this timestamp. + static std::string HumanTimestampNs(int64_t timestamp_nanos); + /// \brief Returns a human-readable string representation of a timestamp with a time /// zone. /// @@ -99,6 +113,20 @@ class ICEBERG_EXPORT TransformUtil { /// \return a string representation of this timestamp. static std::string HumanTimestampWithZone(int64_t timestamp_micros); + /// \brief Returns a string representation of a timestamp in nanoseconds with a time + /// zone. + /// + /// The output will be one of the following forms, according to the precision of the + /// timestamp: + /// - yyyy-MM-ddTHH:mm:ss+00:00 + /// - yyyy-MM-ddTHH:mm:ss.SSS+00:00 + /// - yyyy-MM-ddTHH:mm:ss.SSSSSS+00:00 + /// - yyyy-MM-ddTHH:mm:ss.SSSSSSSSS+00:00 + /// + /// \param timestamp_nanos the timestamp in nanoseconds. + /// \return a string representation of this timestamp. + static std::string HumanTimestampNsWithZone(int64_t timestamp_nanos); + /// \brief Parses a date string in "[+-]yyyy-MM-dd" format into days since epoch. /// /// Supports an optional '+' or '-' prefix for extended years beyond 9999. @@ -117,6 +145,16 @@ class ICEBERG_EXPORT TransformUtil { /// \return The number of microseconds from midnight, or an error. static Result ParseTime(std::string_view str); + /// \brief Parses a time string into nanoseconds from midnight. + /// + /// Accepts ISO-8601 local time formats: "HH:mm", "HH:mm:ss", or + /// "HH:mm:ss.f" where the fractional part can be 1-9 digits. + /// Digits beyond 9 (nanosecond precision) are truncated. + /// + /// \param str The time string to parse. + /// \return The number of nanoseconds from midnight, or an error. + static Result ParseTimeNs(std::string_view str); + /// \brief Parses a timestamp string into microseconds since epoch. /// /// Accepts ISO-8601 local date-time formats: "yyyy-MM-ddTHH:mm", @@ -127,6 +165,16 @@ class ICEBERG_EXPORT TransformUtil { /// \return The number of microseconds since epoch, or an error. static Result ParseTimestamp(std::string_view str); + /// \brief Parses a timestamp string into nanoseconds since epoch. + /// + /// Accepts ISO-8601 local date-time formats: "yyyy-MM-ddTHH:mm", + /// "yyyy-MM-ddTHH:mm:ss", or "yyyy-MM-ddTHH:mm:ss.f" where the + /// fractional part can be 1-9 digits. + /// + /// \param str The timestamp string to parse. + /// \return The number of nanoseconds since epoch, or an error. + static Result ParseTimestampNs(std::string_view str); + /// \brief Parses a timestamp-with-zone string into microseconds since epoch (UTC). /// /// Accepts the same formats as ParseTimestamp, with a timezone suffix: @@ -137,6 +185,16 @@ class ICEBERG_EXPORT TransformUtil { /// \return The number of microseconds since epoch (UTC), or an error. static Result ParseTimestampWithZone(std::string_view str); + /// \brief Parses a timestamp-with-zone string into nanoseconds since epoch (UTC). + /// + /// Accepts the same formats as ParseTimestampNs, with a timezone suffix: + /// "Z", "+HH:mm", or "-HH:mm". Non-UTC offsets are converted to UTC. + /// The seconds and fractional parts are optional (e.g. "yyyy-MM-ddTHH:mm+00:00"). + /// + /// \param str The timestamp string to parse. + /// \return The number of nanoseconds since epoch (UTC), or an error. + static Result ParseTimestampNsWithZone(std::string_view str); + /// \brief Base64 encode a string static std::string Base64Encode(std::string_view str_to_encode); }; diff --git a/src/iceberg/util/visitor_generate.h b/src/iceberg/util/visitor_generate.h index 053371d41..7a3648546 100644 --- a/src/iceberg/util/visitor_generate.h +++ b/src/iceberg/util/visitor_generate.h @@ -32,6 +32,8 @@ namespace iceberg { ACTION(Time); \ ACTION(Timestamp); \ ACTION(TimestampTz); \ + ACTION(TimestampNs); \ + ACTION(TimestampTzNs); \ ACTION(String); \ ACTION(Uuid); \ ACTION(Fixed); \