Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/iceberg/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ set(ICEBERG_SOURCES
util/murmurhash3_internal.cc
util/property_util.cc
util/snapshot_util.cc
util/string_util.cc
util/temporal_util.cc
util/timepoint.cc
util/transform_util.cc
Expand Down
149 changes: 145 additions & 4 deletions src/iceberg/expression/json_serde.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
* under the License.
*/

#include <limits>
#include <string>
#include <vector>

Expand Down Expand Up @@ -298,10 +299,150 @@ Result<nlohmann::json> ToJson(const Literal& literal) {
}
}

Result<Literal> LiteralFromJson(const nlohmann::json& json, const Type* /*type*/) {
// TODO(gangwu): implement type-aware literal parsing equivalent to Java's
// SingleValueParser.fromJson(type, node).
return LiteralFromJson(json);
/// \brief Parse a JSON single value into a Literal, guided by the expected type.
///
/// Type-aware parsing equivalent to Java's SingleValueParser.fromJson(type, node).
/// A `{"type": "literal", "value": <actual>}` wrapper is unwrapped before parsing.
///
/// \param json The JSON node holding the value (optionally wrapped as described above).
/// \param type The expected type of the literal. When nullptr, parsing is delegated to
///        the untyped LiteralFromJson(json) overload.
/// \return The parsed Literal; a JSON parse error when the node has the wrong JSON kind
///         or fails the range/length/scale checks below; an error propagated from the
///         underlying string parser; or NotSupported for types not handled here.
Result<Literal> LiteralFromJson(const nlohmann::json& json, const Type* type) {
  // If {"type": "literal", "value": <actual>} wrapper is present, unwrap it first.
  // The "type" entry must be verified to be a string before reading it:
  // get<std::string>() throws on any other JSON kind, while this function reports
  // failures through Result. A non-string "type" simply means "not a wrapper".
  if (json.is_object() && json.contains(kType) && json[kType].is_string() &&
      json[kType].get<std::string>() == kLiteral && json.contains(kValue)) {
    return LiteralFromJson(json[kValue], type);
  }
  // If no type context is provided, fall back to untyped parsing.
  if (type == nullptr) return LiteralFromJson(json);

  // is_number_integer() is also true for unsigned JSON integers. An unsigned value
  // above INT64_MAX would silently wrap when read through get<int64_t>(), so detect
  // it up front and reject it in the integer cases below.
  const bool exceeds_int64 =
      json.is_number_unsigned() &&
      json.get<uint64_t>() > static_cast<uint64_t>(std::numeric_limits<int64_t>::max());

  // Type-aware parsing equivalent to Java's SingleValueParser.fromJson(type, node).
  switch (type->type_id()) {
    case TypeId::kBoolean:
      if (!json.is_boolean()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as a boolean value", SafeDumpJson(json));
      }
      return Literal::Boolean(json.get<bool>());

    case TypeId::kInt: {
      if (!json.is_number_integer()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as an int value", SafeDumpJson(json));
      }
      if (exceeds_int64) [[unlikely]] {
        return JsonParseError("Cannot parse {} as an int value: out of range",
                              SafeDumpJson(json));
      }
      // Read as 64-bit first so that values outside the 32-bit range are detected
      // instead of being truncated.
      const auto val = json.get<int64_t>();
      if (val < std::numeric_limits<int32_t>::min() ||
          val > std::numeric_limits<int32_t>::max()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as an int value: out of range",
                              SafeDumpJson(json));
      }
      return Literal::Int(static_cast<int32_t>(val));
    }

    case TypeId::kLong:
      if (!json.is_number_integer()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as a long value", SafeDumpJson(json));
      }
      if (exceeds_int64) [[unlikely]] {
        return JsonParseError("Cannot parse {} as a long value: out of range",
                              SafeDumpJson(json));
      }
      return Literal::Long(json.get<int64_t>());

    case TypeId::kFloat:
      // Only floating-point JSON numbers are accepted; integer nodes are rejected.
      if (!json.is_number_float()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as a float value", SafeDumpJson(json));
      }
      return Literal::Float(json.get<float>());

    case TypeId::kDouble:
      // Only floating-point JSON numbers are accepted; integer nodes are rejected.
      if (!json.is_number_float()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as a double value", SafeDumpJson(json));
      }
      return Literal::Double(json.get<double>());

    case TypeId::kString:
      if (!json.is_string()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as a string value", SafeDumpJson(json));
      }
      return Literal::String(json.get<std::string>());

    // Temporal values are carried as JSON strings and delegated to TransformUtil.
    case TypeId::kDate: {
      if (!json.is_string()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as a date value", SafeDumpJson(json));
      }
      ICEBERG_ASSIGN_OR_RAISE(auto days,
                              TransformUtil::ParseDay(json.get<std::string>()));
      return Literal::Date(days);
    }

    case TypeId::kTime: {
      if (!json.is_string()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as a time value", SafeDumpJson(json));
      }
      ICEBERG_ASSIGN_OR_RAISE(auto micros,
                              TransformUtil::ParseTime(json.get<std::string>()));
      return Literal::Time(micros);
    }

    case TypeId::kTimestamp: {
      if (!json.is_string()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as a timestamp value", SafeDumpJson(json));
      }
      ICEBERG_ASSIGN_OR_RAISE(auto micros,
                              TransformUtil::ParseTimestamp(json.get<std::string>()));
      return Literal::Timestamp(micros);
    }

    case TypeId::kTimestampTz: {
      if (!json.is_string()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as a timestamptz value",
                              SafeDumpJson(json));
      }
      ICEBERG_ASSIGN_OR_RAISE(
          auto micros, TransformUtil::ParseTimestampWithZone(json.get<std::string>()));
      return Literal::TimestampTz(micros);
    }

    case TypeId::kUuid: {
      if (!json.is_string()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as a uuid value", SafeDumpJson(json));
      }
      ICEBERG_ASSIGN_OR_RAISE(auto uuid, Uuid::FromString(json.get<std::string>()));
      return Literal::UUID(uuid);
    }

    // Binary and fixed values are carried as hex strings.
    case TypeId::kBinary: {
      if (!json.is_string()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as a binary value", SafeDumpJson(json));
      }
      ICEBERG_ASSIGN_OR_RAISE(auto bytes,
                              StringUtils::HexStringToBytes(json.get<std::string>()));
      return Literal::Binary(std::move(bytes));
    }

    case TypeId::kFixed: {
      if (!json.is_string()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as a fixed value", SafeDumpJson(json));
      }
      const auto& fixed_type = internal::checked_cast<const FixedType&>(*type);
      const std::string hex = json.get<std::string>();
      // Two hex characters encode one byte, so fixed[N] needs exactly 2 * N chars.
      if (hex.size() != static_cast<size_t>(fixed_type.length()) * 2) [[unlikely]] {
        return JsonParseError("Cannot parse fixed[{}]: expected {} hex chars, got {}",
                              fixed_type.length(), fixed_type.length() * 2, hex.size());
      }
      ICEBERG_ASSIGN_OR_RAISE(auto bytes, StringUtils::HexStringToBytes(hex));
      return Literal::Fixed(std::move(bytes));
    }

    case TypeId::kDecimal: {
      if (!json.is_string()) [[unlikely]] {
        return JsonParseError("Cannot parse {} as a decimal value", SafeDumpJson(json));
      }
      const auto& dec_type = internal::checked_cast<const DecimalType&>(*type);
      int32_t parsed_precision = 0;
      int32_t parsed_scale = 0;
      ICEBERG_ASSIGN_OR_RAISE(
          auto dec,
          Decimal::FromString(json.get<std::string>(), &parsed_precision, &parsed_scale));
      // The textual scale must match the declared scale exactly, and the parsed
      // precision must not exceed the declared precision.
      if (parsed_precision > dec_type.precision() || parsed_scale != dec_type.scale())
          [[unlikely]] {
        return JsonParseError("Cannot parse {} as a {} value", SafeDumpJson(json),
                              type->ToString());
      }
      return Literal::Decimal(dec.value(), dec_type.precision(), dec_type.scale());
    }

    default:
      return NotSupported("Unsupported type for literal JSON parsing: {}",
                          type->ToString());
  }
}

Result<Literal> LiteralFromJson(const nlohmann::json& json) {
Expand Down
54 changes: 48 additions & 6 deletions src/iceberg/expression/literal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,16 @@
#include <concepts>
#include <cstdint>
#include <string>
#include <vector>

#include "iceberg/type.h"
#include "iceberg/util/checked_cast.h"
#include "iceberg/util/conversions.h"
#include "iceberg/util/decimal.h"
#include "iceberg/util/macros.h"
#include "iceberg/util/string_util.h"
#include "iceberg/util/temporal_util.h"
#include "iceberg/util/transform_util.h"

namespace iceberg {

Expand Down Expand Up @@ -193,12 +198,49 @@ Result<Literal> LiteralCaster::CastFromString(
ICEBERG_ASSIGN_OR_RAISE(auto uuid, Uuid::FromString(str_val));
return Literal::UUID(uuid);
}
case TypeId::kDate:
case TypeId::kTime:
case TypeId::kTimestamp:
case TypeId::kTimestampTz:
return NotImplemented("Cast from String to {} is not implemented yet",
target_type->ToString());
case TypeId::kDate: {
ICEBERG_ASSIGN_OR_RAISE(auto days, TransformUtil::ParseDay(str_val));
return Literal::Date(days);
}
case TypeId::kTime: {
ICEBERG_ASSIGN_OR_RAISE(auto micros, TransformUtil::ParseTime(str_val));
return Literal::Time(micros);
}
case TypeId::kTimestamp: {
ICEBERG_ASSIGN_OR_RAISE(auto micros, TransformUtil::ParseTimestamp(str_val));
return Literal::Timestamp(micros);
}
case TypeId::kTimestampTz: {
ICEBERG_ASSIGN_OR_RAISE(auto micros,
TransformUtil::ParseTimestampWithZone(str_val));
return Literal::TimestampTz(micros);
}
case TypeId::kBinary: {
ICEBERG_ASSIGN_OR_RAISE(auto bytes, StringUtils::HexStringToBytes(str_val));
return Literal::Binary(std::move(bytes));
}
case TypeId::kFixed: {
const auto& fixed_type = internal::checked_cast<const FixedType&>(*target_type);
if (str_val.size() != static_cast<size_t>(fixed_type.length()) * 2) {
return InvalidArgument("Cannot cast string to {}: expected {} hex chars, got {}",
target_type->ToString(), fixed_type.length() * 2,
str_val.size());
}
ICEBERG_ASSIGN_OR_RAISE(auto bytes, StringUtils::HexStringToBytes(str_val));
return Literal::Fixed(std::move(bytes));
}
case TypeId::kDecimal: {
const auto& dec_type = internal::checked_cast<const DecimalType&>(*target_type);
int32_t parsed_precision = 0;
int32_t parsed_scale = 0;
ICEBERG_ASSIGN_OR_RAISE(
auto dec, Decimal::FromString(str_val, &parsed_precision, &parsed_scale));
if (parsed_precision > dec_type.precision() || parsed_scale != dec_type.scale()) {
return InvalidArgument("Cannot cast {} as a {} value", str_val,
target_type->ToString());
}
return Literal::Decimal(dec.value(), dec_type.precision(), dec_type.scale());
}
default:
return NotSupported("Cast from String to {} is not supported",
target_type->ToString());
Expand Down
1 change: 1 addition & 0 deletions src/iceberg/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ iceberg_sources = files(
'util/murmurhash3_internal.cc',
'util/property_util.cc',
'util/snapshot_util.cc',
'util/string_util.cc',
'util/temporal_util.cc',
'util/timepoint.cc',
'util/transform_util.cc',
Expand Down
127 changes: 127 additions & 0 deletions src/iceberg/test/expression_json_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
*/

#include <memory>
#include <optional>
#include <string>
#include <vector>

Expand All @@ -31,6 +32,7 @@
#include "iceberg/expression/literal.h"
#include "iceberg/expression/predicate.h"
#include "iceberg/schema.h"
#include "iceberg/schema_field.h"
#include "iceberg/test/matchers.h"
#include "iceberg/type.h"

Expand Down Expand Up @@ -405,4 +407,129 @@ INSTANTIATE_TEST_SUITE_P(
return info.param.name;
});

// --- LiteralFromJson(json, type) type-aware tests ---

// One positive test case for the type-aware LiteralFromJson(json, type) overload.
struct LiteralFromJsonTypedParam {
// Case name; returned by the name generator passed to INSTANTIATE_TEST_SUITE_P.
std::string name;
// JSON value handed to LiteralFromJson.
nlohmann::json json;
// Type passed (as a raw pointer) to LiteralFromJson.
std::shared_ptr<Type> type;
// Type id the parsed literal is expected to report.
TypeId expected_type_id;
// Expected Literal::ToString() output; std::nullopt skips the string comparison.
std::optional<std::string> expected_str;
};

class LiteralFromJsonTypedTest
: public ::testing::TestWithParam<LiteralFromJsonTypedParam> {};

TEST_P(LiteralFromJsonTypedTest, Parses) {
const auto& p = GetParam();
ICEBERG_UNWRAP_OR_FAIL(auto lit, LiteralFromJson(p.json, p.type.get()));
EXPECT_EQ(lit.type()->type_id(), p.expected_type_id);
if (p.expected_str) {
EXPECT_EQ(lit.ToString(), *p.expected_str);
}
}

// Positive cases for LiteralFromJsonTypedTest. Each entry lists, in order: case name,
// input JSON, target type, expected type id, and expected string form. Entries whose
// last field is std::nullopt are only checked for the resulting type id.
INSTANTIATE_TEST_SUITE_P(
LiteralFromJsonTyped, LiteralFromJsonTypedTest,
::testing::Values(
LiteralFromJsonTypedParam{"Boolean", nlohmann::json(true), boolean(),
TypeId::kBoolean, "true"},
LiteralFromJsonTypedParam{"Int", nlohmann::json(123), int32(), TypeId::kInt,
"123"},
// Value does not fit in 32 bits, so it exercises the long path.
LiteralFromJsonTypedParam{"Long", nlohmann::json(9876543210LL), int64(),
TypeId::kLong, "9876543210"},
LiteralFromJsonTypedParam{"Float", nlohmann::json(1.5), float32(), TypeId::kFloat,
std::nullopt},
LiteralFromJsonTypedParam{"Double", nlohmann::json(3.14), float64(),
TypeId::kDouble, std::nullopt},
LiteralFromJsonTypedParam{"String", nlohmann::json("hello"), string(),
TypeId::kString, std::nullopt},
LiteralFromJsonTypedParam{"DateString", nlohmann::json("2024-01-15"), date(),
TypeId::kDate, std::nullopt},
LiteralFromJsonTypedParam{"Uuid",
nlohmann::json("f79c3e09-677c-4bbd-a479-3f349cb785e7"),
uuid(), TypeId::kUuid, std::nullopt},
// Binary and fixed inputs are hex strings; "cafebabe" is 8 hex chars for fixed(4).
LiteralFromJsonTypedParam{"Binary", nlohmann::json("deadbeef"), binary(),
TypeId::kBinary, std::nullopt},
LiteralFromJsonTypedParam{"Fixed", nlohmann::json("cafebabe"), fixed(4),
TypeId::kFixed, std::nullopt},
// Decimal inputs are strings whose scale matches the declared scale.
LiteralFromJsonTypedParam{"DecimalMatchingScale", nlohmann::json("123.4500"),
decimal(9, 4), TypeId::kDecimal, "123.4500"},
LiteralFromJsonTypedParam{"DecimalScaleZero", nlohmann::json("2"), decimal(9, 0),
TypeId::kDecimal, "2"}),
// Use the case name as the generated test name.
[](const ::testing::TestParamInfo<LiteralFromJsonTypedParam>& info) {
return info.param.name;
});

// One negative test case: a JSON value that LiteralFromJson(json, type) must reject.
struct InvalidLiteralFromJsonTypedParam {
// Case name; returned by the name generator passed to INSTANTIATE_TEST_SUITE_P.
std::string name;
// JSON value handed to LiteralFromJson.
nlohmann::json json;
// Type passed (as a raw pointer) to LiteralFromJson.
std::shared_ptr<Type> type;
};

class InvalidLiteralFromJsonTypedTest
: public ::testing::TestWithParam<InvalidLiteralFromJsonTypedParam> {};

TEST_P(InvalidLiteralFromJsonTypedTest, ReturnsError) {
const auto& p = GetParam();
EXPECT_FALSE(LiteralFromJson(p.json, p.type.get()).has_value());
}

// Negative cases for InvalidLiteralFromJsonTypedTest. Each entry lists, in order:
// case name, input JSON, and target type. The test only asserts that parsing fails.
INSTANTIATE_TEST_SUITE_P(
LiteralFromJsonTyped, InvalidLiteralFromJsonTypedTest,
::testing::Values(
// JSON kind does not match what the target type requires.
InvalidLiteralFromJsonTypedParam{"BooleanTypeMismatch", nlohmann::json(42),
boolean()},
InvalidLiteralFromJsonTypedParam{"DateTypeMismatch", nlohmann::json(true),
date()},
InvalidLiteralFromJsonTypedParam{"UuidTypeMismatch", nlohmann::json(42), uuid()},
// Not a valid hex string (presumably rejected by the hex decoder).
InvalidLiteralFromJsonTypedParam{"BinaryInvalidHex", nlohmann::json("xyz"),
binary()},
// 6 hex chars supplied where fixed(4) requires 8.
InvalidLiteralFromJsonTypedParam{"FixedLengthMismatch", nlohmann::json("cafe12"),
fixed(4)},
// Text has two fractional digits while the declared scale is 4.
InvalidLiteralFromJsonTypedParam{"DecimalScaleMismatch", nlohmann::json("123.45"),
decimal(9, 4)},
// Decimals must be JSON strings; a JSON number is rejected.
InvalidLiteralFromJsonTypedParam{"DecimalNotString", nlohmann::json(123.45),
decimal(9, 2)}),
// Use the case name as the generated test name.
[](const ::testing::TestParamInfo<InvalidLiteralFromJsonTypedParam>& info) {
return info.param.name;
});

// One test case for parsing a predicate JSON against a single-field schema.
struct SchemaAwarePredicateParam {
// Case name; returned by the name generator passed to INSTANTIATE_TEST_SUITE_P.
std::string name;
// Name of the schema field; also used as the predicate's "term".
std::string field_name;
// Type of the schema field.
std::shared_ptr<Type> field_type;
// JSON placed under the predicate's "value" key.
nlohmann::json value;
};

class SchemaAwarePredicateRoundTripTest
: public ::testing::TestWithParam<SchemaAwarePredicateParam> {};

TEST_P(SchemaAwarePredicateRoundTripTest, RoundTrip) {
const auto& p = GetParam();
auto schema = std::make_shared<Schema>(
std::vector<SchemaField>{SchemaField::MakeOptional(1, p.field_name, p.field_type)});
nlohmann::json pred_json = {{"type", "eq"}, {"term", p.field_name}, {"value", p.value}};
ICEBERG_UNWRAP_OR_FAIL(auto expr, ExpressionFromJson(pred_json, schema.get()));
ASSERT_NE(expr, nullptr);
}

// Cases for SchemaAwarePredicateRoundTripTest. Each entry lists, in order: case name,
// field name, field type, and the predicate value. Every value is a JSON string, so
// the field type from the schema is what determines how it is interpreted.
INSTANTIATE_TEST_SUITE_P(
LiteralFromJsonTyped, SchemaAwarePredicateRoundTripTest,
::testing::Values(
SchemaAwarePredicateParam{"Date", "event_date", date(), "2024-01-15"},
SchemaAwarePredicateParam{"Time", "event_time", time(), "14:30:00"},
SchemaAwarePredicateParam{"Timestamp", "created_at", timestamp(),
"2026-01-01T00:00:01.500"},
SchemaAwarePredicateParam{"TimestampTz", "updated_at", timestamp_tz(),
"2026-01-01T00:00:01.500+00:00"},
SchemaAwarePredicateParam{"Uuid", "trace_id", uuid(),
"f79c3e09-677c-4bbd-a479-3f349cb785e7"},
SchemaAwarePredicateParam{"Binary", "payload", binary(), "deadbeef"},
SchemaAwarePredicateParam{"Fixed", "hash", fixed(4), "cafebabe"},
SchemaAwarePredicateParam{"Decimal", "amount", decimal(9, 2), "123.45"}),
// Use the case name as the generated test name.
[](const ::testing::TestParamInfo<SchemaAwarePredicateParam>& info) {
return info.param.name;
});

} // namespace iceberg
Loading
Loading