diff --git a/DEPENDENCIES b/DEPENDENCIES index 6efadfe8..3c3755a7 100644 --- a/DEPENDENCIES +++ b/DEPENDENCIES @@ -1,4 +1,4 @@ vendorpull https://github.com/sourcemeta/vendorpull 1dcbac42809cf87cb5b045106b863e17ad84ba02 -core https://github.com/sourcemeta/core 428cbdf92f6330b0f6ae918ac0a9dce089a0470b -blaze https://github.com/sourcemeta/blaze 90c9d98ebeb0c13bd1d75fd3b0e0fb89770cc53d +core https://github.com/sourcemeta/core d07b863772ce5ee38e696ebbc8408d4831545a60 +blaze https://github.com/sourcemeta/blaze 015713ad9c98fbc4bd9669cffb2ab1fcb37942ea bootstrap https://github.com/twbs/bootstrap 1a6fdfae6be09b09eaced8f0e442ca6f7680a61e diff --git a/vendor/blaze/DEPENDENCIES b/vendor/blaze/DEPENDENCIES index 78654694..ed7767e4 100644 --- a/vendor/blaze/DEPENDENCIES +++ b/vendor/blaze/DEPENDENCIES @@ -1,6 +1,6 @@ vendorpull https://github.com/sourcemeta/vendorpull 1dcbac42809cf87cb5b045106b863e17ad84ba02 -core https://github.com/sourcemeta/core 428cbdf92f6330b0f6ae918ac0a9dce089a0470b -jsonschema-test-suite https://github.com/json-schema-org/JSON-Schema-Test-Suite 60755c1097769e313fae3ec4d63bcc9d49b5d2d5 +core https://github.com/sourcemeta/core d07b863772ce5ee38e696ebbc8408d4831545a60 +jsonschema-test-suite https://github.com/json-schema-org/JSON-Schema-Test-Suite 1acd90e53554fa24d2529b49fd7d50bab18f8b7e jsonschema-2020-12 https://github.com/json-schema-org/json-schema-spec 769daad75a9553562333a8937a187741cb708c72 jsonschema-2019-09 https://github.com/json-schema-org/json-schema-spec 41014ea723120ce70b314d72f863c6929d9f3cfd jsonschema-draft7 https://github.com/json-schema-org/json-schema-spec 567f768506aaa33a38e552c85bf0586029ef1b32 diff --git a/vendor/blaze/src/alterschema/canonicalizer/allof_merge_compatible_branches.h b/vendor/blaze/src/alterschema/canonicalizer/allof_merge_compatible_branches.h index 507bcd49..2912f1e8 100644 --- a/vendor/blaze/src/alterschema/canonicalizer/allof_merge_compatible_branches.h +++ b/vendor/blaze/src/alterschema/canonicalizer/allof_merge_compatible_branches.h @@ -95,14 +95,16 @@ class AllOfMergeCompatibleBranches final : public SchemaTransformRule { const auto index{relative.at(1).to_index()}; if (index == this->merge_from_) { - const Pointer old_prefix{current.concat({KEYWORD, this->merge_from_})}; - const Pointer new_prefix{current.concat({KEYWORD, this->merge_into_})}; + const Pointer old_prefix{ + current.concat(Pointer{KEYWORD, this->merge_from_})}; + const Pointer new_prefix{ + current.concat(Pointer{KEYWORD, this->merge_into_})}; return target.rebase(old_prefix, new_prefix); } if (index > this->merge_from_) { - const Pointer old_prefix{current.concat({KEYWORD, index})}; - const Pointer new_prefix{current.concat({KEYWORD, index - 1})}; + const Pointer old_prefix{current.concat(Pointer{KEYWORD, index})}; + const Pointer new_prefix{current.concat(Pointer{KEYWORD, index - 1})}; return target.rebase(old_prefix, new_prefix); } diff --git a/vendor/blaze/src/alterschema/canonicalizer/disallow_array_to_extends.h b/vendor/blaze/src/alterschema/canonicalizer/disallow_array_to_extends.h index 293b9bd5..89be86db 100644 --- a/vendor/blaze/src/alterschema/canonicalizer/disallow_array_to_extends.h +++ b/vendor/blaze/src/alterschema/canonicalizer/disallow_array_to_extends.h @@ -63,7 +63,7 @@ class DisallowArrayToExtends final : public SchemaTransformRule { const Pointer &target, const Pointer ¤t) const -> Pointer override { - const auto disallow_prefix{current.concat({"disallow"})}; + const auto disallow_prefix{current.concat("disallow")}; if (!target.starts_with(disallow_prefix)) { return target; } @@ -75,7 +75,7 @@ class DisallowArrayToExtends final : public SchemaTransformRule { const auto index{relative.at(0).to_index()}; return target.rebase( - current.concat({"disallow", index}), + current.concat(Pointer{"disallow", index}), current.concat( {"extends", this->extends_start_ + index, "disallow", 0})); } diff --git a/vendor/blaze/src/alterschema/canonicalizer/disallow_extends_to_type.h b/vendor/blaze/src/alterschema/canonicalizer/disallow_extends_to_type.h index 7cb88c1b..2f079162 100644 --- a/vendor/blaze/src/alterschema/canonicalizer/disallow_extends_to_type.h +++ b/vendor/blaze/src/alterschema/canonicalizer/disallow_extends_to_type.h @@ -80,7 +80,7 @@ class DisallowExtendsToType final : public SchemaTransformRule { } const auto index{relative.at(0).to_index()}; - return target.rebase(extends_prefix.concat({index}), + return target.rebase(extends_prefix.concat(index), current.concat({"type", index, "disallow", 0})); } diff --git a/vendor/blaze/src/alterschema/canonicalizer/disallow_to_array_of_schemas.h b/vendor/blaze/src/alterschema/canonicalizer/disallow_to_array_of_schemas.h index a81b779d..c50b213b 100644 --- a/vendor/blaze/src/alterschema/canonicalizer/disallow_to_array_of_schemas.h +++ b/vendor/blaze/src/alterschema/canonicalizer/disallow_to_array_of_schemas.h @@ -70,8 +70,8 @@ class DisallowToArrayOfSchemas final : public SchemaTransformRule { const Pointer &target, const Pointer ¤t) const -> Pointer override { - return target.rebase(current.concat({"disallow"}), - current.concat({"disallow", 0})); + return target.rebase(current.concat("disallow"), + current.concat(Pointer{"disallow", 0})); } private: diff --git a/vendor/blaze/src/alterschema/canonicalizer/disallow_type_union_to_extends.h b/vendor/blaze/src/alterschema/canonicalizer/disallow_type_union_to_extends.h index 6763e68d..518e40fd 100644 --- a/vendor/blaze/src/alterschema/canonicalizer/disallow_type_union_to_extends.h +++ b/vendor/blaze/src/alterschema/canonicalizer/disallow_type_union_to_extends.h @@ -80,7 +80,7 @@ class DisallowTypeUnionToExtends final : public SchemaTransformRule { } const auto index{relative.at(0).to_index()}; - return target.rebase(type_prefix.concat({index}), + return target.rebase(type_prefix.concat(index), current.concat({"extends", index, "disallow", 0})); } diff --git a/vendor/blaze/src/alterschema/canonicalizer/extends_to_array.h b/vendor/blaze/src/alterschema/canonicalizer/extends_to_array.h index 7799efd4..a6b2b951 100644 --- a/vendor/blaze/src/alterschema/canonicalizer/extends_to_array.h +++ b/vendor/blaze/src/alterschema/canonicalizer/extends_to_array.h @@ -35,7 +35,7 @@ class ExtendsToArray final : public SchemaTransformRule { const Pointer &target, const Pointer ¤t) const -> Pointer override { - return target.rebase(current.concat({"extends"}), - current.concat({"extends", 0})); + return target.rebase(current.concat("extends"), + current.concat(Pointer{"extends", 0})); } }; diff --git a/vendor/blaze/src/alterschema/canonicalizer/required_to_extends.h b/vendor/blaze/src/alterschema/canonicalizer/required_to_extends.h index 44e9845e..86c7f109 100644 --- a/vendor/blaze/src/alterschema/canonicalizer/required_to_extends.h +++ b/vendor/blaze/src/alterschema/canonicalizer/required_to_extends.h @@ -76,7 +76,7 @@ class RequiredToExtends final : public SchemaTransformRule { const Pointer ¤t) const -> Pointer override { for (const auto &keyword : this->wrapped_keywords_) { - const auto keyword_prefix{current.concat({keyword})}; + const auto keyword_prefix{current.concat(keyword)}; if (target.starts_with(keyword_prefix)) { return target.rebase( keyword_prefix, diff --git a/vendor/blaze/src/alterschema/canonicalizer/single_branch_allof.h b/vendor/blaze/src/alterschema/canonicalizer/single_branch_allof.h index 1182d380..38b4b475 100644 --- a/vendor/blaze/src/alterschema/canonicalizer/single_branch_allof.h +++ b/vendor/blaze/src/alterschema/canonicalizer/single_branch_allof.h @@ -61,7 +61,7 @@ class SingleBranchAllOf final : public SchemaTransformRule { const Pointer ¤t) const -> Pointer override { static const JSON::String KEYWORD{"allOf"}; - const auto prefix{current.concat({KEYWORD, 0})}; + const auto prefix{current.concat(Pointer{KEYWORD, 0})}; if (!target.starts_with(prefix)) { return target; } diff --git a/vendor/blaze/src/alterschema/canonicalizer/single_branch_anyof.h b/vendor/blaze/src/alterschema/canonicalizer/single_branch_anyof.h index 7d1edf13..e43abf56 100644 --- a/vendor/blaze/src/alterschema/canonicalizer/single_branch_anyof.h +++ b/vendor/blaze/src/alterschema/canonicalizer/single_branch_anyof.h @@ -61,11 +61,11 @@ class SingleBranchAnyOf final : public SchemaTransformRule { -> Pointer override { static const JSON::String KEYWORD{"anyOf"}; if (this->has_unevaluated_) { - const auto old_prefix{current.concat({KEYWORD})}; - const Pointer new_prefix{current.concat({"allOf"})}; + const auto old_prefix{current.concat(KEYWORD)}; + const Pointer new_prefix{current.concat("allOf")}; return target.rebase(old_prefix, new_prefix); } - const auto prefix{current.concat({KEYWORD, 0})}; + const auto prefix{current.concat(Pointer{KEYWORD, 0})}; if (!target.starts_with(prefix)) { return target; } diff --git a/vendor/blaze/src/alterschema/canonicalizer/single_branch_oneof.h b/vendor/blaze/src/alterschema/canonicalizer/single_branch_oneof.h index ca989b2b..d7b21e9c 100644 --- a/vendor/blaze/src/alterschema/canonicalizer/single_branch_oneof.h +++ b/vendor/blaze/src/alterschema/canonicalizer/single_branch_oneof.h @@ -61,11 +61,11 @@ class SingleBranchOneOf final : public SchemaTransformRule { -> Pointer override { static const JSON::String KEYWORD{"oneOf"}; if (this->has_unevaluated_) { - const auto old_prefix{current.concat({KEYWORD})}; - const Pointer new_prefix{current.concat({"allOf"})}; + const auto old_prefix{current.concat(KEYWORD)}; + const Pointer new_prefix{current.concat("allOf")}; return target.rebase(old_prefix, new_prefix); } - const auto prefix{current.concat({KEYWORD, 0})}; + const auto prefix{current.concat(Pointer{KEYWORD, 0})}; if (!target.starts_with(prefix)) { return target; } diff --git a/vendor/blaze/src/alterschema/canonicalizer/type_array_to_any_of.h b/vendor/blaze/src/alterschema/canonicalizer/type_array_to_any_of.h index 99b18626..de4bca74 100644 --- a/vendor/blaze/src/alterschema/canonicalizer/type_array_to_any_of.h +++ b/vendor/blaze/src/alterschema/canonicalizer/type_array_to_any_of.h @@ -125,9 +125,9 @@ class TypeArrayToAnyOf final : public SchemaTransformRule { current); } - const Pointer old_prefix{current.concat({keyword})}; + const Pointer old_prefix{current.concat(keyword)}; const Pointer new_prefix{current.concat(this->disjunctors_prefix_) - .concat({match->second, keyword})}; + .concat(Pointer{match->second, keyword})}; return target.rebase(old_prefix, new_prefix); } diff --git a/vendor/blaze/src/alterschema/canonicalizer/type_union_distribute_keywords.h b/vendor/blaze/src/alterschema/canonicalizer/type_union_distribute_keywords.h index 04ba7d5e..70076b94 100644 --- a/vendor/blaze/src/alterschema/canonicalizer/type_union_distribute_keywords.h +++ b/vendor/blaze/src/alterschema/canonicalizer/type_union_distribute_keywords.h @@ -138,7 +138,7 @@ class TypeUnionDistributeKeywords final : public SchemaTransformRule { const Pointer ¤t) const -> Pointer override { if (this->wrap_) { - const auto type_prefix{current.concat({"type"})}; + const auto type_prefix{current.concat("type")}; if (target.starts_with(type_prefix)) { return target.rebase( type_prefix, @@ -146,7 +146,7 @@ class TypeUnionDistributeKeywords final : public SchemaTransformRule { } for (const auto &keyword : this->wrap_keywords_) { - const auto keyword_prefix{current.concat({keyword})}; + const auto keyword_prefix{current.concat(keyword)}; if (target.starts_with(keyword_prefix)) { return target.rebase( keyword_prefix, @@ -162,7 +162,7 @@ class TypeUnionDistributeKeywords final : public SchemaTransformRule { continue; } - const auto keyword_prefix{current.concat({entry.first})}; + const auto keyword_prefix{current.concat(entry.first)}; if (target.starts_with(keyword_prefix)) { return target.rebase( keyword_prefix, diff --git a/vendor/blaze/src/alterschema/canonicalizer/type_with_applicator_to_allof.h b/vendor/blaze/src/alterschema/canonicalizer/type_with_applicator_to_allof.h index 11831cfe..006e23cb 100644 --- a/vendor/blaze/src/alterschema/canonicalizer/type_with_applicator_to_allof.h +++ b/vendor/blaze/src/alterschema/canonicalizer/type_with_applicator_to_allof.h @@ -434,7 +434,7 @@ class TypeWithApplicatorToAllOf final : public SchemaTransformRule { for (const auto &typed_kw : this->typed_keywords_) { if (typed_kw == keyword) { - const Pointer old_prefix{current.concat({keyword})}; + const Pointer old_prefix{current.concat(keyword)}; if (this->strategy_ == Strategy::SafeExtract) { const Pointer new_prefix{current.concat( {allof_keyword, this->typed_branch_index_, keyword})}; @@ -468,7 +468,7 @@ class TypeWithApplicatorToAllOf final : public SchemaTransformRule { if (keyword == applicator || (this->has_if_then_else_ && std::string_view{applicator} == "if" && (keyword == "then" || keyword == "else"))) { - const Pointer old_prefix{current.concat({keyword})}; + const Pointer old_prefix{current.concat(keyword)}; const Pointer new_prefix{ current.concat({allof_keyword, index, keyword})}; return target.rebase(old_prefix, new_prefix); diff --git a/vendor/blaze/src/alterschema/canonicalizer/type_with_applicator_to_extends.h b/vendor/blaze/src/alterschema/canonicalizer/type_with_applicator_to_extends.h index 97482a85..65e59b44 100644 --- a/vendor/blaze/src/alterschema/canonicalizer/type_with_applicator_to_extends.h +++ b/vendor/blaze/src/alterschema/canonicalizer/type_with_applicator_to_extends.h @@ -103,7 +103,7 @@ class TypeWithApplicatorToExtends final : public SchemaTransformRule { for (const auto &typed_keyword : this->typed_keywords_) { if (typed_keyword == keyword) { - const Pointer old_prefix{current.concat({keyword})}; + const Pointer old_prefix{current.concat(keyword)}; const std::size_t typed_index{ static_cast(std::popcount(this->applicator_indices_))}; const Pointer new_prefix{ @@ -115,7 +115,7 @@ class TypeWithApplicatorToExtends final : public SchemaTransformRule { std::size_t index{0}; for (const auto &applicator : APPLICATORS) { if (keyword == applicator) { - const Pointer old_prefix{current.concat({keyword})}; + const Pointer old_prefix{current.concat(keyword)}; const Pointer new_prefix{ current.concat({extends_keyword, index, keyword})}; return target.rebase(old_prefix, new_prefix); diff --git a/vendor/blaze/src/alterschema/canonicalizer/unevaluated_items_to_items.h b/vendor/blaze/src/alterschema/canonicalizer/unevaluated_items_to_items.h index 36716bc0..35d7cb2f 100644 --- a/vendor/blaze/src/alterschema/canonicalizer/unevaluated_items_to_items.h +++ b/vendor/blaze/src/alterschema/canonicalizer/unevaluated_items_to_items.h @@ -48,7 +48,7 @@ class UnevaluatedItemsToItems final : public SchemaTransformRule { const Pointer &target, const Pointer ¤t) const -> Pointer override { - return target.rebase(current.concat({"unevaluatedItems"}), - current.concat({"items"})); + return target.rebase(current.concat("unevaluatedItems"), + current.concat("items")); } }; diff --git a/vendor/blaze/src/alterschema/canonicalizer/unevaluated_properties_to_additional_properties.h b/vendor/blaze/src/alterschema/canonicalizer/unevaluated_properties_to_additional_properties.h index d309d75b..628c14c7 100644 --- a/vendor/blaze/src/alterschema/canonicalizer/unevaluated_properties_to_additional_properties.h +++ b/vendor/blaze/src/alterschema/canonicalizer/unevaluated_properties_to_additional_properties.h @@ -50,7 +50,7 @@ class UnevaluatedPropertiesToAdditionalProperties final const Pointer &target, const Pointer ¤t) const -> Pointer override { - return target.rebase(current.concat({"unevaluatedProperties"}), - current.concat({"additionalProperties"})); + return target.rebase(current.concat("unevaluatedProperties"), + current.concat("additionalProperties")); } }; diff --git a/vendor/blaze/src/alterschema/common/double_negation_elimination.h b/vendor/blaze/src/alterschema/common/double_negation_elimination.h index 3e6a25e8..a0209ea4 100644 --- a/vendor/blaze/src/alterschema/common/double_negation_elimination.h +++ b/vendor/blaze/src/alterschema/common/double_negation_elimination.h @@ -65,9 +65,9 @@ class DoubleNegationElimination final : public SchemaTransformRule { const Pointer &target, const Pointer ¤t) const -> Pointer override { - auto old_prefix{current.concat({"not", "not"})}; - while (target.starts_with(old_prefix.concat({"not", "not"}))) { - old_prefix = old_prefix.concat({"not", "not"}); + auto old_prefix{current.concat(Pointer{"not", "not"})}; + while (target.starts_with(old_prefix.concat(Pointer{"not", "not"}))) { + old_prefix = old_prefix.concat(Pointer{"not", "not"}); } if (!target.starts_with(old_prefix)) { return target; diff --git a/vendor/blaze/src/alterschema/common/duplicate_allof_branches.h b/vendor/blaze/src/alterschema/common/duplicate_allof_branches.h index f2c97226..2267dfb6 100644 --- a/vendor/blaze/src/alterschema/common/duplicate_allof_branches.h +++ b/vendor/blaze/src/alterschema/common/duplicate_allof_branches.h @@ -63,12 +63,12 @@ class DuplicateAllOfBranches final : public SchemaTransformRule { const Pointer &target, const Pointer ¤t) const -> Pointer override { - const auto allof_prefix{current.concat({"allOf"})}; + const auto allof_prefix{current.concat("allOf")}; const auto relative{target.resolve_from(allof_prefix)}; const auto old_index{relative.at(0).to_index()}; const auto new_index{this->index_mapping_.at(old_index)}; - const Pointer old_prefix{allof_prefix.concat({old_index})}; - const Pointer new_prefix{allof_prefix.concat({new_index})}; + const Pointer old_prefix{allof_prefix.concat(old_index)}; + const Pointer new_prefix{allof_prefix.concat(new_index)}; return target.rebase(old_prefix, new_prefix); } diff --git a/vendor/blaze/src/alterschema/common/duplicate_anyof_branches.h b/vendor/blaze/src/alterschema/common/duplicate_anyof_branches.h index 9dbd4c3d..cf44a9c6 100644 --- a/vendor/blaze/src/alterschema/common/duplicate_anyof_branches.h +++ b/vendor/blaze/src/alterschema/common/duplicate_anyof_branches.h @@ -63,12 +63,12 @@ class DuplicateAnyOfBranches final : public SchemaTransformRule { const Pointer &target, const Pointer ¤t) const -> Pointer override { - const auto anyof_prefix{current.concat({"anyOf"})}; + const auto anyof_prefix{current.concat("anyOf")}; const auto relative{target.resolve_from(anyof_prefix)}; const auto old_index{relative.at(0).to_index()}; const auto new_index{this->index_mapping_.at(old_index)}; - const Pointer old_prefix{anyof_prefix.concat({old_index})}; - const Pointer new_prefix{anyof_prefix.concat({new_index})}; + const Pointer old_prefix{anyof_prefix.concat(old_index)}; + const Pointer new_prefix{anyof_prefix.concat(new_index)}; return target.rebase(old_prefix, new_prefix); } diff --git a/vendor/blaze/src/alterschema/common/flatten_nested_allof.h b/vendor/blaze/src/alterschema/common/flatten_nested_allof.h index bdd05eda..ca551af6 100644 --- a/vendor/blaze/src/alterschema/common/flatten_nested_allof.h +++ b/vendor/blaze/src/alterschema/common/flatten_nested_allof.h @@ -72,7 +72,7 @@ class FlattenNestedAllOf final : public SchemaTransformRule { const Pointer ¤t) const -> Pointer override { static const JSON::String KEYWORD{"allOf"}; - const auto prefix{current.concat({KEYWORD})}; + const auto prefix{current.concat(KEYWORD)}; if (!target.starts_with(prefix)) { return target; } @@ -86,12 +86,12 @@ class FlattenNestedAllOf final : public SchemaTransformRule { const Pointer old_prefix{ prefix.concat({old_index, KEYWORD, inner.value()})}; if (target.starts_with(old_prefix)) { - const Pointer new_prefix{prefix.concat({mapped})}; + const Pointer new_prefix{prefix.concat(mapped)}; return target.rebase(old_prefix, new_prefix); } } else if (outer == old_index) { - const Pointer old_prefix{prefix.concat({old_index})}; - const Pointer new_prefix{prefix.concat({mapped})}; + const Pointer old_prefix{prefix.concat(old_index)}; + const Pointer new_prefix{prefix.concat(mapped)}; return target.rebase(old_prefix, new_prefix); } } diff --git a/vendor/blaze/src/alterschema/common/flatten_nested_anyof.h b/vendor/blaze/src/alterschema/common/flatten_nested_anyof.h index fda43228..632350a9 100644 --- a/vendor/blaze/src/alterschema/common/flatten_nested_anyof.h +++ b/vendor/blaze/src/alterschema/common/flatten_nested_anyof.h @@ -72,7 +72,7 @@ class FlattenNestedAnyOf final : public SchemaTransformRule { const Pointer ¤t) const -> Pointer override { static const JSON::String KEYWORD{"anyOf"}; - const auto prefix{current.concat({KEYWORD})}; + const auto prefix{current.concat(KEYWORD)}; if (!target.starts_with(prefix)) { return target; } @@ -86,12 +86,12 @@ class FlattenNestedAnyOf final : public SchemaTransformRule { const Pointer old_prefix{ prefix.concat({old_index, KEYWORD, inner.value()})}; if (target.starts_with(old_prefix)) { - const Pointer new_prefix{prefix.concat({mapped})}; + const Pointer new_prefix{prefix.concat(mapped)}; return target.rebase(old_prefix, new_prefix); } } else if (outer == old_index) { - const Pointer old_prefix{prefix.concat({old_index})}; - const Pointer new_prefix{prefix.concat({mapped})}; + const Pointer old_prefix{prefix.concat(old_index)}; + const Pointer new_prefix{prefix.concat(mapped)}; return target.rebase(old_prefix, new_prefix); } } diff --git a/vendor/blaze/src/alterschema/common/flatten_nested_extends.h b/vendor/blaze/src/alterschema/common/flatten_nested_extends.h index 9863cd54..3eb7c6a3 100644 --- a/vendor/blaze/src/alterschema/common/flatten_nested_extends.h +++ b/vendor/blaze/src/alterschema/common/flatten_nested_extends.h @@ -69,7 +69,7 @@ class FlattenNestedExtends final : public SchemaTransformRule { const Pointer ¤t) const -> Pointer override { static const JSON::String KEYWORD{"extends"}; - const auto prefix{current.concat({KEYWORD})}; + const auto prefix{current.concat(KEYWORD)}; if (!target.starts_with(prefix)) { return target; } @@ -83,12 +83,12 @@ class FlattenNestedExtends final : public SchemaTransformRule { const Pointer old_prefix{ prefix.concat({old_index, KEYWORD, inner.value()})}; if (target.starts_with(old_prefix)) { - const Pointer new_prefix{prefix.concat({mapped})}; + const Pointer new_prefix{prefix.concat(mapped)}; return target.rebase(old_prefix, new_prefix); } } else if (outer == old_index) { - const Pointer old_prefix{prefix.concat({old_index})}; - const Pointer new_prefix{prefix.concat({mapped})}; + const Pointer old_prefix{prefix.concat(old_index)}; + const Pointer new_prefix{prefix.concat(mapped)}; return target.rebase(old_prefix, new_prefix); } } diff --git a/vendor/blaze/src/alterschema/common/oneof_to_anyof_disjoint_types.h b/vendor/blaze/src/alterschema/common/oneof_to_anyof_disjoint_types.h index 3aabdd59..3295d3f2 100644 --- a/vendor/blaze/src/alterschema/common/oneof_to_anyof_disjoint_types.h +++ b/vendor/blaze/src/alterschema/common/oneof_to_anyof_disjoint_types.h @@ -94,8 +94,8 @@ class OneOfToAnyOfDisjointTypes final : public SchemaTransformRule { rereference(const std::string_view, const Pointer &origin [[maybe_unused]], const Pointer &target, const Pointer ¤t) const -> Pointer override { - const Pointer oneof_prefix{current.concat({"oneOf"})}; - const Pointer anyof_prefix{current.concat({"anyOf"})}; + const Pointer oneof_prefix{current.concat("oneOf")}; + const Pointer anyof_prefix{current.concat("anyOf")}; return target.rebase(oneof_prefix, anyof_prefix); } }; diff --git a/vendor/blaze/src/alterschema/linter/definitions_to_defs.h b/vendor/blaze/src/alterschema/linter/definitions_to_defs.h index 1c4197c8..05fdfb0e 100644 --- a/vendor/blaze/src/alterschema/linter/definitions_to_defs.h +++ b/vendor/blaze/src/alterschema/linter/definitions_to_defs.h @@ -32,7 +32,7 @@ class DefinitionsToDefs final : public SchemaTransformRule { const Pointer &target, const Pointer ¤t) const -> Pointer override { - return target.rebase(current.concat({"definitions"}), - current.concat({"$defs"})); + return target.rebase(current.concat("definitions"), + current.concat("$defs")); } }; diff --git a/vendor/blaze/src/alterschema/linter/unnecessary_allof_wrapper.h b/vendor/blaze/src/alterschema/linter/unnecessary_allof_wrapper.h index b16b52d9..977a0fb3 100644 --- a/vendor/blaze/src/alterschema/linter/unnecessary_allof_wrapper.h +++ b/vendor/blaze/src/alterschema/linter/unnecessary_allof_wrapper.h @@ -201,11 +201,12 @@ class UnnecessaryAllOfWrapper final : public SchemaTransformRule { const Pointer ¤t) const -> Pointer override { // The rule moves keywords from /allOf// to / - const auto allof_prefix{current.concat({KEYWORD})}; + const auto allof_prefix{current.concat(KEYWORD)}; const auto relative{target.resolve_from(allof_prefix)}; const auto &keyword{relative.at(1).to_property()}; - const Pointer old_prefix{allof_prefix.concat({relative.at(0), keyword})}; - const Pointer new_prefix{current.concat({keyword})}; + const Pointer old_prefix{ + allof_prefix.concat(Pointer{relative.at(0), keyword})}; + const Pointer new_prefix{current.concat(keyword)}; return target.rebase(old_prefix, new_prefix); } diff --git a/vendor/blaze/src/alterschema/linter/unnecessary_extends_wrapper.h b/vendor/blaze/src/alterschema/linter/unnecessary_extends_wrapper.h index 9a774756..41151002 100644 --- a/vendor/blaze/src/alterschema/linter/unnecessary_extends_wrapper.h +++ b/vendor/blaze/src/alterschema/linter/unnecessary_extends_wrapper.h @@ -124,11 +124,12 @@ class UnnecessaryExtendsWrapper final : public SchemaTransformRule { const Pointer ¤t) const -> Pointer override { // The rule moves keywords from /extends// to / - const auto extends_prefix{current.concat({KEYWORD})}; + const auto extends_prefix{current.concat(KEYWORD)}; const auto relative{target.resolve_from(extends_prefix)}; const auto &keyword{relative.at(1).to_property()}; - const Pointer old_prefix{extends_prefix.concat({relative.at(0), keyword})}; - const Pointer new_prefix{current.concat({keyword})}; + const Pointer old_prefix{ + extends_prefix.concat(Pointer{relative.at(0), keyword})}; + const Pointer new_prefix{current.concat(keyword)}; return target.rebase(old_prefix, new_prefix); } diff --git a/vendor/blaze/src/codegen/codegen_mangle.cc b/vendor/blaze/src/codegen/codegen_mangle.cc index 0d6a4a38..dec59ad5 100644 --- a/vendor/blaze/src/codegen/codegen_mangle.cc +++ b/vendor/blaze/src/codegen/codegen_mangle.cc @@ -28,24 +28,26 @@ auto symbol_to_identifier(const std::string_view prefix, continue; } - bool first_in_segment{true}; + bool at_word_start{true}; + bool at_segment_start{true}; for (const auto character : segment) { if (is_alpha(character)) { - if (first_in_segment) { - result += to_upper(character); - first_in_segment = false; - } else { - result += character; - } + result += at_word_start ? to_upper(character) : character; + at_word_start = false; + at_segment_start = false; } else if (is_digit(character)) { - if (first_in_segment) { + if (at_segment_start) { result += '_'; } result += character; - first_in_segment = false; + at_word_start = false; + at_segment_start = false; } else if (character == '_' || character == '$') { result += character; - first_in_segment = false; + at_word_start = false; + at_segment_start = false; + } else { + at_word_start = true; } } } diff --git a/vendor/blaze/src/compiler/compile_helpers.h b/vendor/blaze/src/compiler/compile_helpers.h index a2921279..eb82f409 100644 --- a/vendor/blaze/src/compiler/compile_helpers.h +++ b/vendor/blaze/src/compiler/compile_helpers.h @@ -81,7 +81,7 @@ inline auto make_with_resource(const InstructionIndex type, dynamic_context.keyword.empty() ? to_pointer(dynamic_context.base_schema_location) : to_pointer(dynamic_context.base_schema_location) - .concat({dynamic_context.keyword})}; + .concat(dynamic_context.keyword)}; const auto extra_index{context.extra.size()}; context.extra.push_back( {.relative_schema_location = schema_location, @@ -115,7 +115,7 @@ inline auto make(const InstructionIndex type, const Context &context, dynamic_context.keyword.empty() ? to_pointer(dynamic_context.base_schema_location) : to_pointer(dynamic_context.base_schema_location) - .concat({dynamic_context.keyword})}; + .concat(dynamic_context.keyword)}; const auto extra_index{context.extra.size()}; context.extra.push_back( {.relative_schema_location = schema_location, @@ -259,7 +259,7 @@ inline auto find_adjacent(const Context &context, sourcemeta::core::to_uri( sourcemeta::core::to_pointer( std::string{reference.fragment.value_or("")}) - .concat({keyword})) + .concat(keyword)) .resolve_from(sourcemeta::core::URI{reference.base})}; // TODO: When this logic is used by diff --git a/vendor/blaze/src/frame/frame.cc b/vendor/blaze/src/frame/frame.cc index 690e3ec4..a2833dd3 100644 --- a/vendor/blaze/src/frame/frame.cc +++ b/vendor/blaze/src/frame/frame.cc @@ -1146,7 +1146,7 @@ auto SchemaFrame::analyse(const sourcemeta::core::JSON &root, if (ref != "#") { throw sourcemeta::blaze::SchemaReferenceError( entry.id.value_or(""), - to_pointer(common_pointer_weak).concat({"$recursiveRef"}), + to_pointer(common_pointer_weak).concat("$recursiveRef"), "Invalid recursive reference"); } diff --git a/vendor/blaze/src/test/test_parser.cc b/vendor/blaze/src/test/test_parser.cc index 465c5a71..16de65b9 100644 --- a/vendor/blaze/src/test/test_parser.cc +++ b/vendor/blaze/src/test/test_parser.cc @@ -58,17 +58,17 @@ auto TestCase::parse( "`dataPath` property, but not both"); TEST_ERROR_IF(test_case_json.defines("dataPath") && !test_case_json.at("dataPath").is_string(), - tracker, location.concat({"dataPath"}), + tracker, location.concat("dataPath"), "Test case documents must set the `dataPath` property to a " "string"); TEST_ERROR_IF(test_case_json.defines("description") && !test_case_json.at("description").is_string(), - tracker, location.concat({"description"}), + tracker, location.concat("description"), "If you set a test case description, it must be a string"); TEST_ERROR_IF(!test_case_json.defines("valid"), tracker, location, "Test case documents must contain a `valid` property"); TEST_ERROR_IF(!test_case_json.at("valid").is_boolean(), tracker, - location.concat({"valid"}), + location.concat("valid"), "The test case document `valid` property must be a boolean"); sourcemeta::core::JSON::String description; diff --git a/vendor/core/CMakeLists.txt b/vendor/core/CMakeLists.txt index 22f9a9a7..e45099d9 100644 --- a/vendor/core/CMakeLists.txt +++ b/vendor/core/CMakeLists.txt @@ -27,6 +27,7 @@ option(SOURCEMETA_CORE_URI "Build the Sourcemeta Core URI library" ON) option(SOURCEMETA_CORE_URITEMPLATE "Build the Sourcemeta Core URI Template library" ON) option(SOURCEMETA_CORE_JSON "Build the Sourcemeta Core JSON library" ON) option(SOURCEMETA_CORE_JSONPOINTER "Build the Sourcemeta Core JSON Pointer library" ON) +option(SOURCEMETA_CORE_JSONLD "Build the Sourcemeta Core JSON-LD library" ON) option(SOURCEMETA_CORE_JSONL "Build the Sourcemeta Core JSONL library" ON) option(SOURCEMETA_CORE_YAML "Build the Sourcemeta Core YAML library" ON) option(SOURCEMETA_CORE_JSONRPC "Build the Sourcemeta Core JSON-RPC library" ON) @@ -171,6 +172,10 @@ if(SOURCEMETA_CORE_JSONPOINTER) add_subdirectory(src/core/jsonpointer) endif() +if(SOURCEMETA_CORE_JSONLD) + add_subdirectory(src/core/jsonld) +endif() + if(SOURCEMETA_CORE_GZIP) find_package(LibDeflate REQUIRED) add_subdirectory(src/core/gzip) @@ -338,6 +343,10 @@ if(SOURCEMETA_CORE_TESTS) add_subdirectory(test/jsonpointer) endif() + if(SOURCEMETA_CORE_JSONLD) + add_subdirectory(test/jsonld) + endif() + if(SOURCEMETA_CORE_GZIP) add_subdirectory(test/gzip) endif() diff --git a/vendor/core/config.cmake.in b/vendor/core/config.cmake.in index 5867d812..889e1c79 100644 --- a/vendor/core/config.cmake.in +++ b/vendor/core/config.cmake.in @@ -24,6 +24,7 @@ if(NOT SOURCEMETA_CORE_COMPONENTS) list(APPEND SOURCEMETA_CORE_COMPONENTS json) list(APPEND SOURCEMETA_CORE_COMPONENTS jsonl) list(APPEND SOURCEMETA_CORE_COMPONENTS jsonpointer) + list(APPEND SOURCEMETA_CORE_COMPONENTS jsonld) list(APPEND SOURCEMETA_CORE_COMPONENTS yaml) list(APPEND SOURCEMETA_CORE_COMPONENTS jsonrpc) list(APPEND SOURCEMETA_CORE_COMPONENTS mcp) @@ -136,6 +137,20 @@ foreach(component ${SOURCEMETA_CORE_COMPONENTS}) include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_unicode.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_json.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_jsonpointer.cmake") + elseif(component STREQUAL "jsonld") + find_dependency(PCRE2 CONFIG) + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_preprocessor.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_numeric.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_io.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_unicode.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_ip.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_regex.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_text.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_json.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_uri.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_jsonpointer.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_langtag.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_jsonld.cmake") elseif(component STREQUAL "yaml") find_dependency(PCRE2 CONFIG) include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_regex.cmake") diff --git a/vendor/core/src/core/jose/CMakeLists.txt b/vendor/core/src/core/jose/CMakeLists.txt index 3923db25..986ec448 100644 --- a/vendor/core/src/core/jose/CMakeLists.txt +++ b/vendor/core/src/core/jose/CMakeLists.txt @@ -1,7 +1,7 @@ sourcemeta_library(NAMESPACE sourcemeta PROJECT core NAME jose - PRIVATE_HEADERS algorithm.h error.h jwk.h jwks.h jwt.h verify.h - SOURCES jose_algorithm.cc jose_jwk.cc jose_jwks.cc jose_jwt.cc - jose_jwt_check_claims.cc jose_jws_verify_signature.cc + PRIVATE_HEADERS algorithm.h error.h jwk.h jwks.h jwks_provider.h jwt.h verify.h + SOURCES jose_algorithm.cc jose_jwk.cc jose_jwks.cc jose_jwks_provider.cc + jose_jwt.cc jose_jwt_check_claims.cc jose_jws_verify_signature.cc jose_jwt_verify_signature.cc jose_jwt_verify.cc) target_link_libraries(sourcemeta_core_jose diff --git a/vendor/core/src/core/jose/include/sourcemeta/core/jose.h b/vendor/core/src/core/jose/include/sourcemeta/core/jose.h index 47193a2a..169065fa 100644 --- a/vendor/core/src/core/jose/include/sourcemeta/core/jose.h +++ b/vendor/core/src/core/jose/include/sourcemeta/core/jose.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include // NOLINTEND(misc-include-cleaner) diff --git a/vendor/core/src/core/jose/include/sourcemeta/core/jose_jwks_provider.h b/vendor/core/src/core/jose/include/sourcemeta/core/jose_jwks_provider.h new file mode 100644 index 00000000..53d12eed --- /dev/null +++ b/vendor/core/src/core/jose/include/sourcemeta/core/jose_jwks_provider.h @@ -0,0 +1,120 @@ +#ifndef SOURCEMETA_CORE_JOSE_JWKS_PROVIDER_H_ +#define SOURCEMETA_CORE_JOSE_JWKS_PROVIDER_H_ + +#ifndef SOURCEMETA_CORE_JOSE_EXPORT +#include +#endif + +#include +#include +#include +#include + +#include // std::chrono +#include // std::function +#include // std::shared_ptr +#include // std::mutex +#include // std::optional, std::nullopt +#include // std::span +#include // std::string +#include // std::string_view + +namespace sourcemeta::core { + +/// @ingroup jose +/// A stateful, thread-safe resolver that owns a key set URL and verifies tokens +/// against it, fetching and refreshing the keys internally. It adds caching, a +/// freshness-aware refresh, a guarded refetch on rotation, and serving of the +/// previously held keys through a transient outage. The current time is read +/// internally from an injectable clock, so a caller never deals with it and +/// tests can drive expiry, rotation, and the refetch cooldown +/// deterministically. +class SOURCEMETA_CORE_JOSE_EXPORT JWKSProvider { +public: + /// The outcome of one key set retrieval. The freshness hint is kept separate + /// from the body because only the caller that made the request can observe + /// the caching response header. An absent hint means no lifetime was + /// advertised, which is distinct from an advertised lifetime of zero. + struct FetchResult { + std::string body; + std::optional max_age; + }; + + /// A pluggable transport that turns a URL into raw key set bytes plus an + /// optional freshness hint. Returns no value on a failed retrieval, such as a + /// transport error, an unsuccessful response, or an oversized body. Injecting + /// the transport keeps this module free of any networking dependency and + /// makes the provider substitutable for testing. + using Fetcher = + std::function(std::string_view url)>; + + /// A source of the current time. It defaults to the system clock and exists + /// as an injection point so that expiry, rotation, and the refetch cooldown + /// can be driven deterministically under test. + using Clock = std::function; + + /// Tunables for the caching and verification policy. + struct Options { + std::chrono::seconds fallback_ttl{std::chrono::hours{1}}; + std::chrono::seconds minimum_ttl{std::chrono::minutes{5}}; + std::chrono::seconds maximum_ttl{std::chrono::hours{24}}; + std::chrono::seconds unknown_kid_cooldown{std::chrono::minutes{5}}; + std::chrono::seconds clock_skew{0}; + }; + + /// Construct a provider for a concrete key set URL with an injected + /// transport, optionally overriding the policy and the clock. + JWKSProvider(std::string jwks_uri, Fetcher fetcher); + JWKSProvider(std::string jwks_uri, Fetcher fetcher, Options options); + JWKSProvider(std::string jwks_uri, Fetcher fetcher, Options options, + Clock clock); + + /// The provider owns a lock guarding its cache, so it is neither copyable + /// nor movable. Store it behind a pointer or construct it in place. + JWKSProvider(const JWKSProvider &) = delete; + auto operator=(const JWKSProvider &) -> JWKSProvider & = delete; + JWKSProvider(JWKSProvider &&) = delete; + auto operator=(JWKSProvider &&) -> JWKSProvider & = delete; + + /// Verify a token against the provider's key set, fetching or refreshing the + /// keys as needed. Returns no value when the token is fully valid, otherwise + /// the first failing step. The current time and the clock skew tolerance are + /// the provider's own concern, so a caller supplies only what identifies the + /// token: an optional expected subject and an optional expected type for the + /// access token profile. + [[nodiscard]] auto + verify(const JWT &token, + const std::span allowed_algorithms, + const std::string_view expected_issuer, + const std::string_view expected_audience, + const std::optional expected_subject = std::nullopt, + const std::optional expected_type = std::nullopt) + -> std::optional; + +private: + auto fetch_and_install_locked(std::chrono::system_clock::time_point now) + -> bool; + auto refresh_locked(std::chrono::system_clock::time_point now) + -> std::shared_ptr; + auto refetch_for_unknown_kid_locked(std::chrono::system_clock::time_point now) + -> std::shared_ptr; + +#if defined(_MSC_VER) +#pragma warning(disable : 4251) +#endif + const std::string jwks_uri_; + const Fetcher fetcher_; + const Options options_; + const Clock clock_; + std::mutex mutex_; + std::shared_ptr keys_; + std::chrono::system_clock::time_point next_refresh_{}; + std::chrono::system_clock::time_point unknown_kid_cooldown_until_{}; +#if defined(_MSC_VER) +#pragma warning(default : 4251) +#endif +}; + +} // namespace sourcemeta::core + +#endif diff --git a/vendor/core/src/core/jose/jose_jwks_provider.cc b/vendor/core/src/core/jose/jose_jwks_provider.cc new file mode 100644 index 00000000..d9c20ed8 --- /dev/null +++ b/vendor/core/src/core/jose/jose_jwks_provider.cc @@ -0,0 +1,173 @@ +#include + +#include // sourcemeta::core::try_parse_json + +#include // std::max, std::min +#include // std::chrono +#include // std::make_shared, std::shared_ptr +#include // std::scoped_lock +#include // std::optional, std::nullopt +#include // std::move + +namespace { + +auto system_now() -> std::chrono::system_clock::time_point { + return std::chrono::system_clock::now(); +} + +auto clamp_ttl(const std::optional max_age, + const sourcemeta::core::JWKSProvider::Options &options) + -> std::chrono::seconds { + // An advertised lifetime (including a real zero) is held within the + // configured band. The absence of one falls back to the default lifetime. The + // lower and upper limits are applied as separate comparisons so that a + // configuration whose band is inverted, with the minimum above the maximum, + // stays well defined rather than undefined, with the minimum taking + // precedence + if (max_age.has_value()) { + return std::max(options.minimum_ttl, + std::min(max_age.value(), options.maximum_ttl)); + } + + return options.fallback_ttl; +} + +} // namespace + +namespace sourcemeta::core { + +JWKSProvider::JWKSProvider(std::string jwks_uri, Fetcher fetcher) + : JWKSProvider{std::move(jwks_uri), std::move(fetcher), Options{}, + system_now} {} + +JWKSProvider::JWKSProvider(std::string jwks_uri, Fetcher fetcher, + Options options) + : JWKSProvider{std::move(jwks_uri), std::move(fetcher), options, + system_now} {} + +JWKSProvider::JWKSProvider(std::string jwks_uri, Fetcher fetcher, + Options options, Clock clock) + : jwks_uri_{std::move(jwks_uri)}, fetcher_{std::move(fetcher)}, + options_{options}, + // An empty clock falls back to the system clock so that a default + // constructed one cannot make verification throw + clock_{clock ? std::move(clock) : Clock{system_now}} {} + +auto JWKSProvider::fetch_and_install_locked( + const std::chrono::system_clock::time_point now) -> bool { + std::optional fetched; + try { + fetched = this->fetcher_(this->jwks_uri_); + } catch (...) { + // A fetcher signals failure by returning no value, but a throwing transport + // is treated as just another failed retrieval, so a misbehaving fetcher can + // never escape verification + return false; + } + + if (!fetched.has_value()) { + return false; + } + + auto document{try_parse_json(fetched.value().body)}; + if (!document.has_value()) { + return false; + } + + auto parsed{JWKS::from(std::move(document).value())}; + if (!parsed.has_value()) { + return false; + } + + this->keys_ = std::make_shared(std::move(parsed).value()); + this->next_refresh_ = + now + clamp_ttl(fetched.value().max_age, this->options_); + return true; +} + +auto JWKSProvider::refresh_locked( + const std::chrono::system_clock::time_point now) + -> std::shared_ptr { + if (this->keys_ == nullptr) { + // Cold start: keep attempting on every call until a set is obtained, so the + // provider recovers the instant the issuer becomes reachable + this->fetch_and_install_locked(now); + } else if (now >= this->next_refresh_) { + if (!this->fetch_and_install_locked(now)) { + // A failed refresh of a set we already hold serves the stale keys and + // backs off, so a persistently unreachable issuer is not refetched on + // every request. Only the ability to validate a freshly rotated key is + // delayed, and that is separately bounded by the cooldown + this->next_refresh_ = now + this->options_.minimum_ttl; + } + } + + return this->keys_; +} + +auto JWKSProvider::refetch_for_unknown_kid_locked( + const std::chrono::system_clock::time_point now) + -> std::shared_ptr { + if (now < this->unknown_kid_cooldown_until_) { + return nullptr; + } + + // The cooldown is armed before the attempt so that tokens carrying random key + // identifiers cannot drive a fetch on every request + this->unknown_kid_cooldown_until_ = now + this->options_.unknown_kid_cooldown; + if (this->fetch_and_install_locked(now)) { + return this->keys_; + } + + return nullptr; +} + +auto JWKSProvider::verify( + const JWT &token, const std::span allowed_algorithms, + const std::string_view expected_issuer, + const std::string_view expected_audience, + const std::optional expected_subject, + const std::optional expected_type) + -> std::optional { + const auto now{this->clock_()}; + + std::shared_ptr snapshot; + { + const std::scoped_lock lock{this->mutex_}; + snapshot = this->refresh_locked(now); + } + + if (snapshot == nullptr) { + return JWTVerificationError::UnknownKey; + } + + const auto error{jwt_verify( + token, *snapshot, allowed_algorithms, expected_issuer, expected_audience, + now, this->options_.clock_skew, expected_subject, expected_type)}; + if (!error.has_value() || error.value() != JWTVerificationError::UnknownKey) { + return error; + } + + // No key in the current set could verify the token, whether because the key + // it names is absent, because it names none and nothing matched, or because + // only incompatible keys are present. That is the signal the set may be stale + // after a rotation, so the provider performs one guarded, cooldown-bounded + // refetch and retries once. A failure that is instead a present but + // non-verifying key is never retried, since that is an attack or a corrupt + // token, not a rotation + std::shared_ptr refreshed; + { + const std::scoped_lock lock{this->mutex_}; + refreshed = this->refetch_for_unknown_kid_locked(now); + } + + if (refreshed == nullptr || refreshed == snapshot) { + return error; + } + + return jwt_verify(token, *refreshed, allowed_algorithms, expected_issuer, + expected_audience, now, this->options_.clock_skew, + expected_subject, expected_type); +} + +} // namespace sourcemeta::core diff --git a/vendor/core/src/core/json/include/sourcemeta/core/json_object.h b/vendor/core/src/core/json/include/sourcemeta/core/json_object.h index 48dbdda5..b680d8bb 100644 --- a/vendor/core/src/core/json/include/sourcemeta/core/json_object.h +++ b/vendor/core/src/core/json/include/sourcemeta/core/json_object.h @@ -37,6 +37,28 @@ template class JSONObject { key_type first; mapped_type second; hash_type hash; + + /// Check whether this entry's key equals the given key, comparing the + /// precomputed hashes first and only falling back to a string comparison + /// when the hash is not perfect. For example: + /// + /// ```cpp + /// #include + /// #include + /// + /// const sourcemeta::core::JSON document = + /// sourcemeta::core::parse_json("{ \"foo\": 1 }"); + /// const auto &entry{*document.as_object().cbegin()}; + /// assert(entry.key_equals( + /// "foo", sourcemeta::core::JSON::Object::hash("foo"))); + /// ``` + [[nodiscard]] inline auto key_equals(const KeyView key, + const hash_type key_hash) const + -> bool { + assert(JSONObject::hash(key) == key_hash); + return this->hash == key_hash && + (hasher.is_perfect(key_hash) || this->first == key); + } }; using underlying_type = std::vector; diff --git a/vendor/core/src/core/jsonld/CMakeLists.txt b/vendor/core/src/core/jsonld/CMakeLists.txt new file mode 100644 index 00000000..65cdef86 --- /dev/null +++ b/vendor/core/src/core/jsonld/CMakeLists.txt @@ -0,0 +1,25 @@ +sourcemeta_library(NAMESPACE sourcemeta PROJECT core NAME jsonld + PRIVATE_HEADERS error.h + SOURCES jsonld.cc + jsonld_iri_expansion.cc jsonld_create_term_definition.cc + jsonld_context_processing.cc jsonld_value_expansion.cc jsonld_expansion.cc + jsonld_is_expanded.cc + jsonld_inverse_context.cc jsonld_iri_compaction.cc + jsonld_value_compaction.cc jsonld_compaction.cc + jsonld_node_map.cc jsonld_flatten.cc + jsonld_algorithms.h jsonld_keywords.h) + +if(SOURCEMETA_CORE_INSTALL) + sourcemeta_library_install(NAMESPACE sourcemeta PROJECT core NAME jsonld) +endif() + +target_link_libraries(sourcemeta_core_jsonld + PUBLIC sourcemeta::core::json) +target_link_libraries(sourcemeta_core_jsonld + PUBLIC sourcemeta::core::jsonpointer) +target_link_libraries(sourcemeta_core_jsonld + PRIVATE sourcemeta::core::uri) +target_link_libraries(sourcemeta_core_jsonld + PRIVATE sourcemeta::core::langtag) +target_link_libraries(sourcemeta_core_jsonld + PRIVATE sourcemeta::core::text) diff --git a/vendor/core/src/core/jsonld/include/sourcemeta/core/jsonld.h b/vendor/core/src/core/jsonld/include/sourcemeta/core/jsonld.h new file mode 100644 index 00000000..c799dbad --- /dev/null +++ b/vendor/core/src/core/jsonld/include/sourcemeta/core/jsonld.h @@ -0,0 +1,196 @@ +#ifndef SOURCEMETA_CORE_JSONLD_H_ +#define SOURCEMETA_CORE_JSONLD_H_ + +#ifndef SOURCEMETA_CORE_JSONLD_EXPORT +#include +#endif + +#include +#include + +#include // std::uint8_t +#include // std::function +#include // std::optional + +/// @defgroup jsonld JSON-LD +/// @brief A JSON-LD 1.1 processor, with support for the JSON-LD 1.0 processing +/// mode. +/// +/// This functionality is included as follows: +/// +/// ```cpp +/// #include +/// ``` + +namespace sourcemeta::core { + +/// @ingroup jsonld +/// A resolver callback for loading remote JSON-LD contexts referenced during +/// expansion. Given an absolute IRI, it returns the referenced document, or no +/// value if it cannot be resolved. +using JSONLDResolver = std::function(JSON::StringView)>; + +/// @ingroup jsonld +/// The JSON-LD processing mode +enum class JSONLDVersion : std::uint8_t { V1_0, V1_1 }; + +/// @ingroup jsonld +/// +/// Expand a JSON-LD document into its expanded form, resolving relative +/// references against the given base IRI and loading any remote context through +/// the given resolver. The result is always a JSON array. For example: +/// +/// ```cpp +/// #include +/// #include +/// #include +/// +/// const auto document{sourcemeta::core::parse_json(R"({ +/// "@context": { "name": "https://schema.org/name" }, +/// "name": "Sourcemeta" +/// })")}; +/// +/// const auto expanded{sourcemeta::core::jsonld_expand(document)}; +/// sourcemeta::core::prettify(expanded, std::cout); +/// std::cout << std::endl; +/// ``` +SOURCEMETA_CORE_JSONLD_EXPORT +auto jsonld_expand(const JSON &input, const JSON::StringView base_iri = "", + const JSONLDResolver &resolver = {}, + const JSONLDVersion version = JSONLDVersion::V1_1) -> JSON; + +/// @ingroup jsonld +/// +/// Expand a JSON-LD document, applying the given expansion context before the +/// document's own context, as if it had been prepended to the input. For +/// example: +/// +/// ```cpp +/// #include +/// #include +/// #include +/// +/// const auto document{sourcemeta::core::parse_json( +/// R"({ "name": "Sourcemeta" })")}; +/// const auto context{sourcemeta::core::parse_json( +/// R"({ "name": "https://schema.org/name" })")}; +/// +/// const auto expanded{sourcemeta::core::jsonld_expand(document, context)}; +/// sourcemeta::core::prettify(expanded, std::cout); +/// std::cout << std::endl; +/// ``` +SOURCEMETA_CORE_JSONLD_EXPORT +auto jsonld_expand(const JSON &input, const JSON &expand_context, + const JSON::StringView base_iri = "", + const JSONLDResolver &resolver = {}, + const JSONLDVersion version = JSONLDVersion::V1_1) -> JSON; + +/// @ingroup jsonld +/// +/// Determine whether a document is in the JSON-LD 1.1 expanded document form: +/// an array of node objects that carries no context and whose keys are all +/// absolute IRIs, blank node identifiers, or keywords. For example: +/// +/// ```cpp +/// #include +/// #include +/// #include +/// +/// const auto document{sourcemeta::core::parse_json(R"([ +/// { "http://schema.org/name": [ { "@value": "Sourcemeta" } ] } +/// ])")}; +/// +/// assert(sourcemeta::core::jsonld_is_expanded(document)); +/// ``` +SOURCEMETA_CORE_JSONLD_EXPORT +auto jsonld_is_expanded(const JSON &document) -> bool; + +/// @ingroup jsonld +/// +/// Compact a JSON-LD document in expanded form against the given context to +/// produce a friendlier shape. The input must already be expanded, and the +/// result carries the given context. For example: +/// +/// ```cpp +/// #include +/// #include +/// #include +/// +/// const auto document{sourcemeta::core::parse_json(R"([ +/// { "http://schema.org/name": [ { "@value": "Sourcemeta" } ] } +/// ])")}; +/// +/// const auto context{sourcemeta::core::parse_json(R"({ +/// "name": "http://schema.org/name" +/// })")}; +/// +/// const auto compacted{sourcemeta::core::jsonld_compact(document, context)}; +/// sourcemeta::core::prettify(compacted, std::cout); +/// std::cout << std::endl; +/// ``` +SOURCEMETA_CORE_JSONLD_EXPORT +auto jsonld_compact(const JSON &input, const JSON &context, + const JSON::StringView base_iri = "", + const JSONLDResolver &resolver = {}, + const JSONLDVersion version = JSONLDVersion::V1_1, + const bool compact_arrays = true, + const bool compact_to_relative = true) -> JSON; + +/// @ingroup jsonld +/// +/// Flatten a JSON-LD document in expanded form, collecting the properties of +/// each node into a single node object and labelling blank nodes. The input +/// must already be expanded. For example: +/// +/// ```cpp +/// #include +/// #include +/// #include +/// +/// const auto document{sourcemeta::core::parse_json(R"([ +/// { +/// "@id": "http://example.com/a", +/// "http://example.com/b": [ { "@id": "http://example.com/c" } ] +/// } +/// ])")}; +/// +/// const auto flattened{sourcemeta::core::jsonld_flatten(document)}; +/// sourcemeta::core::prettify(flattened, std::cout); +/// std::cout << std::endl; +/// ``` +SOURCEMETA_CORE_JSONLD_EXPORT +auto jsonld_flatten(const JSON &input) -> JSON; + +/// @ingroup jsonld +/// +/// Flatten a JSON-LD document in expanded form and compact the result against +/// the given context. The input must already be expanded, and the result +/// carries the given context. For example: +/// +/// ```cpp +/// #include +/// #include +/// #include +/// +/// const auto document{sourcemeta::core::parse_json(R"([ +/// { "http://schema.org/name": [ { "@value": "Sourcemeta" } ] } +/// ])")}; +/// +/// const auto context{sourcemeta::core::parse_json(R"({ +/// "name": "http://schema.org/name" +/// })")}; +/// +/// const auto flattened{sourcemeta::core::jsonld_flatten(document, context)}; +/// sourcemeta::core::prettify(flattened, std::cout); +/// std::cout << std::endl; +/// ``` +SOURCEMETA_CORE_JSONLD_EXPORT +auto jsonld_flatten(const JSON &input, const JSON &context, + const JSON::StringView base_iri = "", + const JSONLDResolver &resolver = {}, + const JSONLDVersion version = JSONLDVersion::V1_1, + const bool compact_arrays = true) -> JSON; + +} // namespace sourcemeta::core + +#endif diff --git a/vendor/core/src/core/jsonld/include/sourcemeta/core/jsonld_error.h b/vendor/core/src/core/jsonld/include/sourcemeta/core/jsonld_error.h new file mode 100644 index 00000000..4ba41242 --- /dev/null +++ b/vendor/core/src/core/jsonld/include/sourcemeta/core/jsonld_error.h @@ -0,0 +1,68 @@ +#ifndef SOURCEMETA_CORE_JSONLD_ERROR_H_ +#define SOURCEMETA_CORE_JSONLD_ERROR_H_ + +#ifndef SOURCEMETA_CORE_JSONLD_EXPORT +#include +#endif + +#include +#include + +#include // std::exception +#include // std::initializer_list +#include // std::move + +namespace sourcemeta::core { + +// Exporting symbols that depends on the standard C++ library is considered +// safe. +// https://learn.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-2-c4275?view=msvc-170&redirectedfrom=MSDN +#if defined(_MSC_VER) +#pragma warning(disable : 4251 4275) +#endif + +/// @ingroup jsonld +/// An error that represents a JSON-LD processing failure. The message is one of +/// the error codes defined by the JSON-LD 1.1 API specification, and the +/// pointer locates the offending position in the input document +class SOURCEMETA_CORE_JSONLD_EXPORT JSONLDError : public std::exception { +public: + JSONLDError(const char *code, Pointer pointer) + : code_{code}, pointer_{std::move(pointer)} {} + + // Locate the error at a weak pointer, materialising an owned pointer. + JSONLDError(const char *code, const WeakPointer &pointer) + : code_{code}, pointer_{to_pointer(pointer)} {} + + // Locate the error at a weak pointer extended with the given trailing + // property tokens. + JSONLDError(const char *code, const WeakPointer &pointer, + const std::initializer_list children) + : code_{code}, pointer_{to_pointer(pointer)} { + for (const auto child : children) { + this->pointer_.push_back(JSON::String{child}); + } + } + + [[nodiscard]] auto what() const noexcept -> const char * override { + return this->code_.c_str(); + } + + /// Get the JSON Pointer to the position in the input document that caused the + /// error + [[nodiscard]] auto pointer() const noexcept -> const Pointer & { + return this->pointer_; + } + +private: + JSON::String code_; + Pointer pointer_; +}; + +#if defined(_MSC_VER) +#pragma warning(default : 4251 4275) +#endif + +} // namespace sourcemeta::core + +#endif diff --git a/vendor/core/src/core/jsonld/jsonld.cc b/vendor/core/src/core/jsonld/jsonld.cc new file mode 100644 index 00000000..f8687748 --- /dev/null +++ b/vendor/core/src/core/jsonld/jsonld.cc @@ -0,0 +1,156 @@ +#include +#include +#include + +#include "jsonld_algorithms.h" +#include "jsonld_keywords.h" + +#include // assert +#include // std::nullopt +#include // std::move + +namespace sourcemeta::core { + +// Run the expansion algorithm on a top-level input document and normalise the +// result into the expanded document form (JSON-LD 1.1 API Section 5.1). +auto run_expansion(ExpansionState &state, ActiveContext &active_context, + const JSON &input) -> JSON { + auto expanded{ + expand(state, active_context, std::nullopt, input, empty_weak_pointer)}; + + // A top-level map containing only @graph is replaced by its value. + if (expanded.is_object() && expanded.object_size() == 1 && + expanded.defines(KEYWORD_GRAPH, KEYWORD_GRAPH_HASH)) { + expanded = expanded.at(KEYWORD_GRAPH, KEYWORD_GRAPH_HASH); + } + + if (expanded.is_object()) { + if (expanded.empty() || (expanded.object_size() == 1 && + expanded.defines(KEYWORD_ID, KEYWORD_ID_HASH))) { + return JSON::make_array(); + } + auto result{JSON::make_array()}; + result.push_back(std::move(expanded)); + return result; + } + + if (!expanded.is_array()) { + return JSON::make_array(); + } + + return expanded; +} + +// Set up the shared expansion state and initial active context from the public +// entry-point arguments. +auto initialise_expansion(const JSONLDResolver &resolver, + const JSON::StringView base_iri, + const JSONLDVersion version, ExpansionState &state, + ActiveContext &active_context) -> void { + state.resolver = &resolver; + state.processing_1_0 = version == JSONLDVersion::V1_0; + if (!base_iri.empty()) { + active_context.base = JSON::String{base_iri}; + state.document_base = JSON::String{base_iri}; + } +} + +} // namespace sourcemeta::core + +namespace sourcemeta::core { + +auto jsonld_expand(const JSON &input, const JSON::StringView base_iri, + const JSONLDResolver &resolver, const JSONLDVersion version) + -> JSON { + ExpansionState state; + ActiveContext active_context; + initialise_expansion(resolver, base_iri, version, state, active_context); + return run_expansion(state, active_context, input); +} + +auto jsonld_expand(const JSON &input, const JSON &expand_context, + const JSON::StringView base_iri, + const JSONLDResolver &resolver, const JSONLDVersion version) + -> JSON { + ExpansionState state; + ActiveContext active_context; + initialise_expansion(resolver, base_iri, version, state, active_context); + const auto &context{ + expand_context.is_object() && + expand_context.defines(KEYWORD_CONTEXT, KEYWORD_CONTEXT_HASH) + ? expand_context.at(KEYWORD_CONTEXT, KEYWORD_CONTEXT_HASH) + : expand_context}; + // The external expansion context is not part of the input document, so its + // errors carry an empty pointer. + process_context(state, active_context, context, empty_weak_pointer); + return run_expansion(state, active_context, input); +} + +auto jsonld_compact(const JSON &input, const JSON &context, + const JSON::StringView base_iri, + const JSONLDResolver &resolver, const JSONLDVersion version, + const bool compact_arrays, const bool compact_to_relative) + -> JSON { + // Compaction operates on input already in expanded document form. Callers + // that hold a non-expanded document expand it first. + assert(jsonld_is_expanded(input)); + + ExpansionState state; + ActiveContext active_context; + initialise_expansion(resolver, base_iri, version, state, active_context); + active_context.compact_to_relative = compact_to_relative; + const auto &local_context{ + context.is_object() && + context.defines(KEYWORD_CONTEXT, KEYWORD_CONTEXT_HASH) + ? context.at(KEYWORD_CONTEXT, KEYWORD_CONTEXT_HASH) + : context}; + process_context(state, active_context, local_context, empty_weak_pointer); + + const auto inverse_context{create_inverse_context(active_context)}; + auto result{compact(state, active_context, inverse_context, std::nullopt, + input, compact_arrays)}; + + // A top-level array becomes a node with a single @graph entry. + if (result.is_array()) { + if (result.empty()) { + result = JSON::make_object(); + } else { + const auto graph_alias{compact_iri(active_context, inverse_context, + JSON::String{KEYWORD_GRAPH}, nullptr, + true, false)}; + auto wrapper{JSON::make_object()}; + wrapper.assign_assume_new(graph_alias, std::move(result)); + result = std::move(wrapper); + } + } + + // The compacted document carries the context it was compacted against, unless + // that context is empty. + if ((local_context.is_object() && !local_context.empty()) || + (local_context.is_array() && !local_context.empty()) || + local_context.is_string()) { + result.assign_assume_new(keyword_context(), JSON{local_context}); + } + + return result; +} + +auto jsonld_flatten(const JSON &input) -> JSON { + // Flattening operates on input already in expanded document form. Callers + // that hold a non-expanded document expand it first. + assert(jsonld_is_expanded(input)); + + BlankNodeState state; + return flatten(state, input); +} + +auto jsonld_flatten(const JSON &input, const JSON &context, + const JSON::StringView base_iri, + const JSONLDResolver &resolver, const JSONLDVersion version, + const bool compact_arrays) -> JSON { + // The flattened expanded output is compacted against the given context. + return jsonld_compact(jsonld_flatten(input), context, base_iri, resolver, + version, compact_arrays); +} + +} // namespace sourcemeta::core diff --git a/vendor/core/src/core/jsonld/jsonld_algorithms.h b/vendor/core/src/core/jsonld/jsonld_algorithms.h new file mode 100644 index 00000000..7e6362d1 --- /dev/null +++ b/vendor/core/src/core/jsonld/jsonld_algorithms.h @@ -0,0 +1,165 @@ +#ifndef SOURCEMETA_CORE_JSONLD_ALGORITHMS_H_ +#define SOURCEMETA_CORE_JSONLD_ALGORITHMS_H_ + +#include +#include +#include + +#include // std::size_t +#include // std::less +#include // std::map +#include // std::shared_ptr +#include // std::optional +#include // std::vector + +namespace sourcemeta::core { + +struct TermDefinition { + std::optional iri; + std::optional type_mapping; + std::vector container; + std::optional language; + bool has_language{false}; + std::optional direction; + bool has_direction{false}; + std::optional context; + std::optional context_base; + std::optional index; + std::optional index_iri; + std::optional nest; + bool reverse{false}; + bool prefix{false}; + bool is_protected{false}; +}; + +struct ActiveContext { + std::map> terms; + std::optional base; + std::optional vocabulary; + std::optional default_language; + std::optional default_direction; + std::shared_ptr previous; + // Whether compaction may relativise absolute IRIs against the base IRI. + bool compact_to_relative{true}; +}; + +// The mutable state shared by the expansion algorithms for the duration of a +// single top-level expansion. +struct ExpansionState { + // Used to load remote contexts. The chain detects recursive inclusion. + const JSONLDResolver *resolver{nullptr}; + std::vector remote_context_chain; + std::optional document_base; + // When a scoped context is processed after the fact, remote references in it + // resolve against the URL of the document that defined the term. + std::optional context_base_override; + // Protected-term state for the context currently being processed. + bool context_protected{false}; + bool protected_override{false}; + bool processing_1_0{false}; + + // Remote context references resolve against the URL of the document that + // contains them: the current remote context if any, otherwise the input + // document. This is distinct from the active context base, which @base + // mutates. + [[nodiscard]] auto context_resolution_base() const + -> std::optional { + if (!this->remote_context_chain.empty()) { + return this->remote_context_chain.back(); + } + if (this->context_base_override.has_value()) { + return this->context_base_override; + } + return this->document_base; + } +}; + +// Tracks, while a context is being processed, which terms have been fully +// defined (true) versus are still being defined (false, used to detect cycles). +using DefinedTerms = std::map; + +// IRI Expansion (JSON-LD 1.1 API Section 5.2) +auto expand_iri(ExpansionState &state, ActiveContext &active_context, + const JSON::String &value, const bool document_relative, + const bool vocabulary, const JSON *const local_context, + DefinedTerms *const defined, const WeakPointer &context_pointer) + -> std::optional; + +// Create Term Definition (JSON-LD 1.1 API Section 5.1.1) +auto create_term_definition(ExpansionState &state, + ActiveContext &active_context, + const JSON &local_context, const JSON::String &term, + DefinedTerms &defined, + const WeakPointer &context_pointer) -> void; + +// Context Processing (JSON-LD 1.1 API Section 5.1) +auto process_context(ExpansionState &state, ActiveContext &active_context, + const JSON &local_context, const WeakPointer &pointer, + const bool propagate = true) -> void; + +// Value Expansion (JSON-LD 1.1 API Section 5.3.3) +auto expand_value(ExpansionState &state, ActiveContext &active_context, + const std::optional &active_property, + const JSON &value) -> JSON; + +// Expansion (JSON-LD 1.1 API Section 5.1.2) +auto expand(ExpansionState &state, ActiveContext &active_context, + const std::optional &active_property, + const JSON &element, const WeakPointer &pointer) -> JSON; + +// Inverse Context Creation (JSON-LD 1.1 API Section 4.3.1). The inverse context +// is represented as a JSON map of IRI to container to type/language to term. +auto create_inverse_context(const ActiveContext &active_context) -> JSON; + +// IRI Compaction (JSON-LD 1.1 API Section 6.2). A null value is signalled by +// a null pointer. +auto compact_iri(const ActiveContext &active_context, + const JSON &inverse_context, const JSON::String &variable, + const JSON *const value, const bool vocabulary, + const bool reverse) -> JSON::String; + +// Value Compaction (JSON-LD 1.1 API Section 6.3) +auto compact_value(const ActiveContext &active_context, + const JSON &inverse_context, + const std::optional &active_property, + const JSON &value) -> JSON; + +// Compaction (JSON-LD 1.1 API Section 6.1) +auto compact(ExpansionState &state, const ActiveContext &active_context, + const JSON &inverse_context, + const std::optional &active_property, + const JSON &element, const bool compact_arrays) -> JSON; + +// Holds the running state for blank node identifier generation, so that +// identifiers in the source are consistently relabelled without collisions. +struct BlankNodeState { + std::size_t counter{0}; + std::map> identifiers; +}; + +// Generate Blank Node Identifier (JSON-LD 1.1 API Section 7.4). A null value is +// signalled by an empty optional. The view, when present, only needs to outlive +// the call, as the running state stores an owned copy. +auto generate_blank_node_identifier( + BlankNodeState &state, const std::optional &identifier) + -> JSON::String; + +// Node Map Generation (JSON-LD 1.1 API Section 7.2). The node map is a JSON map +// of graph name to a map of subject identifier to node object. The active +// subject is a subject identifier string, or a node object when a reverse +// property relationship is being processed, or a null pointer. A null active +// property is signalled by an empty optional, and the list accumulator by a +// null pointer. The active property view only needs to outlive the call. +auto generate_node_map(BlankNodeState &state, JSON &node_map, + const JSON &element, const JSON::StringView active_graph, + const JSON *active_subject, + const std::optional &active_property, + JSON *list) -> void; + +// Flattening (JSON-LD 1.1 API Section 7.1). Returns the flattened document in +// expanded form. +auto flatten(BlankNodeState &state, const JSON &element) -> JSON; + +} // namespace sourcemeta::core + +#endif diff --git a/vendor/core/src/core/jsonld/jsonld_compaction.cc b/vendor/core/src/core/jsonld/jsonld_compaction.cc new file mode 100644 index 00000000..886840fc --- /dev/null +++ b/vendor/core/src/core/jsonld/jsonld_compaction.cc @@ -0,0 +1,630 @@ +#include +#include + +#include "jsonld_algorithms.h" +#include "jsonld_keywords.h" + +#include // std::find, std::sort +#include // assert +#include // std::optional, std::nullopt +#include // std::move +#include // std::vector + +namespace sourcemeta::core { + +namespace { + +auto definition_for(const ActiveContext &active_context, + const std::optional &active_property) + -> const TermDefinition * { + if (!active_property.has_value()) { + return nullptr; + } + const auto iterator{active_context.terms.find(active_property.value())}; + return iterator == active_context.terms.cend() ? nullptr : &iterator->second; +} + +auto container_includes(const TermDefinition *const definition, + const JSON::StringView keyword) -> bool { + return definition != nullptr && + std::ranges::find(definition->container, keyword) != + definition->container.cend(); +} + +// Append a compacted value under a key, growing it into an array as needed and +// forcing an array when the term requires one. +auto add_value(JSON &result, const JSON::String &key, JSON &&value, + const bool as_array) -> void { + if (!result.defines(key)) { + if (as_array) { + auto array{JSON::make_array()}; + array.push_back(std::move(value)); + result.assign_assume_new(key, std::move(array)); + } else { + result.assign_assume_new(key, std::move(value)); + } + return; + } + auto &existing{result.at(key)}; + if (!existing.is_array()) { + auto array{JSON::make_array()}; + array.push_back(existing); + existing = std::move(array); + } + existing.push_back(std::move(value)); +} + +// The object a property is written into: the result itself, or a nesting +// container when the term carries an @nest mapping. +auto nest_target(JSON &result, const TermDefinition *const definition, + const ActiveContext &active_context, + const JSON &inverse_context) -> JSON & { + if (definition == nullptr || !definition->nest.has_value()) { + return result; + } + JSON::String key{definition->nest.value()}; + if (key == KEYWORD_NEST) { + key = compact_iri(active_context, inverse_context, + JSON::String{KEYWORD_NEST}, nullptr, true, false); + } else { + // A non-@nest nest value must reference a term that expands to @nest. + const auto iterator{active_context.terms.find(key)}; + if (iterator == active_context.terms.cend() || + !iterator->second.iri.has_value() || + iterator->second.iri.value() != KEYWORD_NEST) { + throw JSONLDError("Invalid @nest value", empty_weak_pointer); + } + } + if (!result.defines(key)) { + result.assign_assume_new(key, JSON::make_object()); + } + // The nest target is a context-defined term that expands to @nest, so it can + // never collide with an ordinary compacted property and is always an object. + assert(result.at(key).is_object()); + return result.at(key); +} + +} // namespace + +auto compact(ExpansionState &state, const ActiveContext &active_context, + const JSON &inverse_context, + const std::optional &active_property, + const JSON &element, const bool compact_arrays) -> JSON { + if (!element.is_object() && !element.is_array()) { + return element; + } + + const auto *definition{definition_for(active_context, active_property)}; + + if (element.is_array()) { + auto result{JSON::make_array()}; + for (const auto &item : element.as_array()) { + auto compacted{compact(state, active_context, inverse_context, + active_property, item, compact_arrays)}; + if (!compacted.is_null()) { + result.push_back(std::move(compacted)); + } + } + const bool force_array{!compact_arrays || + (active_property.has_value() && + active_property.value() == KEYWORD_SET) || + container_includes(definition, KEYWORD_SET) || + container_includes(definition, KEYWORD_LIST)}; + if (result.size() == 1 && !force_array) { + return result.front(); + } + return result; + } + + // A non-propagating (type-scoped) context does not apply when processing a + // new node object (JSON-LD 1.1 API Section 6.1.2 step 3.1): revert it unless + // the element is a value object or a sole @id reference. + ActiveContext reverted; + JSON reverted_inverse{JSON::make_object()}; + const bool did_revert{static_cast(active_context.previous) && + !element.defines(KEYWORD_VALUE, KEYWORD_VALUE_HASH) && + !(element.object_size() == 1 && + element.defines(KEYWORD_ID, KEYWORD_ID_HASH))}; + if (did_revert) { + reverted = *active_context.previous; + reverted_inverse = create_inverse_context(reverted); + } + const ActiveContext &base{did_revert ? reverted : active_context}; + const JSON &base_inverse{did_revert ? reverted_inverse : inverse_context}; + definition = definition_for(base, active_property); + + // A property-scoped context applies while compacting this position. Its term + // definition is taken from the active context as it was on entry so that + // reverting a non-propagating type-scoped context does not discard the local + // context of the term being compacted (Section 6.1.2 step 3.2). + const auto *scoped_definition{ + definition_for(active_context, active_property)}; + ActiveContext scoped{base}; + JSON scoped_inverse{base_inverse}; + bool scoped_changed{false}; + if (scoped_definition != nullptr && scoped_definition->context.has_value()) { + // A property-scoped context may legally override protected terms. + const bool saved_override{state.protected_override}; + const bool saved_context_protected{state.context_protected}; + state.protected_override = true; + process_context(state, scoped, scoped_definition->context.value(), + empty_weak_pointer); + state.protected_override = saved_override; + state.context_protected = saved_context_protected; + scoped_inverse = create_inverse_context(scoped); + scoped_changed = true; + definition = definition_for(scoped, active_property); + } + const ActiveContext &context{scoped_changed ? scoped : base}; + const JSON &inverse{scoped_changed ? scoped_inverse : base_inverse}; + + // A value object is fully handled by value compaction. + if (element.defines(KEYWORD_VALUE, KEYWORD_VALUE_HASH)) { + return compact_value(context, inverse, active_property, element); + } + // A node reference compacts to a scalar when the term so dictates, otherwise + // it is treated as a node below. + if (element.defines(KEYWORD_ID, KEYWORD_ID_HASH)) { + auto compacted{compact_value(context, inverse, active_property, element)}; + if (!compacted.is_object() && !compacted.is_array()) { + return compacted; + } + } + + // A list object whose property coerces @list compacts to a bare array, which + // also yields nested arrays for a list of lists (Section 6.1.2 step 3.3). + if (element.defines(KEYWORD_LIST, KEYWORD_LIST_HASH) && + container_includes(definition_for(context, active_property), + KEYWORD_LIST)) { + return compact(state, context, inverse, active_property, + element.at(KEYWORD_LIST, KEYWORD_LIST_HASH), compact_arrays); + } + + const bool inside_reverse{active_property.has_value() && + active_property.value() == KEYWORD_REVERSE}; + auto result{JSON::make_object()}; + + // A type-scoped context from the node's types applies to the remaining + // properties. + ActiveContext typed{context}; + JSON typed_inverse{inverse}; + bool typed_changed{false}; + if (element.defines(KEYWORD_TYPE, KEYWORD_TYPE_HASH)) { + // The type-scoped context is keyed by the compacted type term, applied in + // lexicographical order of those terms. + std::vector types; + for (const auto &item : + element.at(KEYWORD_TYPE, KEYWORD_TYPE_HASH).as_array()) { + types.push_back(compact_iri(context, inverse, item.to_string(), nullptr, + true, false)); + } + std::ranges::sort(types); + for (const auto &type : types) { + const auto iterator{context.terms.find(type)}; + if (iterator != context.terms.cend() && + iterator->second.context.has_value()) { + process_context(state, typed, iterator->second.context.value(), + empty_weak_pointer, false); + typed_changed = true; + } + } + if (typed_changed) { + typed_inverse = create_inverse_context(typed); + } + } + const ActiveContext &node_context{typed_changed ? typed : context}; + const JSON &node_inverse{typed_changed ? typed_inverse : inverse}; + + for (const auto &entry : element.as_object()) { + const auto &expanded_property{entry.first}; + const auto &expanded_value{entry.second}; + + if (entry.key_equals(KEYWORD_ID, KEYWORD_ID_HASH)) { + const auto alias{compact_iri(node_context, node_inverse, + JSON::String{KEYWORD_ID}, nullptr, true, + false)}; + result.assign_assume_new( + alias, + JSON{compact_iri(node_context, node_inverse, + expanded_value.to_string(), nullptr, false, false)}); + continue; + } + + if (entry.key_equals(KEYWORD_TYPE, KEYWORD_TYPE_HASH)) { + const auto alias{compact_iri(node_context, node_inverse, + JSON::String{KEYWORD_TYPE}, nullptr, true, + false)}; + // Type values are compacted against the context before type-scoping. + auto compacted{JSON::make_array()}; + for (const auto &item : expanded_value.as_array()) { + compacted.push_back(JSON{compact_iri(context, inverse, item.to_string(), + nullptr, true, false)}); + } + const auto *type_definition{definition_for(node_context, alias)}; + // A @set container forces @type to an array only in JSON-LD 1.1. + const bool type_as_array{ + !state.processing_1_0 && + container_includes(type_definition, KEYWORD_SET)}; + if (compacted.size() == 1 && compact_arrays && !type_as_array) { + result.assign_assume_new(alias, JSON{compacted.front()}); + } else { + result.assign_assume_new(alias, std::move(compacted)); + } + continue; + } + + if (entry.key_equals(KEYWORD_REVERSE, KEYWORD_REVERSE_HASH)) { + auto compacted{compact(state, node_context, node_inverse, + JSON::String{KEYWORD_REVERSE}, expanded_value, + compact_arrays)}; + auto reverse_map{JSON::make_object()}; + for (const auto &reverse : compacted.as_object()) { + const auto reverse_definition{node_context.terms.find(reverse.first)}; + if (reverse_definition != node_context.terms.cend() && + reverse_definition->second.reverse) { + // The compacted value already reflects the term's @set container. + add_value(result, reverse.first, JSON{reverse.second}, false); + } else { + reverse_map.assign(reverse.first, reverse.second); + } + } + if (!reverse_map.empty()) { + const auto alias{compact_iri(node_context, node_inverse, + JSON::String{KEYWORD_REVERSE}, nullptr, + true, false)}; + result.assign_assume_new(alias, std::move(reverse_map)); + } + continue; + } + + if (entry.key_equals(KEYWORD_INDEX, KEYWORD_INDEX_HASH) && + container_includes(definition, KEYWORD_INDEX)) { + continue; + } + + if (entry.key_equals(KEYWORD_INDEX, KEYWORD_INDEX_HASH) || + entry.key_equals(KEYWORD_VALUE, KEYWORD_VALUE_HASH) || + entry.key_equals(KEYWORD_LANGUAGE, KEYWORD_LANGUAGE_HASH) || + entry.key_equals(KEYWORD_DIRECTION, KEYWORD_DIRECTION_HASH)) { + const auto alias{compact_iri(node_context, node_inverse, + expanded_property, nullptr, true, false)}; + result.assign_assume_new(alias, JSON{expanded_value}); + continue; + } + + // A node's named graph compacts to an array of compacted node objects. + if (entry.key_equals(KEYWORD_GRAPH, KEYWORD_GRAPH_HASH)) { + const auto alias{compact_iri(node_context, node_inverse, + JSON::String{KEYWORD_GRAPH}, nullptr, true, + false)}; + auto compacted{compact(state, node_context, node_inverse, + JSON::String{KEYWORD_GRAPH}, expanded_value, + compact_arrays)}; + // A named graph keeps its @graph as an array; a bare graph object may + // collapse to a single value. + if (element.defines(KEYWORD_ID, KEYWORD_ID_HASH) && + !compacted.is_array()) { + auto array{JSON::make_array()}; + array.push_back(std::move(compacted)); + compacted = std::move(array); + } + result.assign_assume_new(alias, std::move(compacted)); + continue; + } + + if (expanded_value.is_array() && expanded_value.empty()) { + const auto item_property{compact_iri(node_context, node_inverse, + expanded_property, &expanded_value, + true, inside_reverse)}; + // An empty array stays empty rather than being wrapped into one. + if (!result.defines(item_property)) { + result.assign_assume_new(item_property, JSON::make_array()); + } + continue; + } + + for (const auto &item : expanded_value.as_array()) { + const auto item_property{compact_iri(node_context, node_inverse, + expanded_property, &item, true, + inside_reverse)}; + const auto *item_definition{definition_for(node_context, item_property)}; + const bool is_list{item.is_object() && + item.defines(KEYWORD_LIST, KEYWORD_LIST_HASH)}; + const bool is_graph_object{ + item.is_object() && item.defines(KEYWORD_GRAPH, KEYWORD_GRAPH_HASH)}; + JSON &target{ + nest_target(result, item_definition, node_context, node_inverse)}; + + // @graph container: a simple graph object (without @id) is mapped by @id + // or @index or inlined; a graph object carrying an @id that is not keyed + // by @id is wrapped back into a graph object (Section 6.1.2). + if (container_includes(item_definition, KEYWORD_GRAPH) && + is_graph_object) { + const bool graph_as_array{ + container_includes(item_definition, KEYWORD_SET) || + !compact_arrays}; + const bool by_id{container_includes(item_definition, KEYWORD_ID)}; + const bool by_index{container_includes(item_definition, KEYWORD_INDEX)}; + const bool simple_graph{!item.defines(KEYWORD_ID, KEYWORD_ID_HASH)}; + auto compacted{compact(state, node_context, node_inverse, item_property, + item.at(KEYWORD_GRAPH, KEYWORD_GRAPH_HASH), + compact_arrays)}; + const auto none_key{compact_iri(node_context, node_inverse, + JSON::String{KEYWORD_NONE}, nullptr, + true, false)}; + if (by_id || (by_index && simple_graph)) { + if (!target.defines(item_property)) { + target.assign_assume_new(item_property, JSON::make_object()); + } + JSON::String key{none_key}; + if (by_id && item.defines(KEYWORD_ID, KEYWORD_ID_HASH)) { + key = compact_iri(node_context, node_inverse, + item.at(KEYWORD_ID, KEYWORD_ID_HASH).to_string(), + nullptr, false, false); + } else if (by_index && + item.defines(KEYWORD_INDEX, KEYWORD_INDEX_HASH)) { + key = item.at(KEYWORD_INDEX, KEYWORD_INDEX_HASH).to_string(); + } + // The graph value is already compacted to an array when the term has + // an @set container or compactArrays is false, so it is added as-is. + add_value(target.at(item_property), key, std::move(compacted), false); + continue; + } + if (simple_graph) { + // A single node is inlined, several are wrapped in an @included + // block. + if (compacted.is_array() && compacted.size() != 1) { + const auto included_alias{compact_iri( + node_context, node_inverse, JSON::String{KEYWORD_INCLUDED}, + nullptr, true, false)}; + auto wrapper{JSON::make_object()}; + wrapper.assign_assume_new(included_alias, std::move(compacted)); + compacted = std::move(wrapper); + } else if (compacted.is_array()) { + auto single{compacted.front()}; + compacted = std::move(single); + } + add_value(target, item_property, std::move(compacted), + graph_as_array); + continue; + } + // Wrap a graph object that retains its @id back into a graph object. + const auto graph_alias{compact_iri(node_context, node_inverse, + JSON::String{KEYWORD_GRAPH}, nullptr, + true, false)}; + auto wrapper{JSON::make_object()}; + const auto id_alias{compact_iri(node_context, node_inverse, + JSON::String{KEYWORD_ID}, nullptr, true, + false)}; + wrapper.assign_assume_new( + id_alias, + JSON{compact_iri(node_context, node_inverse, + item.at(KEYWORD_ID, KEYWORD_ID_HASH).to_string(), + nullptr, false, false)}); + if (item.defines(KEYWORD_INDEX, KEYWORD_INDEX_HASH)) { + const auto index_alias{compact_iri(node_context, node_inverse, + JSON::String{KEYWORD_INDEX}, + nullptr, true, false)}; + wrapper.assign_assume_new( + index_alias, JSON{item.at(KEYWORD_INDEX, KEYWORD_INDEX_HASH)}); + } + wrapper.assign_assume_new(graph_alias, std::move(compacted)); + add_value(target, item_property, std::move(wrapper), graph_as_array); + continue; + } + + if (container_includes(item_definition, KEYWORD_LIST) || is_list) { + const auto &list_value{ + is_list ? item.at(KEYWORD_LIST, KEYWORD_LIST_HASH) : item}; + // JSON-LD 1.0 forbids a list of lists. + if (state.processing_1_0 && list_value.is_array()) { + for (const auto &member : list_value.as_array()) { + if (member.is_object() && + member.defines(KEYWORD_LIST, KEYWORD_LIST_HASH)) { + throw JSONLDError("Compaction to list of lists", + empty_weak_pointer); + } + } + } + auto compacted{compact(state, node_context, node_inverse, item_property, + list_value, compact_arrays)}; + if (!compacted.is_array()) { + auto array{JSON::make_array()}; + array.push_back(std::move(compacted)); + compacted = std::move(array); + } + if (container_includes(item_definition, KEYWORD_LIST)) { + add_value(target, item_property, std::move(compacted), + container_includes(item_definition, KEYWORD_SET)); + } else { + const auto list_alias{compact_iri(node_context, node_inverse, + JSON::String{KEYWORD_LIST}, nullptr, + true, false)}; + auto list_object{JSON::make_object()}; + list_object.assign_assume_new(list_alias, std::move(compacted)); + if (item.is_object() && + item.defines(KEYWORD_INDEX, KEYWORD_INDEX_HASH)) { + const auto index_alias{compact_iri(node_context, node_inverse, + JSON::String{KEYWORD_INDEX}, + nullptr, true, false)}; + list_object.assign_assume_new( + index_alias, JSON{item.at(KEYWORD_INDEX, KEYWORD_INDEX_HASH)}); + } + add_value(target, item_property, std::move(list_object), + container_includes(item_definition, KEYWORD_SET) || + !compact_arrays); + } + continue; + } + + const bool as_array{container_includes(item_definition, KEYWORD_SET) || + !compact_arrays}; + + const auto none_key{compact_iri(node_context, node_inverse, + JSON::String{KEYWORD_NONE}, nullptr, true, + false)}; + + // @language map container: index value objects by their language. + if (container_includes(item_definition, KEYWORD_LANGUAGE) && + item.is_object() && item.defines(KEYWORD_VALUE, KEYWORD_VALUE_HASH)) { + if (!target.defines(item_property)) { + target.assign_assume_new(item_property, JSON::make_object()); + } + const JSON::String key{ + item.defines(KEYWORD_LANGUAGE, KEYWORD_LANGUAGE_HASH) + ? item.at(KEYWORD_LANGUAGE, KEYWORD_LANGUAGE_HASH).to_string() + : none_key}; + add_value(target.at(item_property), key, + JSON{item.at(KEYWORD_VALUE, KEYWORD_VALUE_HASH)}, as_array); + continue; + } + + // @index map container: index by the item's @index, or for a + // property-valued index by the value of that property in the compacted + // item (Section 6.1.2). + if (container_includes(item_definition, KEYWORD_INDEX)) { + if (!target.defines(item_property)) { + target.assign_assume_new(item_property, JSON::make_object()); + } + const bool property_index{item_definition != nullptr && + item_definition->index_iri.has_value()}; + auto compacted{compact(state, node_context, node_inverse, item_property, + item, compact_arrays)}; + std::optional key; + if (property_index) { + // The index key is the expanded index mapping, compacted to match the + // corresponding property of the compacted item (Section 6.1.2). + const auto container_key{compact_iri( + node_context, node_inverse, item_definition->index_iri.value(), + nullptr, true, false)}; + if (compacted.is_object() && compacted.defines(container_key)) { + auto &values{compacted.at(container_key)}; + if (values.is_array() && !values.empty() && + values.front().is_string()) { + key = values.front().to_string(); + if (values.size() == 1) { + compacted.erase(container_key); + } else { + values.erase(values.as_array().cbegin()); + // A lone remaining value collapses to a scalar only when arrays + // are being compacted. + if (compact_arrays && values.size() == 1) { + auto remaining{values.front()}; + values = std::move(remaining); + } + } + } else if (!values.is_array() && values.is_string()) { + key = values.to_string(); + compacted.erase(container_key); + } + } + } else if (item.is_object() && + item.defines(KEYWORD_INDEX, KEYWORD_INDEX_HASH)) { + key = item.at(KEYWORD_INDEX, KEYWORD_INDEX_HASH).to_string(); + if (compacted.is_object()) { + compacted.erase(compact_iri(node_context, node_inverse, + JSON::String{KEYWORD_INDEX}, nullptr, + true, false)); + } + } + add_value(target.at(item_property), key.value_or(none_key), + std::move(compacted), as_array); + continue; + } + + // @id map container: index node objects by their @id. + if (container_includes(item_definition, KEYWORD_ID)) { + if (!target.defines(item_property)) { + target.assign_assume_new(item_property, JSON::make_object()); + } + const JSON::String key{ + item.is_object() && item.defines(KEYWORD_ID, KEYWORD_ID_HASH) + ? compact_iri(node_context, node_inverse, + item.at(KEYWORD_ID, KEYWORD_ID_HASH).to_string(), + nullptr, false, false) + : none_key}; + auto compacted{compact(state, node_context, node_inverse, item_property, + item, compact_arrays)}; + if (compacted.is_object()) { + compacted.erase(compact_iri(node_context, node_inverse, + JSON::String{KEYWORD_ID}, nullptr, true, + false)); + } + add_value(target.at(item_property), key, std::move(compacted), + as_array); + continue; + } + + // @type map container: index node objects by their first type. + if (container_includes(item_definition, KEYWORD_TYPE)) { + if (!target.defines(item_property)) { + target.assign_assume_new(item_property, JSON::make_object()); + } + JSON::String key{none_key}; + if (item.is_object() && item.defines(KEYWORD_TYPE, KEYWORD_TYPE_HASH) && + !item.at(KEYWORD_TYPE, KEYWORD_TYPE_HASH).empty()) { + key = compact_iri( + node_context, node_inverse, + item.at(KEYWORD_TYPE, KEYWORD_TYPE_HASH).front().to_string(), + nullptr, true, false); + } + auto compacted{compact(state, node_context, node_inverse, item_property, + item, compact_arrays)}; + if (compacted.is_object()) { + const auto type_alias{compact_iri(node_context, node_inverse, + JSON::String{KEYWORD_TYPE}, nullptr, + true, false)}; + if (compacted.defines(type_alias)) { + auto &types{compacted.at(type_alias)}; + if (!types.is_array() || types.size() <= 1) { + compacted.erase(type_alias); + } else { + types.erase(types.as_array().cbegin()); + // A lone remaining type collapses to a scalar only when arrays + // are being compacted. + if (compact_arrays && types.size() == 1) { + auto remaining{types.front()}; + types = std::move(remaining); + } + } + } + // A node left with only an @id reduces to that identifier, compacted + // with vocab when the term coerces @vocab. + const auto id_alias{compact_iri(node_context, node_inverse, + JSON::String{KEYWORD_ID}, nullptr, + true, false)}; + if (compacted.object_size() == 1 && compacted.defines(id_alias) && + item.defines(KEYWORD_ID, KEYWORD_ID_HASH)) { + const bool vocab{item_definition != nullptr && + item_definition->type_mapping.has_value() && + item_definition->type_mapping.value() == + KEYWORD_VOCAB}; + compacted = JSON{ + compact_iri(node_context, node_inverse, + item.at(KEYWORD_ID, KEYWORD_ID_HASH).to_string(), + nullptr, vocab, false)}; + } + } + add_value(target.at(item_property), key, std::move(compacted), + as_array); + continue; + } + + auto compacted{compact(state, node_context, node_inverse, item_property, + item, compact_arrays)}; + // A compacted JSON literal may itself be an array, in which case its + // members are added individually rather than nesting another array. + if (compacted.is_array()) { + for (const auto &member : compacted.as_array()) { + add_value(target, item_property, JSON{member}, as_array); + } + } else { + add_value(target, item_property, std::move(compacted), as_array); + } + } + } + + return result; +} + +} // namespace sourcemeta::core diff --git a/vendor/core/src/core/jsonld/jsonld_context_processing.cc b/vendor/core/src/core/jsonld/jsonld_context_processing.cc new file mode 100644 index 00000000..b7ce348e --- /dev/null +++ b/vendor/core/src/core/jsonld/jsonld_context_processing.cc @@ -0,0 +1,285 @@ +#include "jsonld_algorithms.h" +#include "jsonld_keywords.h" + +#include + +#include // std::make_shared +#include // std::optional +#include // std::move, std::pair +#include // std::vector + +namespace sourcemeta::core { + +// Context Processing (JSON-LD 1.1 API Section 5.1) +auto process_context(ExpansionState &state, ActiveContext &active_context, + const JSON &local_context, const WeakPointer &pointer, + const bool propagate) -> void { + std::vector> contexts; + if (local_context.is_array()) { + std::size_t index{0}; + for (const auto &item : local_context.as_array()) { + contexts.emplace_back(&item, pointer.concat(index)); + index += 1; + } + } else { + contexts.emplace_back(&local_context, pointer); + } + + // The @propagate flag is read once from a top-level map context, before the + // contexts are processed (JSON-LD 1.1 API Section 5.1 steps 2 and 3). A + // non-boolean value is reported per entry by the loop below. + bool effective_propagate{propagate}; + if (local_context.is_object()) { + if (const auto *propagate_entry{ + local_context.try_at(KEYWORD_PROPAGATE, KEYWORD_PROPAGATE_HASH)}; + propagate_entry != nullptr && propagate_entry->is_boolean()) { + effective_propagate = propagate_entry->to_boolean(); + } + } + if (!effective_propagate && !active_context.previous) { + auto snapshot{std::make_shared(active_context)}; + snapshot->previous = nullptr; + active_context.previous = snapshot; + } + + for (const auto &[entry_pointer, location] : contexts) { + const auto &context{*entry_pointer}; + if (context.is_null()) { + if (!state.protected_override) { + for (const auto &entry : active_context.terms) { + if (entry.second.is_protected) { + throw JSONLDError("Invalid context nullification", location); + } + } + } + // Nullifying the context resets to the initial context, whose base is + // the document base. Whether to relativise is a processing option rather + // than context state, so it survives the reset. + const bool relativise{active_context.compact_to_relative}; + active_context = ActiveContext{}; + active_context.base = state.document_base; + active_context.compact_to_relative = relativise; + continue; + } + + if (context.is_string()) { + auto reference{context.to_string()}; + const auto resolution_base{state.context_resolution_base()}; + if (resolution_base.has_value()) { + reference = URI::from_iri(reference) + .resolve_from(URI::from_iri(resolution_base.value())) + .recompose(); + } + for (const auto &loaded : state.remote_context_chain) { + if (loaded == reference) { + throw JSONLDError("Recursive context inclusion", location); + } + } + if (state.resolver == nullptr || !*state.resolver) { + throw JSONLDError("Loading remote context failed", location); + } + const auto document{(*state.resolver)(reference)}; + if (!document.has_value()) { + throw JSONLDError("Loading remote context failed", location); + } + const auto *context_entry{ + document->is_object() + ? document->try_at(KEYWORD_CONTEXT, KEYWORD_CONTEXT_HASH) + : nullptr}; + if (context_entry == nullptr) { + throw JSONLDError("Invalid remote context", location); + } + state.remote_context_chain.push_back(reference); + try { + // A loaded remote context is processed with the default propagation. + process_context(state, active_context, *context_entry, location); + } catch (...) { + state.remote_context_chain.pop_back(); + throw; + } + state.remote_context_chain.pop_back(); + continue; + } + + if (!context.is_object()) { + throw JSONLDError("Invalid local context", location); + } + + if (const auto *version{ + context.try_at(KEYWORD_VERSION, KEYWORD_VERSION_HASH)}; + version != nullptr && + (!version->is_real() || version->to_real() != 1.1)) { + throw JSONLDError("Invalid @version value", location, {KEYWORD_VERSION}); + } + if (state.processing_1_0 && + context.defines(KEYWORD_VERSION, KEYWORD_VERSION_HASH)) { + throw JSONLDError("Processing mode conflict", location, + {KEYWORD_VERSION}); + } + if (state.processing_1_0 && + (context.defines(KEYWORD_PROPAGATE, KEYWORD_PROPAGATE_HASH) || + context.defines(KEYWORD_IMPORT, KEYWORD_IMPORT_HASH) || + context.defines(KEYWORD_PROTECTED, KEYWORD_PROTECTED_HASH))) { + throw JSONLDError("Invalid context entry", location); + } + + if (const auto *propagate_entry{ + context.try_at(KEYWORD_PROPAGATE, KEYWORD_PROPAGATE_HASH)}; + propagate_entry != nullptr && !propagate_entry->is_boolean()) { + throw JSONLDError("Invalid @propagate value", location, + {KEYWORD_PROPAGATE}); + } + + // @protected applies to imported terms too, so it is set before @import. + const bool saved_protected{state.context_protected}; + if (const auto *protected_entry{ + context.try_at(KEYWORD_PROTECTED, KEYWORD_PROTECTED_HASH)}) { + if (!protected_entry->is_boolean()) { + throw JSONLDError("Invalid @protected value", location, + {KEYWORD_PROTECTED}); + } + state.context_protected = protected_entry->to_boolean(); + } + + if (const auto *import_entry{ + context.try_at(KEYWORD_IMPORT, KEYWORD_IMPORT_HASH)}) { + const auto &import{*import_entry}; + if (!import.is_string()) { + throw JSONLDError("Invalid @import value", location, {KEYWORD_IMPORT}); + } + auto reference{import.to_string()}; + const auto resolution_base{state.context_resolution_base()}; + if (resolution_base.has_value()) { + reference = URI::from_iri(reference) + .resolve_from(URI::from_iri(resolution_base.value())) + .recompose(); + } + if (state.resolver == nullptr || !*state.resolver) { + throw JSONLDError("Loading remote context failed", location, + {KEYWORD_IMPORT}); + } + const auto document{(*state.resolver)(reference)}; + if (!document.has_value()) { + throw JSONLDError("Loading remote context failed", location, + {KEYWORD_IMPORT}); + } + const auto *imported_context{ + document->is_object() + ? document->try_at(KEYWORD_CONTEXT, KEYWORD_CONTEXT_HASH) + : nullptr}; + if (imported_context == nullptr || !imported_context->is_object()) { + throw JSONLDError("Invalid remote context", location, {KEYWORD_IMPORT}); + } + if (imported_context->defines(KEYWORD_IMPORT, KEYWORD_IMPORT_HASH)) { + throw JSONLDError("Invalid context entry", location, {KEYWORD_IMPORT}); + } + // Merge the imported entries with the current ones, the current ones + // overriding, and process the result as a single context. + auto merged{JSON{*imported_context}}; + for (const auto &entry : context.as_object()) { + if (!entry.key_equals(KEYWORD_IMPORT, KEYWORD_IMPORT_HASH)) { + merged.assign(entry.first, entry.second); + } + } + process_context(state, active_context, merged, location, propagate); + state.context_protected = saved_protected; + continue; + } + + if (const auto *base_entry{ + state.remote_context_chain.empty() + ? context.try_at(KEYWORD_BASE, KEYWORD_BASE_HASH) + : nullptr}) { + const auto &base{*base_entry}; + if (base.is_null()) { + active_context.base = std::nullopt; + } else if (!base.is_string()) { + throw JSONLDError("Invalid base IRI", location, {KEYWORD_BASE}); + } else { + const auto &base_string{base.to_string()}; + if (active_context.base.has_value()) { + active_context.base = + URI::from_iri(base_string) + .resolve_from(URI::from_iri(active_context.base.value())) + .recompose(); + } else if (URI::from_iri(base_string).is_absolute()) { + active_context.base = base_string; + } else { + throw JSONLDError("Invalid base IRI", location, {KEYWORD_BASE}); + } + } + } + + if (const auto *vocabulary_entry{ + context.try_at(KEYWORD_VOCAB, KEYWORD_VOCAB_HASH)}) { + const auto &vocabulary{*vocabulary_entry}; + if (vocabulary.is_null()) { + active_context.vocabulary = std::nullopt; + } else if (!vocabulary.is_string()) { + throw JSONLDError("Invalid vocab mapping", location, {KEYWORD_VOCAB}); + } else { + const auto &vocabulary_string{vocabulary.to_string()}; + // In 1.0, @vocab must be an absolute IRI or blank node identifier. + if (state.processing_1_0 && + vocabulary_string.find(':') == JSON::String::npos) { + throw JSONLDError("Invalid vocab mapping", location, {KEYWORD_VOCAB}); + } + active_context.vocabulary = + expand_iri(state, active_context, vocabulary_string, true, true, + nullptr, nullptr, empty_weak_pointer); + } + } + + if (const auto *language_entry{ + context.try_at(KEYWORD_LANGUAGE, KEYWORD_LANGUAGE_HASH)}) { + const auto &language{*language_entry}; + if (language.is_null()) { + active_context.default_language = std::nullopt; + } else if (!language.is_string()) { + throw JSONLDError("Invalid default language", location, + {KEYWORD_LANGUAGE}); + } else { + active_context.default_language = language.to_string(); + } + } + + if (const auto *direction_entry{ + context.try_at(KEYWORD_DIRECTION, KEYWORD_DIRECTION_HASH)}) { + const auto &direction{*direction_entry}; + if (direction.is_null()) { + active_context.default_direction = std::nullopt; + } else if (!direction.is_string()) { + throw JSONLDError("Invalid base direction", location, + {KEYWORD_DIRECTION}); + } else { + const auto &direction_string{direction.to_string()}; + if (direction_string != "ltr" && direction_string != "rtl") { + throw JSONLDError("Invalid base direction", location, + {KEYWORD_DIRECTION}); + } + active_context.default_direction = direction_string; + } + } + + DefinedTerms defined; + for (const auto &entry : context.as_object()) { + const auto &name{entry.first}; + if (entry.key_equals(KEYWORD_BASE, KEYWORD_BASE_HASH) || + entry.key_equals(KEYWORD_VOCAB, KEYWORD_VOCAB_HASH) || + entry.key_equals(KEYWORD_LANGUAGE, KEYWORD_LANGUAGE_HASH) || + entry.key_equals(KEYWORD_VERSION, KEYWORD_VERSION_HASH) || + entry.key_equals(KEYWORD_DIRECTION, KEYWORD_DIRECTION_HASH) || + entry.key_equals(KEYWORD_IMPORT, KEYWORD_IMPORT_HASH) || + entry.key_equals(KEYWORD_PROPAGATE, KEYWORD_PROPAGATE_HASH) || + entry.key_equals(KEYWORD_PROTECTED, KEYWORD_PROTECTED_HASH)) { + continue; + } + create_term_definition(state, active_context, context, name, defined, + location); + } + + state.context_protected = saved_protected; + } +} + +} // namespace sourcemeta::core diff --git a/vendor/core/src/core/jsonld/jsonld_create_term_definition.cc b/vendor/core/src/core/jsonld/jsonld_create_term_definition.cc new file mode 100644 index 00000000..c91cdb6b --- /dev/null +++ b/vendor/core/src/core/jsonld/jsonld_create_term_definition.cc @@ -0,0 +1,619 @@ +#include "jsonld_algorithms.h" +#include "jsonld_keywords.h" + +#include + +#include // std::optional +#include // std::move + +namespace sourcemeta::core { + +namespace { + +// Whether the given value is a valid @container value. +auto is_valid_container(const JSON::StringView value) -> bool { + return value == KEYWORD_LIST || value == KEYWORD_SET || + value == KEYWORD_INDEX || value == KEYWORD_LANGUAGE || + value == KEYWORD_ID || value == KEYWORD_TYPE || value == KEYWORD_GRAPH; +} + +// Whether the given value ends with a URI generic delimiter (RFC 3986). +auto ends_with_gen_delim(const JSON::StringView value) -> bool { + return !value.empty() && URI::is_gen_delim(value.back()); +} + +// Whether two definitions are equivalent ignoring their protected status, which +// is what a protected-term redefinition check compares. +auto same_definition(const TermDefinition &left, const TermDefinition &right) + -> bool { + return left.iri == right.iri && left.type_mapping == right.type_mapping && + left.container == right.container && left.language == right.language && + left.has_language == right.has_language && + left.direction == right.direction && + left.has_direction == right.has_direction && + left.context == right.context && + left.context_base == right.context_base && left.index == right.index && + left.index_iri == right.index_iri && left.nest == right.nest && + left.reverse == right.reverse && left.prefix == right.prefix; +} + +// Store a freshly-built term definition, enforcing protected-term redefinition. +auto finalize_definition(ExpansionState &state, ActiveContext &active_context, + DefinedTerms &defined, const JSON::String &term, + const WeakPointer &term_pointer, + const std::optional &previous, + TermDefinition &&candidate) -> void { + if (previous.has_value() && previous->is_protected && + !state.protected_override) { + if (!same_definition(previous.value(), candidate)) { + throw JSONLDError("Protected term redefinition", term_pointer); + } + // A redefinition with the same definition retains the protected flag. + candidate.is_protected = true; + } + active_context.terms[term] = std::move(candidate); + defined[term] = true; +} + +} // namespace + +// Create Term Definition (JSON-LD 1.1 API Section 5.1.1) +auto create_term_definition(ExpansionState &state, + ActiveContext &active_context, + const JSON &local_context, const JSON::String &term, + DefinedTerms &defined, + const WeakPointer &context_pointer) -> void { + const auto status{defined.find(term)}; + if (status != defined.cend()) { + if (status->second) { + return; + } + throw JSONLDError("Cyclic IRI mapping", context_pointer.concat(term)); + } + + if (term.empty()) { + throw JSONLDError("Invalid term definition", context_pointer); + } + + defined[term] = false; + const auto &value{local_context.at(term)}; + const WeakPointer term_pointer{context_pointer.concat(term)}; + + if (is_keyword(term)) { + if (term == KEYWORD_TYPE && value.is_object() && !state.processing_1_0) { + TermDefinition type_definition; + bool has_container{false}; + bool invalid_entry{false}; + for (const auto &entry : value.as_object()) { + if (entry.key_equals(KEYWORD_PROTECTED, KEYWORD_PROTECTED_HASH)) { + if (!entry.second.is_boolean()) { + throw JSONLDError("Invalid @protected value", term_pointer, + {KEYWORD_PROTECTED}); + } + type_definition.is_protected = entry.second.to_boolean(); + } else if (entry.key_equals(KEYWORD_CONTAINER, + KEYWORD_CONTAINER_HASH) && + entry.second.is_string()) { + const auto &container{entry.second.to_string()}; + if (container == KEYWORD_SET) { + type_definition.container.push_back(container); + has_container = true; + } else { + invalid_entry = true; + } + } else { + invalid_entry = true; + } + } + // A redefinition of a protected @type is rejected before the shape of + // the new definition is validated. + const auto existing_type{active_context.terms.find(KEYWORD_TYPE)}; + if (existing_type != active_context.terms.cend() && + existing_type->second.is_protected && !state.protected_override) { + if (!same_definition(existing_type->second, type_definition)) { + throw JSONLDError("Protected term redefinition", term_pointer); + } + type_definition.is_protected = true; + } else if (invalid_entry || !has_container) { + throw JSONLDError("Keyword redefinition", term_pointer); + } else if (!type_definition.is_protected) { + type_definition.is_protected = state.context_protected; + } + active_context.terms[JSON::String{KEYWORD_TYPE}] = + std::move(type_definition); + defined[term] = true; + return; + } + throw JSONLDError("Keyword redefinition", term_pointer); + } + + if (has_keyword_form(term)) { + defined[term] = true; + return; + } + + std::optional previous; + const auto existing{active_context.terms.find(term)}; + if (existing != active_context.terms.cend()) { + previous = existing->second; + } + active_context.terms.erase(term); + + const auto *id_entry{ + value.is_object() ? value.try_at(KEYWORD_ID, KEYWORD_ID_HASH) : nullptr}; + if (value.is_null() || (id_entry != nullptr && id_entry->is_null())) { + TermDefinition empty; + empty.is_protected = state.context_protected; + // @protected is processed before the null @id is handled, so an explicitly + // protected term that maps to null stays protected. + if (id_entry != nullptr) { + if (const auto *protected_entry{ + value.try_at(KEYWORD_PROTECTED, KEYWORD_PROTECTED_HASH)}) { + if (!protected_entry->is_boolean()) { + throw JSONLDError("Invalid @protected value", term_pointer, + {KEYWORD_PROTECTED}); + } + if (state.processing_1_0) { + throw JSONLDError("Invalid term definition", term_pointer, + {KEYWORD_PROTECTED}); + } + empty.is_protected = protected_entry->to_boolean(); + } + } + finalize_definition(state, active_context, defined, term, term_pointer, + previous, std::move(empty)); + return; + } + + TermDefinition definition; + definition.is_protected = state.context_protected; + bool simple_term{false}; + + if (value.is_string()) { + simple_term = true; + const auto &string_value{value.to_string()}; + if (!is_keyword(string_value) && has_keyword_form(string_value)) { + defined[term] = true; + return; + } + if (string_value == term) { + // A self-referential simple term resolves through the term itself. + const auto colon{term.find(':')}; + if (colon != JSON::String::npos) { + const auto prefix{term.substr(0, colon)}; + const auto suffix{term.substr(colon + 1)}; + if (prefix != "_" && !suffix.starts_with("//") && + local_context.is_object() && local_context.defines(prefix)) { + const auto iterator{defined.find(prefix)}; + if (iterator == defined.cend() || !iterator->second) { + create_term_definition(state, active_context, local_context, prefix, + defined, context_pointer); + } + } + const auto prefix_definition{active_context.terms.find(prefix)}; + if (prefix_definition != active_context.terms.cend() && + prefix_definition->second.iri.has_value()) { + definition.iri = prefix_definition->second.iri.value() + suffix; + } else { + definition.iri = term; + } + } else if (term.find('/') != JSON::String::npos) { + definition.iri = expand_iri(state, active_context, term, false, true, + nullptr, nullptr, empty_weak_pointer); + } else if (active_context.vocabulary.has_value()) { + definition.iri = active_context.vocabulary.value() + term; + } + } else { + definition.iri = + expand_iri(state, active_context, string_value, false, true, + &local_context, &defined, context_pointer); + // In 1.1, an IRI-like term must expand to its IRI mapping. + if (!state.processing_1_0 && definition.iri.has_value()) { + const auto colon_position{term.find(':')}; + const bool iri_like_colon{colon_position != JSON::String::npos && + colon_position != 0 && + colon_position + 1 != term.size()}; + if (iri_like_colon || term.find('/') != JSON::String::npos) { + auto probe{active_context}; + const auto expanded_term{expand_iri(state, probe, term, false, true, + nullptr, nullptr, + empty_weak_pointer)}; + if (expanded_term.has_value() && expanded_term != definition.iri) { + throw JSONLDError("Invalid IRI mapping", term_pointer); + } + } + } + } + } else if (value.is_object()) { + const bool has_id{id_entry != nullptr}; + const JSON *const id{id_entry}; + if (const auto *reverse_entry{ + value.try_at(KEYWORD_REVERSE, KEYWORD_REVERSE_HASH)}) { + if (has_id || value.defines(KEYWORD_NEST, KEYWORD_NEST_HASH)) { + throw JSONLDError("Invalid reverse property", term_pointer, + {KEYWORD_REVERSE}); + } + const auto &reverse{*reverse_entry}; + if (!reverse.is_string()) { + throw JSONLDError("Invalid IRI mapping", term_pointer, + {KEYWORD_REVERSE}); + } + definition.reverse = true; + definition.iri = + expand_iri(state, active_context, reverse.to_string(), false, true, + &local_context, &defined, context_pointer); + if (!definition.iri.has_value()) { + // A reverse value with the form of a keyword is ignored. + defined[term] = true; + return; + } + if (definition.iri.value().find(':') == JSON::String::npos) { + throw JSONLDError("Invalid IRI mapping", term_pointer, + {KEYWORD_REVERSE}); + } + } else if (has_id && !id->is_null() && + (!id->is_string() || id->to_string() != term)) { + if (!id->is_string()) { + throw JSONLDError("Invalid IRI mapping", term_pointer, {KEYWORD_ID}); + } + const auto &id_value{id->to_string()}; + if (!is_keyword(id_value) && has_keyword_form(id_value)) { + defined[term] = true; + return; + } + definition.iri = expand_iri(state, active_context, id_value, false, true, + &local_context, &defined, context_pointer); + const auto &mapping{definition.iri}; + if (!mapping.has_value() || + (!is_keyword(mapping.value()) && + mapping.value().find(':') == JSON::String::npos && + !active_context.vocabulary.has_value())) { + throw JSONLDError("Invalid IRI mapping", term_pointer, {KEYWORD_ID}); + } + if (mapping.has_value() && mapping.value() == KEYWORD_CONTEXT) { + throw JSONLDError("Invalid keyword alias", term_pointer, {KEYWORD_ID}); + } + // In 1.1, a term that itself has the form of an IRI (a colon other than + // at the edges, or a slash) must expand to its IRI mapping. + if (!state.processing_1_0 && mapping.has_value()) { + const auto colon_position{term.find(':')}; + const bool iri_like_colon{colon_position != JSON::String::npos && + colon_position != 0 && + colon_position + 1 != term.size()}; + if (iri_like_colon || term.find('/') != JSON::String::npos) { + auto probe{active_context}; + const auto expanded_term{expand_iri(state, probe, term, false, true, + nullptr, nullptr, + empty_weak_pointer)}; + if (expanded_term.has_value() && expanded_term != mapping) { + throw JSONLDError("Invalid IRI mapping", term_pointer, + {KEYWORD_ID}); + } + } + } + } else if (term.find(':') != JSON::String::npos && !term.starts_with(':') && + !term.ends_with(':')) { + const auto colon{term.find(':')}; + const auto prefix{term.substr(0, colon)}; + const auto suffix{term.substr(colon + 1)}; + if (prefix != "_" && !suffix.starts_with("//") && + local_context.is_object() && local_context.defines(prefix)) { + const auto iterator{defined.find(prefix)}; + if (iterator == defined.cend() || !iterator->second) { + create_term_definition(state, active_context, local_context, prefix, + defined, context_pointer); + } + } + const auto prefix_definition{active_context.terms.find(prefix)}; + if (prefix_definition != active_context.terms.cend() && + prefix_definition->second.iri.has_value()) { + definition.iri = prefix_definition->second.iri.value() + suffix; + } else { + definition.iri = term; + } + } else if (term.find('/') != JSON::String::npos) { + definition.iri = expand_iri(state, active_context, term, false, true, + nullptr, nullptr, empty_weak_pointer); + } else if (active_context.vocabulary.has_value()) { + definition.iri = active_context.vocabulary.value() + term; + } + + if (const auto *type_entry{value.try_at(KEYWORD_TYPE, KEYWORD_TYPE_HASH)}) { + const auto &type_value{*type_entry}; + if (!type_value.is_string()) { + throw JSONLDError("Invalid type mapping", term_pointer, {KEYWORD_TYPE}); + } + const auto type{expand_iri(state, active_context, type_value.to_string(), + false, true, &local_context, &defined, + context_pointer)}; + if (!type.has_value() || type.value().starts_with("_:") || + (type.value() != KEYWORD_ID && type.value() != KEYWORD_VOCAB && + type.value() != KEYWORD_JSON && type.value() != KEYWORD_NONE && + type.value().find(':') == JSON::String::npos) || + (state.processing_1_0 && + (type.value() == KEYWORD_JSON || type.value() == KEYWORD_NONE))) { + throw JSONLDError("Invalid type mapping", term_pointer, {KEYWORD_TYPE}); + } + definition.type_mapping = type; + } + + if (const auto *container_entry{ + value.try_at(KEYWORD_CONTAINER, KEYWORD_CONTAINER_HASH)}) { + const auto &container{*container_entry}; + if (container.is_array()) { + // Array containers are a 1.1 feature. + if (state.processing_1_0) { + throw JSONLDError("Invalid container mapping", term_pointer, + {KEYWORD_CONTAINER}); + } + for (const auto &item : container.as_array()) { + if (!item.is_string()) { + throw JSONLDError("Invalid container mapping", term_pointer, + {KEYWORD_CONTAINER}); + } + const auto &item_string{item.to_string()}; + if (!is_valid_container(item_string)) { + throw JSONLDError("Invalid container mapping", term_pointer, + {KEYWORD_CONTAINER}); + } + // A keyword may not appear more than once in the container array. + for (const auto &seen : definition.container) { + if (seen == item_string) { + throw JSONLDError("Invalid container mapping", term_pointer, + {KEYWORD_CONTAINER}); + } + } + definition.container.push_back(item_string); + } + } else if (container.is_string()) { + const auto &container_string{container.to_string()}; + // In 1.0, the @graph, @id and @type containers are not permitted. + if (state.processing_1_0 && (container_string == KEYWORD_GRAPH || + container_string == KEYWORD_ID || + container_string == KEYWORD_TYPE)) { + throw JSONLDError("Invalid container mapping", term_pointer, + {KEYWORD_CONTAINER}); + } + if (!is_valid_container(container_string)) { + throw JSONLDError("Invalid container mapping", term_pointer, + {KEYWORD_CONTAINER}); + } + definition.container.push_back(container_string); + } else { + throw JSONLDError("Invalid container mapping", term_pointer, + {KEYWORD_CONTAINER}); + } + if (definition.reverse) { + for (const auto &item : definition.container) { + if (item != KEYWORD_SET && item != KEYWORD_INDEX) { + throw JSONLDError("Invalid reverse property", term_pointer, + {KEYWORD_CONTAINER}); + } + } + } + bool container_graph{false}; + bool container_id{false}; + bool container_index{false}; + bool container_language{false}; + bool container_list{false}; + bool container_set{false}; + bool container_type{false}; + for (const auto &item : definition.container) { + if (item == KEYWORD_GRAPH) { + container_graph = true; + } else if (item == KEYWORD_ID) { + container_id = true; + } else if (item == KEYWORD_INDEX) { + container_index = true; + } else if (item == KEYWORD_LANGUAGE) { + container_language = true; + } else if (item == KEYWORD_LIST) { + container_list = true; + } else if (item == KEYWORD_SET) { + container_set = true; + } else if (item == KEYWORD_TYPE) { + container_type = true; + } + } + // Valid array combinations (JSON-LD 1.1 API Section 5.1.1 step 19.1): a + // single keyword, or @graph with exactly one of @id or @index optionally + // with @set, or @set combined with any of @index, @graph, @id, @type, or + // @language. + if (definition.container.size() != 1) { + const bool graph_form{ + container_graph && (container_id != container_index) && + !container_list && !container_type && !container_language}; + const bool set_form{container_set && !container_list}; + if (!graph_form && !set_form) { + throw JSONLDError("Invalid container mapping", term_pointer, + {KEYWORD_CONTAINER}); + } + } + // A type-map container may only coerce its keys to identifiers. + if (container_type && definition.type_mapping.has_value() && + definition.type_mapping.value() != KEYWORD_ID && + definition.type_mapping.value() != KEYWORD_VOCAB) { + throw JSONLDError("Invalid type mapping", term_pointer, {KEYWORD_TYPE}); + } + } + + if (const auto *language_entry{ + value.try_at(KEYWORD_LANGUAGE, KEYWORD_LANGUAGE_HASH)}; + language_entry != nullptr && + !value.defines(KEYWORD_TYPE, KEYWORD_TYPE_HASH)) { + const auto &language{*language_entry}; + if (!language.is_null() && !language.is_string()) { + throw JSONLDError("Invalid language mapping", term_pointer, + {KEYWORD_LANGUAGE}); + } + definition.has_language = true; + if (language.is_string()) { + definition.language = language.to_string(); + } + } + + if (const auto *direction_entry{ + value.try_at(KEYWORD_DIRECTION, KEYWORD_DIRECTION_HASH)}; + direction_entry != nullptr && + !value.defines(KEYWORD_TYPE, KEYWORD_TYPE_HASH)) { + const auto &direction{*direction_entry}; + if (!direction.is_null() && + (!direction.is_string() || (direction.to_string() != "ltr" && + direction.to_string() != "rtl"))) { + throw JSONLDError("Invalid base direction", term_pointer, + {KEYWORD_DIRECTION}); + } + definition.has_direction = true; + if (direction.is_string()) { + definition.direction = direction.to_string(); + } + } + + if (const auto *context_entry{ + value.try_at(KEYWORD_CONTEXT, KEYWORD_CONTEXT_HASH)}) { + if (state.processing_1_0) { + throw JSONLDError("Invalid term definition", term_pointer, + {KEYWORD_CONTEXT}); + } + // Validate the scoped context eagerly so that errors surface even when + // the term is never used. Remote scoped contexts (including recursive + // ones) are validated lazily when the term is used instead. + const bool saved_override{state.protected_override}; + const bool saved_context_protected{state.context_protected}; + try { + // The error raised here is always discarded below, so its location does + // not matter. + ActiveContext probe{active_context}; + state.protected_override = true; + process_context(state, probe, *context_entry, empty_weak_pointer); + state.protected_override = saved_override; + state.context_protected = saved_context_protected; + } catch (const JSONLDError &error) { + state.protected_override = saved_override; + state.context_protected = saved_context_protected; + const JSON::StringView code{error.what()}; + if (code != "Loading remote context failed" && + code != "Recursive context inclusion" && + code != "Invalid remote context") { + throw JSONLDError("Invalid scoped context", term_pointer, + {KEYWORD_CONTEXT}); + } + } + definition.context = *context_entry; + definition.context_base = state.context_resolution_base(); + } + + if (const auto *prefix_entry{ + value.try_at(KEYWORD_PREFIX, KEYWORD_PREFIX_HASH)}) { + if (state.processing_1_0 || term.find(':') != JSON::String::npos || + term.find('/') != JSON::String::npos) { + throw JSONLDError("Invalid term definition", term_pointer, + {KEYWORD_PREFIX}); + } + if (!prefix_entry->is_boolean()) { + throw JSONLDError("Invalid @prefix value", term_pointer, + {KEYWORD_PREFIX}); + } + definition.prefix = prefix_entry->to_boolean(); + if (definition.prefix && definition.iri.has_value() && + is_keyword(definition.iri.value())) { + throw JSONLDError("Invalid term definition", term_pointer, + {KEYWORD_PREFIX}); + } + } + + if (const auto *nest_entry{value.try_at(KEYWORD_NEST, KEYWORD_NEST_HASH)}) { + if (state.processing_1_0) { + throw JSONLDError("Invalid term definition", term_pointer, + {KEYWORD_NEST}); + } + const auto &nest{*nest_entry}; + if (!nest.is_string()) { + throw JSONLDError("Invalid @nest value", term_pointer, {KEYWORD_NEST}); + } + const auto &nest_string{nest.to_string()}; + if (is_keyword(nest_string) && nest_string != KEYWORD_NEST) { + throw JSONLDError("Invalid @nest value", term_pointer, {KEYWORD_NEST}); + } + definition.nest = nest_string; + } + + if (const auto *index_entry{ + value.try_at(KEYWORD_INDEX, KEYWORD_INDEX_HASH)}) { + if (state.processing_1_0) { + throw JSONLDError("Invalid term definition", term_pointer, + {KEYWORD_INDEX}); + } + bool has_index_container{false}; + for (const auto &item : definition.container) { + if (item == KEYWORD_INDEX) { + has_index_container = true; + } + } + const auto &index{*index_entry}; + if (!index.is_string() || !has_index_container) { + throw JSONLDError("Invalid term definition", term_pointer, + {KEYWORD_INDEX}); + } + const auto &index_string{index.to_string()}; + const auto index_iri{expand_iri(state, active_context, index_string, + false, true, &local_context, &defined, + context_pointer)}; + if (!index_iri.has_value() || is_keyword(index_iri.value())) { + throw JSONLDError("Invalid term definition", term_pointer, + {KEYWORD_INDEX}); + } + definition.index = index_string; + definition.index_iri = index_iri.value(); + } + + if (const auto *protected_entry{ + value.try_at(KEYWORD_PROTECTED, KEYWORD_PROTECTED_HASH)}) { + if (!protected_entry->is_boolean()) { + throw JSONLDError("Invalid @protected value", term_pointer, + {KEYWORD_PROTECTED}); + } + if (state.processing_1_0) { + throw JSONLDError("Invalid term definition", term_pointer, + {KEYWORD_PROTECTED}); + } + definition.is_protected = protected_entry->to_boolean(); + } + + // A term definition may not contain any entry other than the keywords + // recognised above. + for (const auto &entry : value.as_object()) { + if (!entry.key_equals(KEYWORD_ID, KEYWORD_ID_HASH) && + !entry.key_equals(KEYWORD_REVERSE, KEYWORD_REVERSE_HASH) && + !entry.key_equals(KEYWORD_CONTAINER, KEYWORD_CONTAINER_HASH) && + !entry.key_equals(KEYWORD_CONTEXT, KEYWORD_CONTEXT_HASH) && + !entry.key_equals(KEYWORD_DIRECTION, KEYWORD_DIRECTION_HASH) && + !entry.key_equals(KEYWORD_INDEX, KEYWORD_INDEX_HASH) && + !entry.key_equals(KEYWORD_LANGUAGE, KEYWORD_LANGUAGE_HASH) && + !entry.key_equals(KEYWORD_NEST, KEYWORD_NEST_HASH) && + !entry.key_equals(KEYWORD_PREFIX, KEYWORD_PREFIX_HASH) && + !entry.key_equals(KEYWORD_PROTECTED, KEYWORD_PROTECTED_HASH) && + !entry.key_equals(KEYWORD_TYPE, KEYWORD_TYPE_HASH)) { + throw JSONLDError("Invalid term definition", term_pointer); + } + } + } else { + throw JSONLDError("Invalid term definition", term_pointer); + } + + if (simple_term && term.find(':') == JSON::String::npos && + term.find('/') == JSON::String::npos && definition.iri.has_value() && + (ends_with_gen_delim(definition.iri.value()) || + definition.iri.value().starts_with("_:"))) { + definition.prefix = true; + } + + if (!definition.reverse && !definition.iri.has_value()) { + throw JSONLDError("Invalid IRI mapping", term_pointer); + } + + finalize_definition(state, active_context, defined, term, term_pointer, + previous, std::move(definition)); +} + +} // namespace sourcemeta::core diff --git a/vendor/core/src/core/jsonld/jsonld_expansion.cc b/vendor/core/src/core/jsonld/jsonld_expansion.cc new file mode 100644 index 00000000..372abe27 --- /dev/null +++ b/vendor/core/src/core/jsonld/jsonld_expansion.cc @@ -0,0 +1,963 @@ +#include "jsonld_algorithms.h" +#include "jsonld_keywords.h" + +#include // std::ranges::sort +#include // std::size_t +#include // std::optional +#include // std::move, std::pair +#include // std::vector + +namespace sourcemeta::core { + +namespace { + +auto into_array(JSON &&value) -> JSON { + if (value.is_array()) { + return std::move(value); + } + auto result{JSON::make_array()}; + result.push_back(std::move(value)); + return result; +} + +// The entries of an object in sorted key order, which is the order expansion +// uses so that values merged from several keys are deterministic. The keys and +// values are referenced from the object (which must outlive the result), never +// copied. +auto sorted_entries(const JSON &object) + -> std::vector> { + std::vector> entries; + for (const auto &entry : object.as_object()) { + entries.emplace_back(&entry.first, &entry.second); + } + std::ranges::sort(entries, [](const auto &left, const auto &right) -> bool { + return *left.first < *right.first; + }); + return entries; +} + +// Append the values, which must be an array, into the array stored at the given +// key, creating it if absent. +auto merge(JSON &object, const JSON::StringView name, JSON &&values) -> void { + if (object.defines(name)) { + for (auto &item : values.as_array()) { + object.at(name).push_back(item); + } + } else { + object.assign(name, std::move(values)); + } +} + +auto container_includes(const TermDefinition *const definition, + const JSON::StringView name) -> bool { + if (definition == nullptr) { + return false; + } + for (const auto &entry : definition->container) { + if (entry == name) { + return true; + } + } + return false; +} + +// Expand a single @type value against the context that preceded type-scoped +// processing. +auto expand_type(ExpansionState &state, const ActiveContext &type_context, + const JSON &item) -> JSON { + auto context{type_context}; + const auto type{expand_iri(state, context, item.to_string(), true, true, + nullptr, nullptr, empty_weak_pointer)}; + return type.has_value() ? JSON{type.value()} : JSON{nullptr}; +} + +// Expand the direct (and deferred @nest) entries of a map into the result, +// mutating it in place. Mutually recursive with expand_object. +auto expand_entries(ExpansionState &state, ActiveContext &active_context, + const ActiveContext &type_context, JSON &result, + const std::optional &active_property, + const JSON &source, const WeakPointer &source_pointer) + -> void; + +// Expand a map element: the node-object (and value-object) branch of the +// Expansion algorithm, factored out of expand() below. +auto expand_object(ExpansionState &state, ActiveContext active_context, + const std::optional &active_property, + const JSON &element, const WeakPointer &pointer) -> JSON { + auto result{JSON::make_object()}; + + // @type values are expanded against the context before type-scoped contexts + // are applied. + const ActiveContext type_context{active_context}; + + // Type-scoped contexts (JSON-LD 1.1 API Section 5.1.2 step 11). The values + // are referenced from the input element, never copied. + std::vector type_values; + for (const auto &entry : element.as_object()) { + const auto expanded{expand_iri(state, active_context, entry.first, false, + true, nullptr, nullptr, empty_weak_pointer)}; + if (!expanded.has_value() || expanded.value() != KEYWORD_TYPE) { + continue; + } + if (entry.second.is_array()) { + for (const auto &item : entry.second.as_array()) { + if (item.is_string()) { + type_values.push_back(item.to_string()); + } + } + } else if (entry.second.is_string()) { + type_values.push_back(entry.second.to_string()); + } + } + std::ranges::sort(type_values); + for (const auto &type : type_values) { + // Each type-scoped context is resolved against the context that preceded + // type-scoped processing, so one type's context cannot hide another's. + const auto definition{type_context.terms.find(type)}; + if (definition != type_context.terms.cend() && + definition->second.context.has_value()) { + const auto &scoped{definition->second.context.value()}; + const auto saved_base{state.context_base_override}; + state.context_base_override = definition->second.context_base; + process_context(state, active_context, scoped, pointer, false); + state.context_base_override = saved_base; + } + } + + expand_entries(state, active_context, type_context, result, active_property, + element, pointer); + + // An empty reverse map carries no information. + if (const auto *reverse{result.try_at(KEYWORD_REVERSE, KEYWORD_REVERSE_HASH)}; + reverse != nullptr && reverse->empty()) { + result.erase(KEYWORD_REVERSE); + } + + // Post-processing (JSON-LD 1.1 API Section 5.1.2) + if (const auto *value_entry{ + result.try_at(KEYWORD_VALUE, KEYWORD_VALUE_HASH)}) { + const JSON *const type{result.try_at(KEYWORD_TYPE, KEYWORD_TYPE_HASH)}; + const bool has_type{type != nullptr}; + const JSON::String *const type_string{ + type != nullptr && type->is_string() ? &type->to_string() : nullptr}; + const bool is_json{type_string != nullptr && *type_string == KEYWORD_JSON}; + for (const auto &entry : result.as_object()) { + if (!entry.key_equals(KEYWORD_VALUE, KEYWORD_VALUE_HASH) && + !entry.key_equals(KEYWORD_TYPE, KEYWORD_TYPE_HASH) && + !entry.key_equals(KEYWORD_LANGUAGE, KEYWORD_LANGUAGE_HASH) && + !entry.key_equals(KEYWORD_INDEX, KEYWORD_INDEX_HASH) && + !entry.key_equals(KEYWORD_DIRECTION, KEYWORD_DIRECTION_HASH)) { + throw JSONLDError("Invalid value object", pointer); + } + if ((entry.key_equals(KEYWORD_LANGUAGE, KEYWORD_LANGUAGE_HASH) || + entry.key_equals(KEYWORD_DIRECTION, KEYWORD_DIRECTION_HASH)) && + has_type) { + throw JSONLDError("Invalid value object", pointer); + } + } + const auto &content{*value_entry}; + if (content.is_null() && !is_json) { + return JSON{nullptr}; + } + if (result.defines(KEYWORD_LANGUAGE, KEYWORD_LANGUAGE_HASH) && + !content.is_string()) { + throw JSONLDError("Invalid language-tagged value", pointer); + } + if (has_type && (type_string == nullptr || type_string->starts_with("_:") || + type_string->find(' ') != JSON::String::npos)) { + throw JSONLDError("Invalid typed value", pointer); + } + if (!is_json && !content.is_string() && !content.is_number() && + !content.is_boolean()) { + throw JSONLDError("Invalid value object value", pointer); + } + } else if (const auto *type_entry{ + result.try_at(KEYWORD_TYPE, KEYWORD_TYPE_HASH)}; + type_entry != nullptr && !type_entry->is_array()) { + // Node objects always carry @type as an array. + result.assign(KEYWORD_TYPE, into_array(JSON{*type_entry})); + } + + // A set or list object may only carry an @index entry besides, and this is + // validated before any value is dropped. + if (result.defines(KEYWORD_LIST, KEYWORD_LIST_HASH) || + result.defines(KEYWORD_SET, KEYWORD_SET_HASH)) { + for (const auto &entry : result.as_object()) { + if (!entry.key_equals(KEYWORD_LIST, KEYWORD_LIST_HASH) && + !entry.key_equals(KEYWORD_SET, KEYWORD_SET_HASH) && + !entry.key_equals(KEYWORD_INDEX, KEYWORD_INDEX_HASH)) { + throw JSONLDError("Invalid set or list object", pointer); + } + } + } + + // A bare @set collapses to its array. + if (const auto *set{result.try_at(KEYWORD_SET, KEYWORD_SET_HASH)}) { + return *set; + } + + // Drop an incomplete value object that has a language or direction but no + // value. + if (!result.defines(KEYWORD_VALUE, KEYWORD_VALUE_HASH) && + (result.defines(KEYWORD_LANGUAGE, KEYWORD_LANGUAGE_HASH) || + result.defines(KEYWORD_DIRECTION, KEYWORD_DIRECTION_HASH))) { + bool only_value_keys{true}; + for (const auto &entry : result.as_object()) { + if (!entry.key_equals(KEYWORD_LANGUAGE, KEYWORD_LANGUAGE_HASH) && + !entry.key_equals(KEYWORD_DIRECTION, KEYWORD_DIRECTION_HASH) && + !entry.key_equals(KEYWORD_INDEX, KEYWORD_INDEX_HASH)) { + only_value_keys = false; + } + } + if (only_value_keys) { + return JSON{nullptr}; + } + } + + // Drop free-floating values when not under a property. + if (!active_property.has_value() || + active_property.value() == KEYWORD_GRAPH) { + if (result.empty() || result.defines(KEYWORD_VALUE, KEYWORD_VALUE_HASH) || + result.defines(KEYWORD_LIST, KEYWORD_LIST_HASH) || + (result.object_size() == 1 && + result.defines(KEYWORD_ID, KEYWORD_ID_HASH))) { + return JSON{nullptr}; + } + } + + return result; +} + +auto expand_entries(ExpansionState &state, ActiveContext &active_context, + const ActiveContext &type_context, JSON &result, + const std::optional &active_property, + const JSON &source, const WeakPointer &source_pointer) + -> void { + // @nest entries are deferred and processed after the direct ones. The + // property is referenced from the source object, never copied. + std::vector> nests; + for (const auto &[key_pointer, value_pointer] : sorted_entries(source)) { + const std::pair entry{*key_pointer, + *value_pointer}; + const JSON::String &property{entry.first}; + const WeakPointer entry_pointer{source_pointer.concat(property)}; + if (property == KEYWORD_CONTEXT) { + continue; + } + + const auto expanded_property{expand_iri(state, active_context, property, + false, true, nullptr, nullptr, + empty_weak_pointer)}; + + if (expanded_property.has_value() && + expanded_property.value() == KEYWORD_NEST) { + if (entry.second.is_array()) { + for (const auto &nest_value : entry.second.as_array()) { + if (!nest_value.is_object() || + nest_value.defines(KEYWORD_VALUE, KEYWORD_VALUE_HASH)) { + throw JSONLDError("Invalid @nest value", entry_pointer); + } + nests.emplace_back(&property, &nest_value); + } + } else if (entry.second.is_object() && + !entry.second.defines(KEYWORD_VALUE, KEYWORD_VALUE_HASH)) { + nests.emplace_back(&property, &entry.second); + } else { + throw JSONLDError("Invalid @nest value", entry_pointer); + } + continue; + } + if (!expanded_property.has_value()) { + continue; + } + + const auto &name{expanded_property.value()}; + if (name.find(':') == JSON::String::npos && !is_keyword(name)) { + continue; + } + + if (is_keyword(name) && active_property.has_value() && + active_property.value() == KEYWORD_REVERSE) { + throw JSONLDError("Invalid reverse property map", entry_pointer); + } + + // The @type and @included exemption from colliding keywords does not apply + // in json-ld-1.0. + if (is_keyword(name) && result.defines(name) && + (state.processing_1_0 || + (name != KEYWORD_TYPE && name != KEYWORD_INCLUDED))) { + throw JSONLDError("Colliding keywords", entry_pointer); + } + + if (name == KEYWORD_ID) { + if (!entry.second.is_string()) { + throw JSONLDError("Invalid @id value", entry_pointer); + } + const auto identifier{expand_iri(state, active_context, + entry.second.to_string(), true, false, + nullptr, nullptr, empty_weak_pointer)}; + if (identifier.has_value()) { + result.assign_assume_new(JSON::String{KEYWORD_ID}, + JSON{identifier.value()}, KEYWORD_ID_HASH); + } else { + result.assign_assume_new(JSON::String{KEYWORD_ID}, JSON{nullptr}, + KEYWORD_ID_HASH); + } + continue; + } + + if (name == KEYWORD_TYPE) { + if (entry.second.is_array()) { + for (const auto &item : entry.second.as_array()) { + if (!item.is_string()) { + throw JSONLDError("Invalid type value", entry_pointer); + } + } + } else if (!entry.second.is_string()) { + throw JSONLDError("Invalid type value", entry_pointer); + } + // Expand each value, preserving whether the input was a string or an + // array. The node-object post-processing later turns a lone string into + // an array, but a value object keeps its @type as a string. + JSON expanded_type{nullptr}; + if (entry.second.is_array()) { + expanded_type = JSON::make_array(); + for (const auto &item : entry.second.as_array()) { + // A value that does not expand to an IRI is omitted, so @type stays + // an array of strings. + auto type{expand_type(state, type_context, item)}; + if (!type.is_null()) { + expanded_type.push_back(std::move(type)); + } + } + } else { + expanded_type = expand_type(state, type_context, entry.second); + } + // A lone @type value that does not expand to an IRI carries nothing. + if (expanded_type.is_null()) { + continue; + } + if (result.defines(KEYWORD_TYPE, KEYWORD_TYPE_HASH)) { + auto merged{ + into_array(std::move(result.at(KEYWORD_TYPE, KEYWORD_TYPE_HASH)))}; + auto expanded_type_array{into_array(std::move(expanded_type))}; + for (auto &item : expanded_type_array.as_array()) { + merged.push_back(item); + } + result.assign(KEYWORD_TYPE, std::move(merged)); + } else { + result.assign_assume_new(JSON::String{KEYWORD_TYPE}, + std::move(expanded_type), KEYWORD_TYPE_HASH); + } + continue; + } + + if (name == KEYWORD_VALUE) { + result.assign_assume_new(JSON::String{KEYWORD_VALUE}, JSON{entry.second}, + KEYWORD_VALUE_HASH); + continue; + } + + if (name == KEYWORD_LANGUAGE) { + if (!entry.second.is_string()) { + throw JSONLDError("Invalid language-tagged string", entry_pointer); + } + result.assign_assume_new(JSON::String{KEYWORD_LANGUAGE}, + JSON{entry.second}, KEYWORD_LANGUAGE_HASH); + continue; + } + + if (name == KEYWORD_DIRECTION) { + if (state.processing_1_0) { + continue; + } + if (!entry.second.is_string() || (entry.second.to_string() != "ltr" && + entry.second.to_string() != "rtl")) { + throw JSONLDError("Invalid base direction", entry_pointer); + } + result.assign_assume_new(JSON::String{KEYWORD_DIRECTION}, + JSON{entry.second}, KEYWORD_DIRECTION_HASH); + continue; + } + + if (name == KEYWORD_LIST || name == KEYWORD_SET) { + auto elements{JSON::make_array()}; + const auto values{into_array(JSON{entry.second})}; + std::size_t value_index{0}; + for (const auto &item : values.as_array()) { + const WeakPointer item_pointer{entry.second.is_array() + ? entry_pointer.concat(value_index) + : entry_pointer}; + auto expanded_item{ + expand(state, active_context, active_property, item, item_pointer)}; + if (expanded_item.is_array()) { + for (auto &nested : expanded_item.as_array()) { + elements.push_back(nested); + } + } else if (!expanded_item.is_null()) { + elements.push_back(std::move(expanded_item)); + } + value_index += 1; + } + if (name == KEYWORD_LIST && state.processing_1_0) { + for (const auto &item : elements.as_array()) { + if (item.is_object() && + item.defines(KEYWORD_LIST, KEYWORD_LIST_HASH)) { + throw JSONLDError("List of lists", entry_pointer); + } + } + } + result.assign(name, std::move(elements)); + continue; + } + + if (name == KEYWORD_GRAPH) { + // @graph expands to an array of node objects, so a value that expands to + // null contributes no element rather than a null one. + auto graph{expand(state, active_context, JSON::String{KEYWORD_GRAPH}, + entry.second, entry_pointer)}; + merge(result, KEYWORD_GRAPH, + graph.is_null() ? JSON::make_array() + : into_array(std::move(graph))); + continue; + } + + if (name == KEYWORD_INCLUDED) { + if (state.processing_1_0) { + continue; + } + auto included{into_array(expand(state, active_context, std::nullopt, + entry.second, entry_pointer))}; + for (const auto &item : included.as_array()) { + if (!item.is_object() || + item.defines(KEYWORD_VALUE, KEYWORD_VALUE_HASH) || + item.defines(KEYWORD_LIST, KEYWORD_LIST_HASH) || + item.defines(KEYWORD_SET, KEYWORD_SET_HASH)) { + throw JSONLDError("Invalid @included value", entry_pointer); + } + } + merge(result, KEYWORD_INCLUDED, std::move(included)); + continue; + } + + if (name == KEYWORD_INDEX) { + if (!entry.second.is_string()) { + throw JSONLDError("Invalid @index value", entry_pointer); + } + result.assign_assume_new(JSON::String{KEYWORD_INDEX}, JSON{entry.second}, + KEYWORD_INDEX_HASH); + continue; + } + + if (name == KEYWORD_REVERSE) { + if (!entry.second.is_object()) { + throw JSONLDError("Invalid @reverse value", entry_pointer); + } + auto reversed{expand(state, active_context, JSON::String{KEYWORD_REVERSE}, + entry.second, entry_pointer)}; + if (reversed.is_object()) { + const auto *existing_reverse{ + result.try_at(KEYWORD_REVERSE, KEYWORD_REVERSE_HASH)}; + auto reverse_map{existing_reverse != nullptr ? *existing_reverse + : JSON::make_object()}; + for (const auto &reverse_entry : reversed.as_object()) { + const auto &reverse_property{reverse_entry.first}; + if (reverse_entry.key_equals(KEYWORD_REVERSE, KEYWORD_REVERSE_HASH)) { + for (auto &forward : reverse_entry.second.as_object()) { + merge(result, JSON::StringView{forward.first}, + into_array(JSON{forward.second})); + } + } else if (is_keyword(reverse_property, reverse_entry.hash)) { + throw JSONLDError("Invalid reverse property map", entry_pointer); + } else { + const auto reverse_values{into_array(JSON{reverse_entry.second})}; + for (const auto &item : reverse_values.as_array()) { + if (item.is_object() && + (item.defines(KEYWORD_VALUE, KEYWORD_VALUE_HASH) || + item.defines(KEYWORD_LIST, KEYWORD_LIST_HASH))) { + throw JSONLDError("Invalid reverse property value", + entry_pointer); + } + } + merge(reverse_map, reverse_property, + into_array(JSON{reverse_entry.second})); + } + } + result.assign(KEYWORD_REVERSE, std::move(reverse_map)); + } + continue; + } + + if (is_keyword(name)) { + // Keywords with no property-level handler (such as @vocab or @none + // reached through an alias) carry no expanded value, so the Expansion + // algorithm adds nothing to the result for them. + continue; + } + + const TermDefinition *definition{nullptr}; + const auto term{active_context.terms.find(property)}; + if (term != active_context.terms.cend()) { + definition = &term->second; + } + + // Property-scoped context (JSON-LD 1.1 API Section 5.1.2 step 13.3) + ActiveContext scoped_context; + const bool scoped{definition != nullptr && definition->context.has_value()}; + if (scoped) { + scoped_context = active_context; + // A property-scoped context propagates by default, so it does not + // inherit an enclosing type-scoped revert. It may, however, set its + // own revert when it specifies @propagate: false. + scoped_context.previous = nullptr; + const auto saved_override{state.protected_override}; + state.protected_override = true; + const auto saved_base{state.context_base_override}; + state.context_base_override = definition->context_base; + process_context(state, scoped_context, definition->context.value(), + entry_pointer); + state.context_base_override = saved_base; + state.protected_override = saved_override; + } + ActiveContext &value_context{scoped ? scoped_context : active_context}; + + JSON expanded_value{nullptr}; + if (definition != nullptr && definition->type_mapping.has_value() && + definition->type_mapping.value() == KEYWORD_JSON) { + // A term coerced to @json keeps its value verbatim. + auto json_value{JSON::make_object()}; + json_value.assign_assume_new(JSON::String{KEYWORD_VALUE}, + JSON{entry.second}, KEYWORD_VALUE_HASH); + json_value.assign_assume_new(JSON::String{KEYWORD_TYPE}, + JSON{KEYWORD_JSON}, KEYWORD_TYPE_HASH); + expanded_value = std::move(json_value); + } else if (entry.second.is_object() && + container_includes(definition, KEYWORD_GRAPH) && + (container_includes(definition, KEYWORD_ID) || + container_includes(definition, KEYWORD_INDEX))) { + const bool by_id{container_includes(definition, KEYWORD_ID)}; + const bool property_valued{definition->index.has_value() && + definition->index.value() != KEYWORD_INDEX}; + std::optional index_property; + if (property_valued) { + index_property = + expand_iri(state, value_context, definition->index.value(), false, + true, nullptr, nullptr, empty_weak_pointer); + } + expanded_value = JSON::make_array(); + for (const auto &[graph_key, graph_value] : + sorted_entries(entry.second)) { + const JSON::String &index{*graph_key}; + const auto expanded_key{index == KEYWORD_NONE + ? std::optional{KEYWORD_NONE} + : expand_iri(state, value_context, index, + true, false, nullptr, nullptr, + empty_weak_pointer)}; + const bool none_key{expanded_key.has_value() && + expanded_key.value() == KEYWORD_NONE}; + auto graph_items{ + into_array(expand(state, value_context, property, *graph_value, + entry_pointer.concat(index)))}; + for (auto &item : graph_items.as_array()) { + // Wrap the item in a graph object, unless it is already one. + JSON graph{nullptr}; + if (item.is_object() && + item.defines(KEYWORD_GRAPH, KEYWORD_GRAPH_HASH)) { + graph = std::move(item); + } else { + graph = JSON::make_object(); + graph.assign_assume_new(JSON::String{KEYWORD_GRAPH}, + into_array(std::move(item)), + KEYWORD_GRAPH_HASH); + } + if (!none_key) { + if (by_id) { + if (!graph.defines(KEYWORD_ID, KEYWORD_ID_HASH)) { + graph.assign_assume_new(JSON::String{KEYWORD_ID}, + JSON{expanded_key.value_or(index)}, + KEYWORD_ID_HASH); + } + } else if (property_valued) { + auto combined{into_array(expand_value( + state, value_context, definition->index, JSON{index}))}; + if (graph.defines(index_property.value())) { + for (auto &existing : + graph.at(index_property.value()).as_array()) { + combined.push_back(existing); + } + } + graph.assign(index_property.value(), std::move(combined)); + } else if (!graph.defines(KEYWORD_INDEX, KEYWORD_INDEX_HASH)) { + graph.assign_assume_new(JSON::String{KEYWORD_INDEX}, JSON{index}, + KEYWORD_INDEX_HASH); + } + } + expanded_value.push_back(std::move(graph)); + } + } + } else if (entry.second.is_object() && + container_includes(definition, KEYWORD_LANGUAGE)) { + expanded_value = JSON::make_array(); + for (const auto &[language_key, language_value] : + sorted_entries(entry.second)) { + const JSON::String &language{*language_key}; + const auto expanded_language{expand_iri(state, value_context, language, + false, true, nullptr, nullptr, + empty_weak_pointer)}; + const bool is_none{language == KEYWORD_NONE || + (expanded_language.has_value() && + expanded_language.value() == KEYWORD_NONE)}; + auto language_items{into_array(JSON{*language_value})}; + for (auto &item : language_items.as_array()) { + if (item.is_null()) { + continue; + } + if (!item.is_string()) { + throw JSONLDError("Invalid language map value", + entry_pointer.concat(language)); + } + auto value{JSON::make_object()}; + value.assign_assume_new(JSON::String{KEYWORD_VALUE}, JSON{item}, + KEYWORD_VALUE_HASH); + if (!is_none) { + value.assign_assume_new(JSON::String{KEYWORD_LANGUAGE}, + JSON{language}, KEYWORD_LANGUAGE_HASH); + } + const auto direction{definition->has_direction + ? definition->direction + : value_context.default_direction}; + if (direction.has_value()) { + value.assign_assume_new(JSON::String{KEYWORD_DIRECTION}, + JSON{direction.value()}, + KEYWORD_DIRECTION_HASH); + } + expanded_value.push_back(std::move(value)); + } + } + } else if (entry.second.is_object() && + container_includes(definition, KEYWORD_INDEX)) { + const bool property_valued{definition->index.has_value() && + definition->index.value() != KEYWORD_INDEX}; + std::optional index_property; + if (property_valued) { + index_property = + expand_iri(state, value_context, definition->index.value(), false, + true, nullptr, nullptr, empty_weak_pointer); + } + expanded_value = JSON::make_array(); + for (const auto &[index_key, index_value] : + sorted_entries(entry.second)) { + const JSON::String &index{*index_key}; + auto index_items{ + into_array(expand(state, value_context, property, *index_value, + entry_pointer.concat(index)))}; + for (auto &item : index_items.as_array()) { + if (index != KEYWORD_NONE) { + if (property_valued) { + if (item.is_object() && + item.defines(KEYWORD_VALUE, KEYWORD_VALUE_HASH)) { + throw JSONLDError("Invalid value object", + entry_pointer.concat(index)); + } + // The index value is prepended to any existing values. + auto combined{into_array(expand_value( + state, value_context, definition->index, JSON{index}))}; + if (item.defines(index_property.value())) { + for (auto &existing : + item.at(index_property.value()).as_array()) { + combined.push_back(existing); + } + } + item.assign(index_property.value(), std::move(combined)); + } else if (!item.defines(KEYWORD_INDEX, KEYWORD_INDEX_HASH)) { + item.assign_assume_new(JSON::String{KEYWORD_INDEX}, JSON{index}, + KEYWORD_INDEX_HASH); + } + } + expanded_value.push_back(item); + } + } + } else if (entry.second.is_object() && + (container_includes(definition, KEYWORD_ID) || + container_includes(definition, KEYWORD_TYPE))) { + const bool by_id{container_includes(definition, KEYWORD_ID)}; + expanded_value = JSON::make_array(); + for (const auto &[map_key, map_value] : sorted_entries(entry.second)) { + const JSON::String &index{*map_key}; + std::optional expanded_index; + if (index != KEYWORD_NONE) { + expanded_index = + expand_iri(state, value_context, index, by_id, !by_id, nullptr, + nullptr, empty_weak_pointer); + } + // The key may be an alias of @none, which carries no identifier. + if (expanded_index.has_value() && + expanded_index.value() == KEYWORD_NONE) { + expanded_index = std::nullopt; + } + // A type map key may carry a type-scoped context for its values. + // Type-scoped contexts do not propagate, so the values are resolved + // against the context that preceded the containing type-scoped + // context, with only this key's context layered on top. + const ActiveContext &base_context{value_context.previous && !by_id + ? *value_context.previous + : value_context}; + ActiveContext entry_context{base_context}; + if (!by_id) { + // Resolve the type term against the context the copy was made from, + // which outlives the copy being mutated below. + const auto type_definition{base_context.terms.find(index)}; + if (type_definition != base_context.terms.cend() && + type_definition->second.context.has_value()) { + const auto saved_base{state.context_base_override}; + state.context_base_override = type_definition->second.context_base; + process_context(state, entry_context, + type_definition->second.context.value(), + entry_pointer.concat(index)); + state.context_base_override = saved_base; + entry_context.previous = nullptr; + } + } + // String values in a type map are node references. + auto entries{JSON::make_array()}; + auto raw_values{into_array(JSON{*map_value})}; + for (auto &raw : raw_values.as_array()) { + if (raw.is_string() && !by_id) { + auto reference{JSON::make_object()}; + const bool reference_vocab{definition->type_mapping.has_value() && + definition->type_mapping.value() == + KEYWORD_VOCAB}; + const auto &raw_string{raw.to_string()}; + const auto referenced{expand_iri(state, value_context, raw_string, + true, reference_vocab, nullptr, + nullptr, empty_weak_pointer)}; + reference.assign_assume_new(JSON::String{KEYWORD_ID}, + JSON{referenced.value_or(raw_string)}, + KEYWORD_ID_HASH); + entries.push_back(std::move(reference)); + } else { + auto expanded_items{ + into_array(expand(state, entry_context, property, raw, + entry_pointer.concat(index)))}; + for (auto &expanded : expanded_items.as_array()) { + entries.push_back(expanded); + } + } + } + for (auto &item : entries.as_array()) { + if (expanded_index.has_value()) { + if (by_id) { + if (!item.defines(KEYWORD_ID, KEYWORD_ID_HASH)) { + item.assign_assume_new(JSON::String{KEYWORD_ID}, + JSON{expanded_index.value()}, + KEYWORD_ID_HASH); + } + } else { + auto types{JSON::make_array()}; + types.push_back(JSON{expanded_index.value()}); + if (item.defines(KEYWORD_TYPE, KEYWORD_TYPE_HASH)) { + auto existing_types{into_array( + std::move(item.at(KEYWORD_TYPE, KEYWORD_TYPE_HASH)))}; + for (auto &existing : existing_types.as_array()) { + types.push_back(existing); + } + } + item.assign(KEYWORD_TYPE, std::move(types)); + } + } + expanded_value.push_back(item); + } + } + } else if (container_includes(definition, KEYWORD_GRAPH)) { + expanded_value = JSON::make_array(); + auto graph_items{into_array( + expand(state, value_context, property, entry.second, entry_pointer))}; + for (auto &item : graph_items.as_array()) { + auto graph{JSON::make_object()}; + graph.assign_assume_new(JSON::String{KEYWORD_GRAPH}, + into_array(std::move(item)), + KEYWORD_GRAPH_HASH); + expanded_value.push_back(std::move(graph)); + } + } else { + if (scoped && value_context.previous && entry.second.is_object() && + !entry.second.defines(KEYWORD_VALUE, KEYWORD_VALUE_HASH)) { + // A non-propagating property-scoped context applies to the immediate + // node, while nested nodes revert to the previous context. + expanded_value = expand_object(state, value_context, property, + entry.second, entry_pointer); + } else { + expanded_value = + expand(state, value_context, property, entry.second, entry_pointer); + } + } + + // A @list container wraps the expanded value, including a @json-coerced + // one. + if (container_includes(definition, KEYWORD_LIST) && + !expanded_value.is_null() && + !(expanded_value.is_object() && + expanded_value.defines(KEYWORD_LIST, KEYWORD_LIST_HASH))) { + expanded_value = into_array(std::move(expanded_value)); + if (state.processing_1_0) { + for (const auto &item : expanded_value.as_array()) { + if (item.is_object() && + item.defines(KEYWORD_LIST, KEYWORD_LIST_HASH)) { + throw JSONLDError("List of lists", entry_pointer); + } + } + } + auto wrapper{JSON::make_object()}; + wrapper.assign_assume_new(JSON::String{KEYWORD_LIST}, + std::move(expanded_value), KEYWORD_LIST_HASH); + expanded_value = std::move(wrapper); + } + + if (expanded_value.is_null()) { + continue; + } + + if (definition != nullptr && definition->reverse) { + const auto reverse_items{into_array(JSON{expanded_value})}; + for (const auto &item : reverse_items.as_array()) { + if (item.is_object() && + (item.defines(KEYWORD_VALUE, KEYWORD_VALUE_HASH) || + item.defines(KEYWORD_LIST, KEYWORD_LIST_HASH))) { + throw JSONLDError("Invalid reverse property value", entry_pointer); + } + } + const auto *existing_reverse{ + result.try_at(KEYWORD_REVERSE, KEYWORD_REVERSE_HASH)}; + auto reverse_map{existing_reverse != nullptr ? *existing_reverse + : JSON::make_object()}; + merge(reverse_map, name, into_array(std::move(expanded_value))); + result.assign(KEYWORD_REVERSE, std::move(reverse_map)); + continue; + } + + merge(result, name, into_array(std::move(expanded_value))); + } + for (const auto &[nest_property, nest] : nests) { + // A @nest alias term may carry a property-scoped context for the nested + // entries. + const WeakPointer nest_pointer{source_pointer.concat(*nest_property)}; + const auto definition{active_context.terms.find(*nest_property)}; + if (definition != active_context.terms.cend() && + definition->second.context.has_value()) { + // Process the scoped context into a copy so the term that owns it is not + // freed while it is being read. + ActiveContext nested{active_context}; + const auto saved_base{state.context_base_override}; + state.context_base_override = definition->second.context_base; + const auto saved_override{state.protected_override}; + state.protected_override = true; + process_context(state, nested, definition->second.context.value(), + nest_pointer); + state.protected_override = saved_override; + state.context_base_override = saved_base; + nested.previous = nullptr; + expand_entries(state, nested, type_context, result, active_property, + *nest, nest_pointer); + } else { + expand_entries(state, active_context, type_context, result, + active_property, *nest, nest_pointer); + } + } +} + +} // namespace + +// Expansion (JSON-LD 1.1 API Section 5.1.2) +auto expand(ExpansionState &state, ActiveContext &active_context, + const std::optional &active_property, + const JSON &element, const WeakPointer &pointer) -> JSON { + if (element.is_null()) { + return JSON{nullptr}; + } + + if (!element.is_object() && !element.is_array()) { + if (!active_property.has_value() || + active_property.value() == KEYWORD_GRAPH) { + return JSON{nullptr}; + } + return expand_value(state, active_context, active_property, element); + } + + if (element.is_array()) { + const TermDefinition *definition{nullptr}; + if (active_property.has_value()) { + const auto term{active_context.terms.find(active_property.value())}; + if (term != active_context.terms.cend()) { + definition = &term->second; + } + } + + auto result{JSON::make_array()}; + std::size_t item_index{0}; + for (const auto &item : element.as_array()) { + auto expanded{expand(state, active_context, active_property, item, + pointer.concat(item_index))}; + if (expanded.is_array()) { + for (auto &nested : expanded.as_array()) { + result.push_back(nested); + } + } else if (!expanded.is_null()) { + result.push_back(std::move(expanded)); + } + item_index += 1; + } + + if (container_includes(definition, KEYWORD_LIST)) { + if (state.processing_1_0) { + for (const auto &item : result.as_array()) { + if (item.is_object() && + item.defines(KEYWORD_LIST, KEYWORD_LIST_HASH)) { + throw JSONLDError("List of lists", pointer); + } + } + } + auto wrapper{JSON::make_object()}; + wrapper.assign_assume_new(JSON::String{KEYWORD_LIST}, std::move(result), + KEYWORD_LIST_HASH); + return wrapper; + } + + return result; + } + + // Revert a non-propagating (type-scoped) context when descending into a node + // object that is neither a value object nor an @id-only reference. + ActiveContext reverted; + ActiveContext *current{&active_context}; + if (active_context.previous) { + bool value_or_id{false}; + const bool single{element.object_size() == 1}; + for (const auto &entry : element.as_object()) { + const auto expanded{expand_iri(state, active_context, entry.first, false, + true, nullptr, nullptr, + empty_weak_pointer)}; + if (expanded.has_value() && + (expanded.value() == KEYWORD_VALUE || + (single && expanded.value() == KEYWORD_ID))) { + value_or_id = true; + break; + } + } + if (!value_or_id) { + reverted = *active_context.previous; + current = &reverted; + } + } + + if (element.defines(KEYWORD_CONTEXT, KEYWORD_CONTEXT_HASH)) { + ActiveContext local{*current}; + process_context(state, local, + element.at(KEYWORD_CONTEXT, KEYWORD_CONTEXT_HASH), + pointer.concat(keyword_context())); + return expand_object(state, local, active_property, element, pointer); + } + + return expand_object(state, *current, active_property, element, pointer); +} + +} // namespace sourcemeta::core diff --git a/vendor/core/src/core/jsonld/jsonld_flatten.cc b/vendor/core/src/core/jsonld/jsonld_flatten.cc new file mode 100644 index 00000000..648aedd3 --- /dev/null +++ b/vendor/core/src/core/jsonld/jsonld_flatten.cc @@ -0,0 +1,82 @@ +#include + +#include "jsonld_algorithms.h" +#include "jsonld_keywords.h" + +#include // std::ranges::sort +#include // std::nullopt +#include // std::move +#include // std::vector + +namespace sourcemeta::core { + +namespace { + +// The node with the only entry @id carries no statements, so it is dropped. +auto is_reference_only(const JSON &node) -> bool { + return node.object_size() == 1 && node.defines(KEYWORD_ID, KEYWORD_ID_HASH); +} + +// The identifiers of a graph's nodes in lexicographical order. The views are +// backed by the graph keys, so the graph must outlive the result. +auto sorted_ids(const JSON &graph) -> std::vector { + std::vector ids; + ids.reserve(graph.object_size()); + for (const auto &entry : graph.as_object()) { + ids.push_back(entry.first); + } + std::ranges::sort(ids); + return ids; +} + +} // namespace + +auto flatten(BlankNodeState &state, const JSON &element) -> JSON { + auto node_map{JSON::make_object()}; + node_map.assign_assume_new(JSON::String{KEYWORD_DEFAULT}, JSON::make_object(), + KEYWORD_DEFAULT_HASH); + generate_node_map(state, node_map, element, KEYWORD_DEFAULT, nullptr, + std::nullopt, nullptr); + + // Fold every named graph into a node of the default graph as an @graph entry. + std::vector graph_names; + for (const auto &entry : node_map.as_object()) { + if (entry.first != KEYWORD_DEFAULT) { + graph_names.push_back(entry.first); + } + } + std::ranges::sort(graph_names); + + auto &default_graph{node_map.at(KEYWORD_DEFAULT, KEYWORD_DEFAULT_HASH)}; + for (const auto graph_name : graph_names) { + if (!default_graph.defines(graph_name)) { + auto node{JSON::make_object()}; + node.assign_assume_new(JSON::String{KEYWORD_ID}, JSON{graph_name}, + KEYWORD_ID_HASH); + default_graph.assign_assume_new(JSON::String{graph_name}, + std::move(node)); + } + + auto graph_array{JSON::make_array()}; + const auto &graph{node_map.at(graph_name)}; + for (const auto &id : sorted_ids(graph)) { + const auto &node{graph.at(id)}; + if (!is_reference_only(node)) { + graph_array.push_back(node); + } + } + default_graph.at(graph_name) + .assign(JSON::String{KEYWORD_GRAPH}, std::move(graph_array)); + } + + auto flattened{JSON::make_array()}; + for (const auto &id : sorted_ids(default_graph)) { + const auto &node{default_graph.at(id)}; + if (!is_reference_only(node)) { + flattened.push_back(node); + } + } + return flattened; +} + +} // namespace sourcemeta::core diff --git a/vendor/core/src/core/jsonld/jsonld_inverse_context.cc b/vendor/core/src/core/jsonld/jsonld_inverse_context.cc new file mode 100644 index 00000000..1c1f24ed --- /dev/null +++ b/vendor/core/src/core/jsonld/jsonld_inverse_context.cc @@ -0,0 +1,147 @@ +#include +#include + +#include "jsonld_algorithms.h" +#include "jsonld_keywords.h" + +#include // std::sort +#include // std::vector + +namespace sourcemeta::core { + +namespace { + +// The text module only lowercases in place, so this returns a lowercased copy. +auto lowercase(const JSON::StringView value) -> JSON::String { + JSON::String result{value}; + to_lowercase(result); + return result; +} + +auto assign_if_missing(JSON &map, const JSON::StringView key, + const JSON::String &term) -> void { + if (!map.defines(key)) { + map.assign_assume_new(JSON::String{key}, JSON{term}); + } +} + +} // namespace + +auto create_inverse_context(const ActiveContext &active_context) -> JSON { + auto result{JSON::make_object()}; + + // When a default base direction is set, the default language is the language + // and direction joined by an underscore, even if there is no default + // language (Section 4.3.1). + JSON::String default_language{KEYWORD_NONE}; + if (active_context.default_direction.has_value()) { + default_language = active_context.default_language.has_value() + ? lowercase(active_context.default_language.value()) + : JSON::String{}; + default_language += "_"; + default_language += lowercase(active_context.default_direction.value()); + } else if (active_context.default_language.has_value()) { + default_language = lowercase(active_context.default_language.value()); + } + + // Terms are processed shortest first, breaking ties lexicographically, so + // that shorter and earlier terms win when several map to the same IRI. + std::vector terms; + terms.reserve(active_context.terms.size()); + for (const auto &entry : active_context.terms) { + terms.push_back(entry.first); + } + std::ranges::sort( + terms, [](const JSON::String &left, const JSON::String &right) -> bool { + if (left.size() != right.size()) { + return left.size() < right.size(); + } + return left < right; + }); + + for (const auto &term : terms) { + const auto &definition{active_context.terms.at(term)}; + if (!definition.iri.has_value()) { + continue; + } + + JSON::String container{KEYWORD_NONE}; + if (!definition.container.empty()) { + auto ordered{definition.container}; + std::ranges::sort(ordered); + container.clear(); + for (const auto &item : ordered) { + container += item; + } + } + + const auto &variable{definition.iri.value()}; + if (!result.defines(variable)) { + result.assign_assume_new(variable, JSON::make_object()); + } + auto &container_map{result.at(variable)}; + if (!container_map.defines(container)) { + auto type_language_map{JSON::make_object()}; + type_language_map.assign_assume_new(JSON::String{KEYWORD_LANGUAGE}, + JSON::make_object(), + KEYWORD_LANGUAGE_HASH); + type_language_map.assign_assume_new( + JSON::String{KEYWORD_TYPE}, JSON::make_object(), KEYWORD_TYPE_HASH); + auto any_map{JSON::make_object()}; + any_map.assign_assume_new(JSON::String{KEYWORD_NONE}, JSON{term}, + KEYWORD_NONE_HASH); + type_language_map.assign_assume_new(JSON::String{KEYWORD_ANY}, + std::move(any_map), KEYWORD_ANY_HASH); + container_map.assign_assume_new(container, std::move(type_language_map)); + } + auto &type_language_map{container_map.at(container)}; + auto &type_map{type_language_map.at(KEYWORD_TYPE, KEYWORD_TYPE_HASH)}; + auto &language_map{ + type_language_map.at(KEYWORD_LANGUAGE, KEYWORD_LANGUAGE_HASH)}; + + if (definition.reverse) { + assign_if_missing(type_map, KEYWORD_REVERSE, term); + } else if (definition.type_mapping.has_value() && + definition.type_mapping.value() == KEYWORD_NONE) { + assign_if_missing(language_map, KEYWORD_ANY, term); + assign_if_missing(type_map, KEYWORD_ANY, term); + } else if (definition.type_mapping.has_value()) { + assign_if_missing(type_map, definition.type_mapping.value(), term); + } else if (definition.has_language && definition.has_direction) { + JSON::String key; + if (definition.language.has_value() && definition.direction.has_value()) { + key = lowercase(definition.language.value()); + key += "_"; + key += lowercase(definition.direction.value()); + } else if (definition.language.has_value()) { + key = lowercase(definition.language.value()); + } else if (definition.direction.has_value()) { + key = "_"; + key += lowercase(definition.direction.value()); + } else { + key = KEYWORD_NULL; + } + assign_if_missing(language_map, key, term); + } else if (definition.has_language) { + const JSON::String key{definition.language.has_value() + ? lowercase(definition.language.value()) + : JSON::String{KEYWORD_NULL}}; + assign_if_missing(language_map, key, term); + } else if (definition.has_direction) { + JSON::String key{KEYWORD_NONE}; + if (definition.direction.has_value()) { + key = "_"; + key += lowercase(definition.direction.value()); + } + assign_if_missing(language_map, key, term); + } else { + assign_if_missing(language_map, default_language, term); + assign_if_missing(language_map, KEYWORD_NONE, term); + assign_if_missing(type_map, KEYWORD_NONE, term); + } + } + + return result; +} + +} // namespace sourcemeta::core diff --git a/vendor/core/src/core/jsonld/jsonld_iri_compaction.cc b/vendor/core/src/core/jsonld/jsonld_iri_compaction.cc new file mode 100644 index 00000000..5da2a502 --- /dev/null +++ b/vendor/core/src/core/jsonld/jsonld_iri_compaction.cc @@ -0,0 +1,387 @@ +#include +#include +#include + +#include "jsonld_algorithms.h" +#include "jsonld_keywords.h" + +#include // std::optional, std::nullopt +#include // std::vector + +namespace sourcemeta::core { + +namespace { + +// The text module only lowercases in place, so this returns a lowercased copy. +auto lowercase(const JSON::StringView value) -> JSON::String { + JSON::String result{value}; + to_lowercase(result); + return result; +} + +// Term Selection (JSON-LD 1.1 API Section 4.3.4) +auto term_selection(const JSON &inverse_context, const JSON::String &variable, + const std::vector &containers, + const JSON::StringView type_language, + const std::vector &preferred) + -> std::optional { + const auto &container_map{inverse_context.at(variable)}; + for (const auto &container : containers) { + if (!container_map.defines(container)) { + continue; + } + const auto &type_language_map{container_map.at(container)}; + const auto &value_map{type_language_map.at(type_language)}; + for (const auto &item : preferred) { + if (value_map.defines(item)) { + return value_map.at(item).to_string(); + } + } + } + return std::nullopt; +} + +// Relativise an absolute IRI against the base IRI. +auto relativize(const JSON::String &iri, const JSON::String &base) + -> JSON::String { + const auto reference{URI::from_iri(base)}; + auto relative{URI::from_iri(iri).relative_to(reference).recompose()}; + // An IRI equal to the base relativises to an empty reference, but JSON-LD + // expresses it as the base's last path segment. + if (relative.empty()) { + if (!reference.path().has_value()) { + return relative; + } + const auto path{reference.path().value()}; + const auto slash{path.rfind('/')}; + return JSON::String{ + slash == JSON::StringView::npos ? path : path.substr(slash + 1)}; + } + // Guard a keyword-like first segment so it is not read back as a keyword. + if (relative.front() == '@') { + return JSON::String{"./"} + relative; + } + return relative; +} + +} // namespace + +auto compact_iri(const ActiveContext &active_context, + const JSON &inverse_context, const JSON::String &variable, + const JSON *const value, const bool vocabulary, + const bool reverse) -> JSON::String { + if (variable.empty()) { + return variable; + } + + if (vocabulary && inverse_context.defines(variable)) { + // When a default base direction is set, the default language is the + // language and direction joined by an underscore, even if there is no + // default language (Section 6.2.3). + JSON::String default_language{KEYWORD_NONE}; + if (active_context.default_direction.has_value()) { + default_language = + active_context.default_language.has_value() + ? lowercase(active_context.default_language.value()) + : JSON::String{}; + default_language += "_"; + default_language += lowercase(active_context.default_direction.value()); + } else if (active_context.default_language.has_value()) { + default_language = lowercase(active_context.default_language.value()); + } + + std::vector containers; + JSON::String type_language{KEYWORD_LANGUAGE}; + JSON::String type_language_value{KEYWORD_NULL}; + + const bool is_object{value != nullptr && value->is_object()}; + const bool has_index{is_object && + value->defines(KEYWORD_INDEX, KEYWORD_INDEX_HASH)}; + const bool is_graph{is_object && + value->defines(KEYWORD_GRAPH, KEYWORD_GRAPH_HASH)}; + + if (has_index && !is_graph) { + containers.emplace_back(KEYWORD_INDEX); + containers.emplace_back(JSON::String{KEYWORD_INDEX} + + JSON::String{KEYWORD_SET}); + } + + if (reverse) { + type_language = KEYWORD_TYPE; + type_language_value = KEYWORD_REVERSE; + containers.emplace_back(KEYWORD_SET); + containers.emplace_back(KEYWORD_NONE); + // A reverse value that is a node without an explicit index may still + // match a reverse term that declares an index container. + if (!has_index) { + containers.emplace_back(KEYWORD_INDEX); + containers.emplace_back(JSON::String{KEYWORD_INDEX} + + JSON::String{KEYWORD_SET}); + } + } else if (is_graph) { + const JSON::String graph{KEYWORD_GRAPH}; + const JSON::String set{KEYWORD_SET}; + // A graph object may match any graph container the term declares, mapping + // by @id or @index when present and by @none otherwise. + containers.emplace_back(graph + JSON::String{KEYWORD_ID}); + containers.emplace_back(graph + JSON::String{KEYWORD_ID} + set); + containers.emplace_back(graph + JSON::String{KEYWORD_INDEX}); + containers.emplace_back(graph + JSON::String{KEYWORD_INDEX} + set); + containers.emplace_back(graph); + containers.emplace_back(graph + set); + if (has_index) { + containers.emplace_back(KEYWORD_INDEX); + containers.emplace_back(JSON::String{KEYWORD_INDEX} + set); + } + containers.emplace_back(KEYWORD_SET); + containers.emplace_back(KEYWORD_NONE); + type_language = KEYWORD_TYPE; + type_language_value = KEYWORD_ID; + } else if (is_object && value->defines(KEYWORD_LIST, KEYWORD_LIST_HASH)) { + if (!has_index) { + containers.emplace_back(KEYWORD_LIST); + } + const auto &list{value->at(KEYWORD_LIST, KEYWORD_LIST_HASH)}; + std::optional common_type; + std::optional common_language; + if (list.empty()) { + common_language = default_language; + } + for (const auto &item : list.as_array()) { + JSON::String item_language{KEYWORD_NONE}; + JSON::String item_type{KEYWORD_NONE}; + if (item.is_object() && + item.defines(KEYWORD_VALUE, KEYWORD_VALUE_HASH)) { + const bool item_has_language{ + item.defines(KEYWORD_LANGUAGE, KEYWORD_LANGUAGE_HASH)}; + const bool item_has_direction{ + item.defines(KEYWORD_DIRECTION, KEYWORD_DIRECTION_HASH)}; + if (item_has_language && item_has_direction) { + item_language = + lowercase(item.at(KEYWORD_LANGUAGE, KEYWORD_LANGUAGE_HASH) + .to_string()) + + "_" + + lowercase(item.at(KEYWORD_DIRECTION, KEYWORD_DIRECTION_HASH) + .to_string()); + } else if (item_has_language) { + item_language = lowercase( + item.at(KEYWORD_LANGUAGE, KEYWORD_LANGUAGE_HASH).to_string()); + } else if (item_has_direction) { + item_language = "_" + lowercase(item.at(KEYWORD_DIRECTION, + KEYWORD_DIRECTION_HASH) + .to_string()); + } else if (item.defines(KEYWORD_TYPE, KEYWORD_TYPE_HASH)) { + item_type = item.at(KEYWORD_TYPE, KEYWORD_TYPE_HASH).to_string(); + } else { + item_language = KEYWORD_NULL; + } + } else { + item_type = KEYWORD_ID; + } + if (!common_language.has_value()) { + common_language = item_language; + } else if (common_language.value() != item_language && + item.is_object() && + item.defines(KEYWORD_VALUE, KEYWORD_VALUE_HASH)) { + common_language = JSON::String{KEYWORD_NONE}; + } + if (!common_type.has_value()) { + common_type = item_type; + } else if (common_type.value() != item_type) { + common_type = JSON::String{KEYWORD_NONE}; + } + } + if (!common_language.has_value()) { + common_language = JSON::String{KEYWORD_NONE}; + } + if (!common_type.has_value()) { + common_type = JSON::String{KEYWORD_NONE}; + } + if (common_type.value() != KEYWORD_NONE) { + type_language = KEYWORD_TYPE; + type_language_value = common_type.value(); + } else { + type_language_value = common_language.value(); + } + // An empty list matches a term via the @any fallback. + if (list.empty()) { + type_language = KEYWORD_ANY; + } + // Fall back to an exact-match term when no list-container term applies. + containers.emplace_back(KEYWORD_NONE); + } else if (is_object && value->defines(KEYWORD_VALUE, KEYWORD_VALUE_HASH)) { + const bool has_language{ + value->defines(KEYWORD_LANGUAGE, KEYWORD_LANGUAGE_HASH)}; + const bool has_direction{ + value->defines(KEYWORD_DIRECTION, KEYWORD_DIRECTION_HASH)}; + const JSON::String language_set{JSON::String{KEYWORD_LANGUAGE} + + JSON::String{KEYWORD_SET}}; + if (has_direction && !has_index) { + type_language_value = + (has_language + ? lowercase(value->at(KEYWORD_LANGUAGE, KEYWORD_LANGUAGE_HASH) + .to_string()) + : JSON::String{}) + + "_" + + lowercase(value->at(KEYWORD_DIRECTION, KEYWORD_DIRECTION_HASH) + .to_string()); + containers.emplace_back(KEYWORD_LANGUAGE); + containers.emplace_back(language_set); + } else if (has_language && !has_index) { + type_language_value = lowercase( + value->at(KEYWORD_LANGUAGE, KEYWORD_LANGUAGE_HASH).to_string()); + containers.emplace_back(KEYWORD_LANGUAGE); + containers.emplace_back(language_set); + } else if (value->defines(KEYWORD_TYPE, KEYWORD_TYPE_HASH)) { + type_language = KEYWORD_TYPE; + type_language_value = + value->at(KEYWORD_TYPE, KEYWORD_TYPE_HASH).to_string(); + } + containers.emplace_back(KEYWORD_SET); + containers.emplace_back(KEYWORD_NONE); + if (!has_index) { + containers.emplace_back(KEYWORD_INDEX); + containers.emplace_back(JSON::String{KEYWORD_INDEX} + + JSON::String{KEYWORD_SET}); + } + // A value object carrying only @value also matches a language container + // term as a last resort, after a no-container term has been considered. + if (value->object_size() == 1) { + containers.emplace_back(KEYWORD_LANGUAGE); + containers.emplace_back(language_set); + } + } else { + type_language = KEYWORD_TYPE; + type_language_value = KEYWORD_ID; + containers.emplace_back(KEYWORD_ID); + containers.emplace_back(JSON::String{KEYWORD_ID} + + JSON::String{KEYWORD_SET}); + containers.emplace_back(KEYWORD_TYPE); + // The inverse context stores container keys sorted, so @set precedes + // @type. + containers.emplace_back(JSON::String{KEYWORD_SET} + + JSON::String{KEYWORD_TYPE}); + containers.emplace_back(KEYWORD_INDEX); + containers.emplace_back(JSON::String{KEYWORD_INDEX} + + JSON::String{KEYWORD_SET}); + containers.emplace_back(KEYWORD_SET); + containers.emplace_back(KEYWORD_NONE); + } + + std::vector preferred; + if (type_language_value == KEYWORD_REVERSE) { + preferred.emplace_back(KEYWORD_REVERSE); + } + if ((type_language_value == KEYWORD_ID || + type_language_value == KEYWORD_REVERSE) && + is_object && value->defines(KEYWORD_ID, KEYWORD_ID_HASH)) { + const auto &identifier{value->at(KEYWORD_ID, KEYWORD_ID_HASH)}; + if (identifier.is_string()) { + const auto compacted_id{compact_iri(active_context, inverse_context, + identifier.to_string(), nullptr, + true, false)}; + const auto iterator{active_context.terms.find(compacted_id)}; + if (iterator != active_context.terms.cend() && + iterator->second.iri.has_value() && + iterator->second.iri.value() == identifier.to_string()) { + preferred.emplace_back(KEYWORD_VOCAB); + preferred.emplace_back(KEYWORD_ID); + preferred.emplace_back(KEYWORD_NONE); + } else { + preferred.emplace_back(KEYWORD_ID); + preferred.emplace_back(KEYWORD_VOCAB); + preferred.emplace_back(KEYWORD_NONE); + } + } + } else if (value == nullptr) { + // A bare vocabulary reference may match a term that coerces @id or + // @vocab. + preferred.emplace_back(KEYWORD_ID); + preferred.emplace_back(KEYWORD_VOCAB); + preferred.emplace_back(KEYWORD_NONE); + } else { + preferred.emplace_back(type_language_value); + preferred.emplace_back(KEYWORD_NONE); + } + preferred.emplace_back(KEYWORD_ANY); + + // A language-and-direction preferred value also allows matching a term that + // only coerces direction, via the substring from the underscore onwards. + std::vector direction_fallbacks; + for (const auto &item : preferred) { + const auto underscore{item.find('_')}; + if (underscore != JSON::String::npos) { + direction_fallbacks.emplace_back(item.substr(underscore)); + } + } + for (auto &fallback : direction_fallbacks) { + preferred.push_back(std::move(fallback)); + } + + const auto term{term_selection(inverse_context, variable, containers, + type_language, preferred)}; + if (term.has_value()) { + return term.value(); + } + } + + // Compact relative to @vocab. + if (vocabulary && active_context.vocabulary.has_value()) { + const auto &vocab{active_context.vocabulary.value()}; + if (variable.size() > vocab.size() && variable.starts_with(vocab)) { + const auto suffix{variable.substr(vocab.size())}; + if (active_context.terms.find(suffix) == active_context.terms.cend()) { + return JSON::String{suffix}; + } + } + } + + // Compact into a prefix:suffix compact IRI. + std::optional candidate; + for (const auto &entry : active_context.terms) { + const auto &term{entry.first}; + const auto &definition{entry.second}; + if (!definition.iri.has_value() || definition.iri.value() == variable || + !definition.prefix || !variable.starts_with(definition.iri.value())) { + continue; + } + auto compact{term}; + compact += ":"; + compact += variable.substr(definition.iri.value().size()); + const auto existing{active_context.terms.find(compact)}; + const bool usable{existing == active_context.terms.cend() || + (value == nullptr && existing->second.iri.has_value() && + existing->second.iri.value() == variable)}; + if (usable && + (!candidate.has_value() || compact.size() < candidate.value().size() || + (compact.size() == candidate.value().size() && + compact < candidate.value()))) { + candidate = compact; + } + } + if (candidate.has_value()) { + return candidate.value(); + } + + // An absolute IRI whose scheme matches a prefix term and that has no + // authority would be misread as a compact IRI (Section 6.2.3). + const auto colon{variable.find(':')}; + if (colon != JSON::String::npos && + variable.compare(colon + 1, 2, "//") != 0) { + const auto scheme{variable.substr(0, colon)}; + const auto iterator{active_context.terms.find(scheme)}; + if (iterator != active_context.terms.cend() && iterator->second.prefix) { + throw JSONLDError("IRI confused with prefix", empty_weak_pointer); + } + } + + // Relativise an absolute IRI against the base IRI. + if (!vocabulary && active_context.compact_to_relative && + active_context.base.has_value() && URI::is_iri(variable)) { + return relativize(variable, active_context.base.value()); + } + + return variable; +} + +} // namespace sourcemeta::core diff --git a/vendor/core/src/core/jsonld/jsonld_iri_expansion.cc b/vendor/core/src/core/jsonld/jsonld_iri_expansion.cc new file mode 100644 index 00000000..736485ad --- /dev/null +++ b/vendor/core/src/core/jsonld/jsonld_iri_expansion.cc @@ -0,0 +1,87 @@ +#include "jsonld_algorithms.h" +#include "jsonld_keywords.h" + +#include + +#include // std::optional + +namespace sourcemeta::core { + +// IRI Expansion (JSON-LD 1.1 API Section 5.2) +auto expand_iri(ExpansionState &state, ActiveContext &active_context, + const JSON::String &value, const bool document_relative, + const bool vocabulary, const JSON *const local_context, + DefinedTerms *const defined, const WeakPointer &context_pointer) + -> std::optional { + if (is_keyword(value)) { + return value; + } + + if (has_keyword_form(value)) { + return std::nullopt; + } + + if (local_context != nullptr && defined != nullptr && + local_context->is_object() && local_context->defines(value)) { + const auto iterator{defined->find(value)}; + if (iterator == defined->cend() || !iterator->second) { + create_term_definition(state, active_context, *local_context, value, + *defined, context_pointer); + } + } + + const auto term{active_context.terms.find(value)}; + if (term != active_context.terms.cend() && term->second.iri.has_value() && + is_keyword(term->second.iri.value())) { + return term->second.iri; + } + + if (vocabulary && term != active_context.terms.cend()) { + return term->second.iri; + } + + if (value.find(':', 1) != JSON::String::npos) { + // The term has the form of a compact IRI, so split at the first colon. + const auto colon{value.find(':')}; + const auto prefix{value.substr(0, colon)}; + const auto suffix{value.substr(colon + 1)}; + if (prefix == "_" || suffix.starts_with("//")) { + return value; + } + + if (local_context != nullptr && defined != nullptr && + local_context->is_object() && local_context->defines(prefix)) { + const auto iterator{defined->find(prefix)}; + if (iterator == defined->cend() || !iterator->second) { + create_term_definition(state, active_context, *local_context, prefix, + *defined, context_pointer); + } + } + + const auto definition{active_context.terms.find(prefix)}; + if (definition != active_context.terms.cend() && + definition->second.iri.has_value() && definition->second.prefix) { + return definition->second.iri.value() + suffix; + } + + // The value is only already an IRI when its prefix is a valid scheme. + // Otherwise it is resolved against the vocabulary or document base below. + if (URI::is_scheme(prefix)) { + return value; + } + } + + if (vocabulary && active_context.vocabulary.has_value()) { + return active_context.vocabulary.value() + value; + } + + if (document_relative && active_context.base.has_value()) { + return URI::from_iri(value) + .resolve_from(URI::from_iri(active_context.base.value())) + .recompose(); + } + + return value; +} + +} // namespace sourcemeta::core diff --git a/vendor/core/src/core/jsonld/jsonld_is_expanded.cc b/vendor/core/src/core/jsonld/jsonld_is_expanded.cc new file mode 100644 index 00000000..154aab25 --- /dev/null +++ b/vendor/core/src/core/jsonld/jsonld_is_expanded.cc @@ -0,0 +1,239 @@ +#include +#include +#include +#include + +#include "jsonld_keywords.h" + +#include // std::uint8_t +#include // std::string_view +#include // std::pair +#include // std::vector + +namespace sourcemeta::core { + +namespace { + +// A blank node identifier is the prefix "_:" followed by a label (JSON-LD 1.1 +// Section 3.5). +auto is_blank_node(const std::string_view value) -> bool { + return value.size() > 2 && value.starts_with("_:"); +} + +// A property term in expanded form is an absolute IRI or a blank node +// identifier (JSON-LD 1.1 Section 9, "node object"). +auto is_term(const std::string_view value) -> bool { + return URI::is_iri(value) || is_blank_node(value); +} + +// @id and @type carry IRI references, which may be relative, or blank node +// identifiers (JSON-LD 1.1 Section 9, "node object"). +auto is_reference(const std::string_view value) -> bool { + return URI::is_iri_reference(value) || is_blank_node(value); +} + +// What a position is expected to be, so that the traversal can validate it +// without recursion: a node object, or a property array item that is a value, +// node, list, or set object. +enum class Expect : std::uint8_t { Node, Item }; + +using Pending = std::vector>; + +// "A value object MUST NOT contain any other entries" beyond @value, @type, +// @language, @direction, and @index, and "MUST NOT contain both @type and +// @language" (JSON-LD 1.1 Section 9, "value object"). A value object is a leaf: +// it carries no positions to traverse further. +auto is_value_object(const JSON &value) -> bool { + for (const auto &entry : value.as_object()) { + if (!entry.key_equals(KEYWORD_VALUE, KEYWORD_VALUE_HASH) && + !entry.key_equals(KEYWORD_TYPE, KEYWORD_TYPE_HASH) && + !entry.key_equals(KEYWORD_LANGUAGE, KEYWORD_LANGUAGE_HASH) && + !entry.key_equals(KEYWORD_DIRECTION, KEYWORD_DIRECTION_HASH) && + !entry.key_equals(KEYWORD_INDEX, KEYWORD_INDEX_HASH)) { + return false; + } + } + + const auto *const contents{value.try_at(KEYWORD_VALUE, KEYWORD_VALUE_HASH)}; + if (contents == nullptr) { + return false; + } + + const auto *const type{value.try_at(KEYWORD_TYPE, KEYWORD_TYPE_HASH)}; + const auto *const language{ + value.try_at(KEYWORD_LANGUAGE, KEYWORD_LANGUAGE_HASH)}; + const auto *const direction{ + value.try_at(KEYWORD_DIRECTION, KEYWORD_DIRECTION_HASH)}; + + if (type != nullptr && (language != nullptr || direction != nullptr)) { + return false; + } + + // When the datatype is @json the value is preserved verbatim, otherwise it is + // a scalar literal. + if (type != nullptr) { + if (!type->is_string()) { + return false; + } + if (type->to_string() != KEYWORD_JSON && + (!URI::is_iri(type->to_string()) || contents->is_object() || + contents->is_array())) { + return false; + } + } else if (contents->is_object() || contents->is_array()) { + return false; + } + + if (language != nullptr && + (!language->is_string() || !is_langtag(language->to_string()) || + !contents->is_string())) { + return false; + } + + if (direction != nullptr && + (!direction->is_string() || + (direction->to_string() != "ltr" && direction->to_string() != "rtl") || + !contents->is_string())) { + return false; + } + + const auto *const index{value.try_at(KEYWORD_INDEX, KEYWORD_INDEX_HASH)}; + return index == nullptr || index->is_string(); +} + +// Validate one node object, appending its descendant positions to the traversal +// rather than recursing. "A node object MUST NOT contain the @value, @list, or +// @set keywords" (JSON-LD 1.1 Section 9, "node object"). @id and @type carry +// IRI references, which may be relative, whereas property terms are absolute +// IRIs or blank node identifiers. +auto validate_node(const JSON &value, Pending &pending) -> bool { + if (!value.is_object()) { + return false; + } + + for (const auto &entry : value.as_object()) { + const JSON::StringView key{entry.first}; + if (!key.empty() && key.front() == '@') { + if (entry.key_equals(KEYWORD_ID, KEYWORD_ID_HASH)) { + if (!entry.second.is_string() || + !is_reference(entry.second.to_string())) { + return false; + } + } else if (entry.key_equals(KEYWORD_INDEX, KEYWORD_INDEX_HASH)) { + if (!entry.second.is_string()) { + return false; + } + } else if (entry.key_equals(KEYWORD_TYPE, KEYWORD_TYPE_HASH)) { + if (!entry.second.is_array()) { + return false; + } + for (const auto &item : entry.second.as_array()) { + if (!item.is_string() || !is_reference(item.to_string())) { + return false; + } + } + } else if (entry.key_equals(KEYWORD_GRAPH, KEYWORD_GRAPH_HASH) || + entry.key_equals(KEYWORD_INCLUDED, KEYWORD_INCLUDED_HASH)) { + if (!entry.second.is_array()) { + return false; + } + for (const auto &item : entry.second.as_array()) { + pending.emplace_back(&item, Expect::Node); + } + } else if (entry.key_equals(KEYWORD_REVERSE, KEYWORD_REVERSE_HASH)) { + if (!entry.second.is_object()) { + return false; + } + for (const auto &reverse : entry.second.as_object()) { + if (!is_term(reverse.first) || !reverse.second.is_array()) { + return false; + } + for (const auto &item : reverse.second.as_array()) { + pending.emplace_back(&item, Expect::Node); + } + } + } else { + return false; + } + } else if (!is_term(key) || !entry.second.is_array()) { + return false; + } else { + // "The value of an expanded property is an array" (JSON-LD 1.1 Section 9) + // whose entries are value, node, list, or set objects. + for (const auto &item : entry.second.as_array()) { + pending.emplace_back(&item, Expect::Item); + } + } + } + + return true; +} + +// Validate one list or set object, appending its entries to the traversal. It +// "MUST NOT contain any other entries" beyond its defining keyword and an +// optional @index (JSON-LD 1.1 Section 9, "list object" and "set object"). +auto validate_list_or_set(const JSON &value, const JSON::StringView keyword, + const JSON::Object::hash_type keyword_hash, + Pending &pending) -> bool { + for (const auto &entry : value.as_object()) { + if (entry.key_equals(keyword, keyword_hash)) { + if (!entry.second.is_array()) { + return false; + } + for (const auto &item : entry.second.as_array()) { + pending.emplace_back(&item, Expect::Item); + } + } else if (entry.key_equals(KEYWORD_INDEX, KEYWORD_INDEX_HASH)) { + if (!entry.second.is_string()) { + return false; + } + } else { + return false; + } + } + return true; +} + +// Validate one property array item, dispatching on its kind. +auto validate_item(const JSON &value, Pending &pending) -> bool { + if (!value.is_object()) { + return false; + } + if (value.defines(KEYWORD_VALUE, KEYWORD_VALUE_HASH)) { + return is_value_object(value); + } + if (value.defines(KEYWORD_LIST, KEYWORD_LIST_HASH)) { + return validate_list_or_set(value, KEYWORD_LIST, KEYWORD_LIST_HASH, + pending); + } + if (value.defines(KEYWORD_SET, KEYWORD_SET_HASH)) { + return validate_list_or_set(value, KEYWORD_SET, KEYWORD_SET_HASH, pending); + } + return validate_node(value, pending); +} + +} // namespace + +auto jsonld_is_expanded(const JSON &document) -> bool { + if (!document.is_array()) { + return false; + } + + Pending pending; + for (const auto &item : document.as_array()) { + pending.emplace_back(&item, Expect::Node); + } + + while (!pending.empty()) { + const auto [value, expect] = pending.back(); + pending.pop_back(); + if (!(expect == Expect::Node ? validate_node(*value, pending) + : validate_item(*value, pending))) { + return false; + } + } + + return true; +} + +} // namespace sourcemeta::core diff --git a/vendor/core/src/core/jsonld/jsonld_keywords.h b/vendor/core/src/core/jsonld/jsonld_keywords.h new file mode 100644 index 00000000..d84fde5a --- /dev/null +++ b/vendor/core/src/core/jsonld/jsonld_keywords.h @@ -0,0 +1,134 @@ +#ifndef SOURCEMETA_CORE_JSONLD_KEYWORDS_H_ +#define SOURCEMETA_CORE_JSONLD_KEYWORDS_H_ + +#include +#include + +#include // std::size_t + +namespace sourcemeta::core { + +// The JSON-LD 1.1 keywords (https://www.w3.org/TR/json-ld11/#keywords). +inline constexpr JSON::StringView KEYWORD_BASE{"@base"}; +inline constexpr JSON::StringView KEYWORD_CONTAINER{"@container"}; +inline constexpr JSON::StringView KEYWORD_CONTEXT{"@context"}; +inline constexpr JSON::StringView KEYWORD_DIRECTION{"@direction"}; +inline constexpr JSON::StringView KEYWORD_GRAPH{"@graph"}; +inline constexpr JSON::StringView KEYWORD_ID{"@id"}; +inline constexpr JSON::StringView KEYWORD_IMPORT{"@import"}; +inline constexpr JSON::StringView KEYWORD_INCLUDED{"@included"}; +inline constexpr JSON::StringView KEYWORD_INDEX{"@index"}; +inline constexpr JSON::StringView KEYWORD_JSON{"@json"}; +inline constexpr JSON::StringView KEYWORD_LANGUAGE{"@language"}; +inline constexpr JSON::StringView KEYWORD_LIST{"@list"}; +inline constexpr JSON::StringView KEYWORD_NEST{"@nest"}; +inline constexpr JSON::StringView KEYWORD_NONE{"@none"}; +inline constexpr JSON::StringView KEYWORD_PREFIX{"@prefix"}; +inline constexpr JSON::StringView KEYWORD_PROPAGATE{"@propagate"}; +inline constexpr JSON::StringView KEYWORD_PROTECTED{"@protected"}; +inline constexpr JSON::StringView KEYWORD_REVERSE{"@reverse"}; +inline constexpr JSON::StringView KEYWORD_SET{"@set"}; +inline constexpr JSON::StringView KEYWORD_TYPE{"@type"}; +inline constexpr JSON::StringView KEYWORD_VALUE{"@value"}; +inline constexpr JSON::StringView KEYWORD_VERSION{"@version"}; +inline constexpr JSON::StringView KEYWORD_VOCAB{"@vocab"}; + +// Precomputed object-key hashes for each keyword, so that object access never +// has to recompute them. +inline const auto KEYWORD_BASE_HASH{JSON::Object::hash(KEYWORD_BASE)}; +inline const auto KEYWORD_CONTAINER_HASH{JSON::Object::hash(KEYWORD_CONTAINER)}; +inline const auto KEYWORD_CONTEXT_HASH{JSON::Object::hash(KEYWORD_CONTEXT)}; +inline const auto KEYWORD_DIRECTION_HASH{JSON::Object::hash(KEYWORD_DIRECTION)}; +inline const auto KEYWORD_GRAPH_HASH{JSON::Object::hash(KEYWORD_GRAPH)}; +inline const auto KEYWORD_ID_HASH{JSON::Object::hash(KEYWORD_ID)}; +inline const auto KEYWORD_IMPORT_HASH{JSON::Object::hash(KEYWORD_IMPORT)}; +inline const auto KEYWORD_INCLUDED_HASH{JSON::Object::hash(KEYWORD_INCLUDED)}; +inline const auto KEYWORD_INDEX_HASH{JSON::Object::hash(KEYWORD_INDEX)}; +inline const auto KEYWORD_JSON_HASH{JSON::Object::hash(KEYWORD_JSON)}; +inline const auto KEYWORD_LANGUAGE_HASH{JSON::Object::hash(KEYWORD_LANGUAGE)}; +inline const auto KEYWORD_LIST_HASH{JSON::Object::hash(KEYWORD_LIST)}; +inline const auto KEYWORD_NEST_HASH{JSON::Object::hash(KEYWORD_NEST)}; +inline const auto KEYWORD_NONE_HASH{JSON::Object::hash(KEYWORD_NONE)}; +inline const auto KEYWORD_PREFIX_HASH{JSON::Object::hash(KEYWORD_PREFIX)}; +inline const auto KEYWORD_PROPAGATE_HASH{JSON::Object::hash(KEYWORD_PROPAGATE)}; +inline const auto KEYWORD_PROTECTED_HASH{JSON::Object::hash(KEYWORD_PROTECTED)}; +inline const auto KEYWORD_REVERSE_HASH{JSON::Object::hash(KEYWORD_REVERSE)}; +inline const auto KEYWORD_SET_HASH{JSON::Object::hash(KEYWORD_SET)}; +inline const auto KEYWORD_TYPE_HASH{JSON::Object::hash(KEYWORD_TYPE)}; +inline const auto KEYWORD_VALUE_HASH{JSON::Object::hash(KEYWORD_VALUE)}; +inline const auto KEYWORD_VERSION_HASH{JSON::Object::hash(KEYWORD_VERSION)}; +inline const auto KEYWORD_VOCAB_HASH{JSON::Object::hash(KEYWORD_VOCAB)}; + +// Markers used internally by inverse context creation, term selection, and node +// map generation. These are not JSON-LD document keywords: @default is the name +// the API algorithms give the default graph, and it is defined by the framing +// specification rather than the core keyword grammar. +inline constexpr JSON::StringView KEYWORD_ANY{"@any"}; +inline constexpr JSON::StringView KEYWORD_DEFAULT{"@default"}; +inline constexpr JSON::StringView KEYWORD_NULL{"@null"}; +inline const auto KEYWORD_ANY_HASH{JSON::Object::hash(KEYWORD_ANY)}; +inline const auto KEYWORD_DEFAULT_HASH{JSON::Object::hash(KEYWORD_DEFAULT)}; + +// A stable owned copy of the @context keyword, suitable as a JSON Pointer +// token. (A namespace-scope JSON::String constant would trip clang-tidy's +// throwing-static-initialization check, hence the function-local static.) +inline auto keyword_context() -> const JSON::String & { + static const JSON::String value{KEYWORD_CONTEXT}; + return value; +} + +inline auto is_keyword(const JSON::StringView value) -> bool { + if (value.size() < 2 || value.front() != '@') { + return false; + } + return value == KEYWORD_BASE || value == KEYWORD_CONTAINER || + value == KEYWORD_CONTEXT || value == KEYWORD_DIRECTION || + value == KEYWORD_GRAPH || value == KEYWORD_ID || + value == KEYWORD_IMPORT || value == KEYWORD_INCLUDED || + value == KEYWORD_INDEX || value == KEYWORD_JSON || + value == KEYWORD_LANGUAGE || value == KEYWORD_LIST || + value == KEYWORD_NEST || value == KEYWORD_NONE || + value == KEYWORD_PREFIX || value == KEYWORD_PROPAGATE || + value == KEYWORD_PROTECTED || value == KEYWORD_REVERSE || + value == KEYWORD_SET || value == KEYWORD_TYPE || + value == KEYWORD_VALUE || value == KEYWORD_VERSION || + value == KEYWORD_VOCAB; +} + +inline auto is_keyword(const JSON::StringView value, + const JSON::Object::hash_type hash) -> bool { + if (value.size() < 2 || value.front() != '@') { + return false; + } + return hash == KEYWORD_BASE_HASH || hash == KEYWORD_CONTAINER_HASH || + hash == KEYWORD_CONTEXT_HASH || hash == KEYWORD_DIRECTION_HASH || + hash == KEYWORD_GRAPH_HASH || hash == KEYWORD_ID_HASH || + hash == KEYWORD_IMPORT_HASH || hash == KEYWORD_INCLUDED_HASH || + hash == KEYWORD_INDEX_HASH || hash == KEYWORD_JSON_HASH || + hash == KEYWORD_LANGUAGE_HASH || hash == KEYWORD_LIST_HASH || + hash == KEYWORD_NEST_HASH || hash == KEYWORD_NONE_HASH || + hash == KEYWORD_PREFIX_HASH || hash == KEYWORD_PROPAGATE_HASH || + hash == KEYWORD_PROTECTED_HASH || hash == KEYWORD_REVERSE_HASH || + hash == KEYWORD_SET_HASH || hash == KEYWORD_TYPE_HASH || + hash == KEYWORD_VALUE_HASH || hash == KEYWORD_VERSION_HASH || + hash == KEYWORD_VOCAB_HASH; +} + +// Whether the given value has the generic form of a keyword (an `@` followed by +// one or more letters), which the algorithms treat as a reserved token even +// when it is not a defined keyword. +inline auto has_keyword_form(const JSON::StringView value) -> bool { + if (value.size() < 2 || value.front() != '@') { + return false; + } + for (std::size_t index{1}; index < value.size(); index += 1) { + if (!is_alpha(value[index])) { + return false; + } + } + return true; +} + +} // namespace sourcemeta::core + +#endif diff --git a/vendor/core/src/core/jsonld/jsonld_node_map.cc b/vendor/core/src/core/jsonld/jsonld_node_map.cc new file mode 100644 index 00000000..1bb54586 --- /dev/null +++ b/vendor/core/src/core/jsonld/jsonld_node_map.cc @@ -0,0 +1,264 @@ +#include +#include + +#include "jsonld_algorithms.h" +#include "jsonld_keywords.h" + +#include // std::ranges::sort +#include // std::optional, std::nullopt +#include // std::to_string +#include // std::move +#include // std::vector + +namespace sourcemeta::core { + +namespace { + +auto is_blank_node(const JSON::StringView value) -> bool { + return value.starts_with("_:"); +} + +// Append a value to the property array of node, creating the array as needed. +auto append_value(JSON &node, const JSON::StringView property, JSON value) + -> void { + if (!node.defines(property)) { + node.assign_assume_new(JSON::String{property}, JSON::make_array()); + } + node.at(property).push_back(std::move(value)); +} + +// Append a value to the property array of node unless an equivalent value is +// already present. +auto add_unique(JSON &node, const JSON::StringView property, JSON value) + -> void { + if (!node.defines(property)) { + node.assign_assume_new(JSON::String{property}, JSON::make_array()); + } + auto &array{node.at(property)}; + for (const auto &item : array.as_array()) { + if (item == value) { + return; + } + } + array.push_back(std::move(value)); +} + +} // namespace + +auto generate_blank_node_identifier( + BlankNodeState &state, const std::optional &identifier) + -> JSON::String { + if (identifier.has_value()) { + const auto iterator{state.identifiers.find(identifier.value())}; + if (iterator != state.identifiers.cend()) { + return iterator->second; + } + } + + JSON::String result{"_:b"}; + result += std::to_string(state.counter); + state.counter += 1; + if (identifier.has_value()) { + state.identifiers.emplace(identifier.value(), result); + } + return result; +} + +auto generate_node_map(BlankNodeState &state, JSON &node_map, + const JSON &element, const JSON::StringView active_graph, + const JSON *active_subject, + const std::optional &active_property, + JSON *list) -> void { + if (element.is_array()) { + for (const auto &item : element.as_array()) { + generate_node_map(state, node_map, item, active_graph, active_subject, + active_property, list); + } + return; + } + + if (!node_map.defines(active_graph)) { + node_map.assign_assume_new(JSON::String{active_graph}, JSON::make_object()); + } + + // The algorithm consumes entries of element, so work on a mutable copy. + JSON working{element}; + + // Relabel blank node identifiers used as @type values. + if (working.defines(KEYWORD_TYPE, KEYWORD_TYPE_HASH)) { + auto &type_value{working.at(KEYWORD_TYPE, KEYWORD_TYPE_HASH)}; + if (type_value.is_array()) { + for (auto &item : type_value.as_array()) { + if (item.is_string() && is_blank_node(item.to_string())) { + item = JSON{generate_blank_node_identifier(state, item.to_string())}; + } + } + } else if (type_value.is_string() && + is_blank_node(type_value.to_string())) { + type_value = + JSON{generate_blank_node_identifier(state, type_value.to_string())}; + } + } + + // A value object is appended to the active property or the list accumulator. + if (working.defines(KEYWORD_VALUE, KEYWORD_VALUE_HASH)) { + if (list == nullptr) { + auto &subject_node{ + node_map.at(active_graph).at(active_subject->to_string())}; + add_unique(subject_node, active_property.value(), std::move(working)); + } else { + list->at(KEYWORD_LIST, KEYWORD_LIST_HASH).push_back(std::move(working)); + } + return; + } + + // A set object is a transparent wrapper around its items, which are processed + // against the same subject and property. Expansion never emits one, but the + // expanded form predicate accepts it. + if (working.defines(KEYWORD_SET, KEYWORD_SET_HASH)) { + generate_node_map(state, node_map, + working.at(KEYWORD_SET, KEYWORD_SET_HASH), active_graph, + active_subject, active_property, list); + return; + } + + // A list object recurses into a fresh list accumulator. + if (working.defines(KEYWORD_LIST, KEYWORD_LIST_HASH)) { + auto result{JSON::make_object()}; + result.assign_assume_new(JSON::String{KEYWORD_LIST}, JSON::make_array(), + KEYWORD_LIST_HASH); + generate_node_map(state, node_map, + working.at(KEYWORD_LIST, KEYWORD_LIST_HASH), active_graph, + active_subject, active_property, &result); + if (list == nullptr) { + auto &subject_node{ + node_map.at(active_graph).at(active_subject->to_string())}; + append_value(subject_node, active_property.value(), std::move(result)); + } else { + list->at(KEYWORD_LIST, KEYWORD_LIST_HASH).push_back(std::move(result)); + } + return; + } + + // A node object. + std::optional id; + if (working.defines(KEYWORD_ID, KEYWORD_ID_HASH)) { + const auto &id_value{working.at(KEYWORD_ID, KEYWORD_ID_HASH)}; + if (id_value.is_string()) { + id = is_blank_node(id_value.to_string()) + ? generate_blank_node_identifier(state, id_value.to_string()) + : id_value.to_string(); + } + working.erase(KEYWORD_ID); + } + if (!id.has_value()) { + id = generate_blank_node_identifier(state, std::nullopt); + } + + if (!node_map.at(active_graph).defines(id.value())) { + auto node{JSON::make_object()}; + node.assign_assume_new(JSON::String{KEYWORD_ID}, JSON{id.value()}, + KEYWORD_ID_HASH); + node_map.at(active_graph) + .assign_assume_new(JSON::String{id.value()}, std::move(node)); + } + + if (active_subject != nullptr && active_subject->is_object()) { + // A reverse relationship wires the referenced subject into this node. + add_unique(node_map.at(active_graph).at(id.value()), + active_property.value(), *active_subject); + } else if (active_property.has_value()) { + auto reference{JSON::make_object()}; + reference.assign_assume_new(JSON::String{KEYWORD_ID}, JSON{id.value()}, + KEYWORD_ID_HASH); + if (list == nullptr) { + add_unique(node_map.at(active_graph).at(active_subject->to_string()), + active_property.value(), std::move(reference)); + } else { + list->at(KEYWORD_LIST, KEYWORD_LIST_HASH).push_back(std::move(reference)); + } + } + + if (working.defines(KEYWORD_TYPE, KEYWORD_TYPE_HASH)) { + auto &node{node_map.at(active_graph).at(id.value())}; + if (!node.defines(KEYWORD_TYPE, KEYWORD_TYPE_HASH)) { + node.assign_assume_new(JSON::String{KEYWORD_TYPE}, JSON::make_array(), + KEYWORD_TYPE_HASH); + } + auto &node_types{node.at(KEYWORD_TYPE, KEYWORD_TYPE_HASH)}; + for (const auto &item : + working.at(KEYWORD_TYPE, KEYWORD_TYPE_HASH).as_array()) { + bool present{false}; + for (const auto &existing : node_types.as_array()) { + if (existing == item) { + present = true; + break; + } + } + if (!present) { + node_types.push_back(item); + } + } + working.erase(KEYWORD_TYPE); + } + + if (working.defines(KEYWORD_INDEX, KEYWORD_INDEX_HASH)) { + auto &node{node_map.at(active_graph).at(id.value())}; + const auto &index_value{working.at(KEYWORD_INDEX, KEYWORD_INDEX_HASH)}; + if (node.defines(KEYWORD_INDEX, KEYWORD_INDEX_HASH) && + node.at(KEYWORD_INDEX, KEYWORD_INDEX_HASH) != index_value) { + throw JSONLDError("Conflicting indexes", empty_weak_pointer); + } + node.assign(JSON::String{KEYWORD_INDEX}, JSON{index_value}); + working.erase(KEYWORD_INDEX); + } + + if (working.defines(KEYWORD_REVERSE, KEYWORD_REVERSE_HASH)) { + auto referenced_node{JSON::make_object()}; + referenced_node.assign_assume_new(JSON::String{KEYWORD_ID}, + JSON{id.value()}, KEYWORD_ID_HASH); + for (const auto &entry : + working.at(KEYWORD_REVERSE, KEYWORD_REVERSE_HASH).as_object()) { + for (const auto &value : entry.second.as_array()) { + generate_node_map(state, node_map, value, active_graph, + &referenced_node, entry.first, nullptr); + } + } + working.erase(KEYWORD_REVERSE); + } + + if (working.defines(KEYWORD_GRAPH, KEYWORD_GRAPH_HASH)) { + generate_node_map(state, node_map, + working.at(KEYWORD_GRAPH, KEYWORD_GRAPH_HASH), id.value(), + nullptr, std::nullopt, nullptr); + working.erase(KEYWORD_GRAPH); + } + + if (working.defines(KEYWORD_INCLUDED, KEYWORD_INCLUDED_HASH)) { + generate_node_map(state, node_map, + working.at(KEYWORD_INCLUDED, KEYWORD_INCLUDED_HASH), + active_graph, nullptr, std::nullopt, nullptr); + working.erase(KEYWORD_INCLUDED); + } + + std::vector properties; + properties.reserve(working.object_size()); + for (const auto &entry : working.as_object()) { + properties.push_back(entry.first); + } + std::ranges::sort(properties); + const JSON subject{id.value()}; + for (const auto property : properties) { + const JSON::String key{is_blank_node(property) + ? generate_blank_node_identifier(state, property) + : JSON::String{property}}; + auto &node{node_map.at(active_graph).at(id.value())}; + if (!node.defines(key)) { + node.assign_assume_new(JSON::String{key}, JSON::make_array()); + } + generate_node_map(state, node_map, working.at(property), active_graph, + &subject, key, nullptr); + } +} + +} // namespace sourcemeta::core diff --git a/vendor/core/src/core/jsonld/jsonld_value_compaction.cc b/vendor/core/src/core/jsonld/jsonld_value_compaction.cc new file mode 100644 index 00000000..8012adbb --- /dev/null +++ b/vendor/core/src/core/jsonld/jsonld_value_compaction.cc @@ -0,0 +1,135 @@ +#include +#include + +#include "jsonld_algorithms.h" +#include "jsonld_keywords.h" + +#include // std::find +#include // std::optional + +namespace sourcemeta::core { + +namespace { + +// The text module only lowercases in place, so this returns a lowercased copy. +auto lowercase(const JSON::StringView value) -> JSON::String { + JSON::String result{value}; + to_lowercase(result); + return result; +} + +auto container_includes(const TermDefinition *const definition, + const JSON::StringView keyword) -> bool { + if (definition == nullptr) { + return false; + } + return std::ranges::find(definition->container, keyword) != + definition->container.cend(); +} + +} // namespace + +auto compact_value(const ActiveContext &active_context, + const JSON &inverse_context, + const std::optional &active_property, + const JSON &value) -> JSON { + const TermDefinition *definition{nullptr}; + if (active_property.has_value()) { + const auto iterator{active_context.terms.find(active_property.value())}; + if (iterator != active_context.terms.cend()) { + definition = &iterator->second; + } + } + + const std::optional language{ + definition != nullptr && definition->has_language + ? definition->language + : active_context.default_language}; + const std::optional direction{ + definition != nullptr && definition->has_direction + ? definition->direction + : active_context.default_direction}; + const bool index_ok{!value.defines(KEYWORD_INDEX, KEYWORD_INDEX_HASH) || + container_includes(definition, KEYWORD_INDEX)}; + + if (value.defines(KEYWORD_ID, KEYWORD_ID_HASH)) { + const auto extra{ + value.object_size() - 1 - + (value.defines(KEYWORD_INDEX, KEYWORD_INDEX_HASH) ? 1U : 0U)}; + const auto &identifier{value.at(KEYWORD_ID, KEYWORD_ID_HASH)}; + if (extra == 0 && index_ok && definition != nullptr && + definition->type_mapping.has_value() && identifier.is_string()) { + if (definition->type_mapping.value() == KEYWORD_ID) { + return JSON{compact_iri(active_context, inverse_context, + identifier.to_string(), nullptr, false, false)}; + } + if (definition->type_mapping.value() == KEYWORD_VOCAB) { + return JSON{compact_iri(active_context, inverse_context, + identifier.to_string(), nullptr, true, false)}; + } + } + } else if (value.defines(KEYWORD_VALUE, KEYWORD_VALUE_HASH)) { + const auto &contents{value.at(KEYWORD_VALUE, KEYWORD_VALUE_HASH)}; + const bool has_type{value.defines(KEYWORD_TYPE, KEYWORD_TYPE_HASH)}; + const bool type_matches{ + has_type && definition != nullptr && + definition->type_mapping.has_value() && + definition->type_mapping.value() == + value.at(KEYWORD_TYPE, KEYWORD_TYPE_HASH).to_string()}; + const bool type_disabled{ + (definition != nullptr && definition->type_mapping.has_value() && + definition->type_mapping.value() == KEYWORD_NONE) || + (has_type && !type_matches)}; + + if (type_matches && index_ok) { + return contents; + } else if (type_disabled) { + // Value compaction is disabled, but the @type IRI is still compacted in + // the full form below. + } else if (!contents.is_string()) { + if (index_ok) { + return contents; + } + } else { + const bool language_matches{ + value.defines(KEYWORD_LANGUAGE, KEYWORD_LANGUAGE_HASH) + ? (language.has_value() && + lowercase(value.at(KEYWORD_LANGUAGE, KEYWORD_LANGUAGE_HASH) + .to_string()) == lowercase(language.value())) + : !language.has_value()}; + const bool direction_matches{ + value.defines(KEYWORD_DIRECTION, KEYWORD_DIRECTION_HASH) + ? (direction.has_value() && + value.at(KEYWORD_DIRECTION, KEYWORD_DIRECTION_HASH) + .to_string() == direction.value()) + : !direction.has_value()}; + if (language_matches && direction_matches && index_ok) { + return contents; + } + } + } + + // Full form: keep the value object but alias its keyword keys and compact the + // @type and @id IRIs. + auto result{JSON::make_object()}; + for (const auto &entry : value.as_object()) { + const auto key{compact_iri(active_context, inverse_context, entry.first, + nullptr, true, false)}; + if (entry.first == KEYWORD_TYPE && entry.second.is_string()) { + result.assign_assume_new( + key, + JSON{compact_iri(active_context, inverse_context, + entry.second.to_string(), nullptr, true, false)}); + } else if (entry.first == KEYWORD_ID && entry.second.is_string()) { + result.assign_assume_new( + key, + JSON{compact_iri(active_context, inverse_context, + entry.second.to_string(), nullptr, false, false)}); + } else { + result.assign_assume_new(key, JSON{entry.second}); + } + } + return result; +} + +} // namespace sourcemeta::core diff --git a/vendor/core/src/core/jsonld/jsonld_value_expansion.cc b/vendor/core/src/core/jsonld/jsonld_value_expansion.cc new file mode 100644 index 00000000..ed5f3dfc --- /dev/null +++ b/vendor/core/src/core/jsonld/jsonld_value_expansion.cc @@ -0,0 +1,77 @@ +#include "jsonld_algorithms.h" +#include "jsonld_keywords.h" + +#include // std::optional + +namespace sourcemeta::core { + +// Value Expansion (JSON-LD 1.1 API Section 5.3.3) +auto expand_value(ExpansionState &state, ActiveContext &active_context, + const std::optional &active_property, + const JSON &value) -> JSON { + const TermDefinition *definition{nullptr}; + if (active_property.has_value()) { + const auto iterator{active_context.terms.find(active_property.value())}; + if (iterator != active_context.terms.cend()) { + definition = &iterator->second; + } + } + + if (definition != nullptr && definition->type_mapping.has_value() && + value.is_string()) { + if (definition->type_mapping.value() == KEYWORD_ID) { + auto result{JSON::make_object()}; + const auto identifier{expand_iri(state, active_context, value.to_string(), + true, false, nullptr, nullptr, + empty_weak_pointer)}; + result.assign_assume_new(JSON::String{KEYWORD_ID}, + identifier.has_value() ? JSON{identifier.value()} + : JSON{nullptr}, + KEYWORD_ID_HASH); + return result; + } + if (definition->type_mapping.value() == KEYWORD_VOCAB) { + auto result{JSON::make_object()}; + const auto identifier{expand_iri(state, active_context, value.to_string(), + true, true, nullptr, nullptr, + empty_weak_pointer)}; + result.assign_assume_new(JSON::String{KEYWORD_ID}, + identifier.has_value() ? JSON{identifier.value()} + : JSON{nullptr}, + KEYWORD_ID_HASH); + return result; + } + } + + auto result{JSON::make_object()}; + result.assign_assume_new(JSON::String{KEYWORD_VALUE}, JSON{value}, + KEYWORD_VALUE_HASH); + + if (definition != nullptr && definition->type_mapping.has_value() && + definition->type_mapping.value() != KEYWORD_ID && + definition->type_mapping.value() != KEYWORD_VOCAB && + definition->type_mapping.value() != KEYWORD_NONE) { + result.assign_assume_new(JSON::String{KEYWORD_TYPE}, + JSON{definition->type_mapping.value()}, + KEYWORD_TYPE_HASH); + } else if (value.is_string()) { + const auto language{definition != nullptr && definition->has_language + ? definition->language + : active_context.default_language}; + if (language.has_value()) { + result.assign_assume_new(JSON::String{KEYWORD_LANGUAGE}, + JSON{language.value()}, KEYWORD_LANGUAGE_HASH); + } + const auto direction{definition != nullptr && definition->has_direction + ? definition->direction + : active_context.default_direction}; + if (direction.has_value()) { + result.assign_assume_new(JSON::String{KEYWORD_DIRECTION}, + JSON{direction.value()}, KEYWORD_DIRECTION_HASH); + } + } + + return result; +} + +} // namespace sourcemeta::core diff --git a/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer_pointer.h b/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer_pointer.h index 5fa7e680..c17cb15e 100644 --- a/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer_pointer.h +++ b/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer_pointer.h @@ -535,6 +535,40 @@ template class GenericPointer { return result; } + /// Concatenate a JSON Pointer with a single property token, getting a new + /// pointer as a result. For example: + /// + /// ```cpp + /// #include + /// #include + /// + /// const sourcemeta::core::Pointer pointer{"foo"}; + /// assert(pointer.concat("bar") == sourcemeta::core::Pointer{"foo", "bar"}); + /// ``` + [[nodiscard]] auto concat(const typename Token::Property &property) const + -> GenericPointer { + GenericPointer result{*this}; + result.push_back(property); + return result; + } + + /// Concatenate a JSON Pointer with a single index token, getting a new + /// pointer as a result. For example: + /// + /// ```cpp + /// #include + /// #include + /// + /// const sourcemeta::core::Pointer pointer{"foo"}; + /// assert(pointer.concat(0) == sourcemeta::core::Pointer{"foo", 0}); + /// ``` + [[nodiscard]] auto concat(const typename Token::Index &index) const + -> GenericPointer { + GenericPointer result{*this}; + result.push_back(index); + return result; + } + /// Check whether a JSON Pointer starts with another JSON Pointer. For /// example: /// diff --git a/vendor/core/src/core/uri/grammar.h b/vendor/core/src/core/uri/grammar.h index 1c36df33..f560e6a2 100644 --- a/vendor/core/src/core/uri/grammar.h +++ b/vendor/core/src/core/uri/grammar.h @@ -65,6 +65,23 @@ inline auto uri_is_unreserved(const char character) -> bool { } } +// gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" +// See https://www.rfc-editor.org/rfc/rfc3986#section-2.2 +inline auto uri_is_gen_delim(const char character) -> bool { + switch (character) { + case URI_COLON: + case URI_SLASH: + case URI_QUESTION: + case URI_HASH: + case URI_OPEN_BRACKET: + case URI_CLOSE_BRACKET: + case URI_AT: + return true; + default: + return false; + } +} + // sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "=" // See https://www.rfc-editor.org/rfc/rfc3986#section-2.2 inline auto uri_is_sub_delim(const char character) -> bool { diff --git a/vendor/core/src/core/uri/include/sourcemeta/core/uri.h b/vendor/core/src/core/uri/include/sourcemeta/core/uri.h index 31415cf9..c88c8cf2 100644 --- a/vendor/core/src/core/uri/include/sourcemeta/core/uri.h +++ b/vendor/core/src/core/uri/include/sourcemeta/core/uri.h @@ -737,6 +737,31 @@ class SOURCEMETA_CORE_URI_EXPORT URI { /// ``` static auto canonicalize(std::string_view input) -> std::string; + /// Check if the given string is a valid URI scheme per RFC 3986 + /// (`ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )`). For example: + /// + /// ```cpp + /// #include + /// #include + /// + /// assert(sourcemeta::core::URI::is_scheme("https")); + /// assert(!sourcemeta::core::URI::is_scheme("1https")); + /// assert(!sourcemeta::core::URI::is_scheme("http:")); + /// ``` + [[nodiscard]] static auto is_scheme(std::string_view input) noexcept -> bool; + + /// Check if the given character is a URI generic delimiter per RFC 3986 + /// (`":" / "/" / "?" / "#" / "[" / "]" / "@"`). For example: + /// + /// ```cpp + /// #include + /// #include + /// + /// assert(sourcemeta::core::URI::is_gen_delim(':')); + /// assert(!sourcemeta::core::URI::is_gen_delim('a')); + /// ``` + [[nodiscard]] static auto is_gen_delim(char character) noexcept -> bool; + /// Check if the given string is a valid absolute URI (has a scheme) per /// RFC 3986 without constructing a full URI object. For example: /// diff --git a/vendor/core/src/core/uri/parse.cc b/vendor/core/src/core/uri/parse.cc index 0f90e9b8..dd8ba381 100644 --- a/vendor/core/src/core/uri/parse.cc +++ b/vendor/core/src/core/uri/parse.cc @@ -645,6 +645,22 @@ auto URI::parse(const std::string_view input) -> void { } } +auto URI::is_scheme(const std::string_view input) noexcept -> bool { + if (input.empty() || !is_alpha(input.front())) { + return false; + } + for (const auto character : input) { + if (!uri_is_scheme_char(character)) { + return false; + } + } + return true; +} + +auto URI::is_gen_delim(const char character) noexcept -> bool { + return uri_is_gen_delim(character); +} + auto URI::is_uri(const std::string_view input) noexcept -> bool { try { std::optional scheme, userinfo, host, path, query, fragment; diff --git a/vendor/core/src/core/uri/resolution.cc b/vendor/core/src/core/uri/resolution.cc index 42118b10..adf4eb19 100644 --- a/vendor/core/src/core/uri/resolution.cc +++ b/vendor/core/src/core/uri/resolution.cc @@ -156,6 +156,34 @@ auto URI::relative_to(const URI &base) -> URI & { return *this; } + // Both URIs share the same path and differ only in their query or fragment. + // An empty relative path is safe when this URI carries its own query or the + // base has none, since resolution then will not re-inherit the base's query. + // Otherwise a non-empty path is required to reset the query, which is only + // possible when there is a path to express. + if (this->path_ == base.path_) { + const bool empty_path_safe{this->query_.has_value() || + !base.query_.has_value()}; + if (empty_path_safe || this->path_.has_value()) { + this->scheme_.reset(); + this->userinfo_.reset(); + this->host_.reset(); + this->port_.reset(); + if (empty_path_safe) { + this->path_.reset(); + } else { + // Use the last path segment, or "./" when the path has no final segment + // (it ends in a slash). + const auto &path{this->path_.value()}; + const auto slash{path.rfind('/')}; + auto segment{slash == std::string::npos ? path + : path.substr(slash + 1)}; + this->path_ = segment.empty() ? std::string{"./"} : std::move(segment); + } + return *this; + } + } + // If this URI doesn't have a path, we can't make it relative if (!this->path_.has_value()) { return *this;