Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions src/core/jsonschema/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ include(./official_resolver.cmake)

sourcemeta_library(NAMESPACE sourcemeta PROJECT core NAME jsonschema
PRIVATE_HEADERS bundle.h resolver.h walker.h frame.h error.h
types.h transform.h
SOURCES jsonschema.cc official_walker.cc frame.cc resolver.cc
walker.cc bundle.cc transformer.cc format.cc
types.h transform.h vocabularies.h
SOURCES jsonschema.cc vocabularies.cc official_walker.cc
frame.cc resolver.cc walker.cc bundle.cc transformer.cc format.cc
"${CMAKE_CURRENT_BINARY_DIR}/official_resolver.cc")

if(SOURCEMETA_CORE_INSTALL)
Expand Down
18 changes: 7 additions & 11 deletions src/core/jsonschema/include/sourcemeta/core/jsonschema_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,17 @@

#include <sourcemeta/core/json.h>
#include <sourcemeta/core/jsonpointer.h>
#include <sourcemeta/core/jsonschema_vocabularies.h>

#include <cstdint> // std::uint8_t
#include <functional> // std::function, std::reference_wrapper
#include <optional> // std::optional
#include <set> // std::set
#include <string> // std::string
#include <string_view> // std::string_view
#include <unordered_map> // std::unordered_map
#include <cstdint> // std::uint8_t
#include <functional> // std::function, std::reference_wrapper
#include <optional> // std::optional
#include <set> // std::set
#include <string> // std::string
#include <string_view> // std::string_view

namespace sourcemeta::core {

/// @ingroup jsonschema
/// A set of vocabularies
using Vocabularies = std::unordered_map<JSON::String, bool>;

// Take a URI and get back a schema
/// @ingroup jsonschema
///
Expand Down
138 changes: 138 additions & 0 deletions src/core/jsonschema/include/sourcemeta/core/jsonschema_vocabularies.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
#ifndef SOURCEMETA_CORE_JSONSCHEMA_VOCABULARIES_H_
#define SOURCEMETA_CORE_JSONSCHEMA_VOCABULARIES_H_

#ifndef SOURCEMETA_CORE_JSONSCHEMA_EXPORT
#include <sourcemeta/core/jsonschema_export.h>
#endif

#include <sourcemeta/core/json.h>

#include <bitset> // std::bitset
#include <cassert> // assert
#include <cstdint> // std::uint8_t
#include <optional> // std::optional
#include <stdexcept> // std::out_of_range
#include <string> // std::string
#include <string_view> // std::string_view
#include <unordered_map> // std::unordered_map
#include <utility> // std::pair
#include <vector> // std::vector

namespace sourcemeta::core {

/// @ingroup jsonschema
/// Optimized vocabulary set using bitflags for known vocabularies
/// and a fallback `std::unordered_map` for custom vocabularies.
///
/// TODO: To maximize performance gains, convert string-based vocabulary checks
/// throughout the codebase to use enum-based methods.
struct SOURCEMETA_CORE_JSONSCHEMA_EXPORT Vocabularies {
/// The JSON Schema vocabularies that are known ahead of time.
/// Every enumerator stands for one bitflag, which keeps storage and lookup
/// cheap. Enumerators are numbered consecutively starting at zero, so the
/// trailing `COUNT` sentinel always equals the number of known vocabularies.
enum class Known : std::uint8_t {
  // Dialects that predate vocabularies (each one is treated as a vocabulary)
  JSON_Schema_Draft_0 = 0,
  JSON_Schema_Draft_0_Hyper,
  JSON_Schema_Draft_1,
  JSON_Schema_Draft_1_Hyper,
  JSON_Schema_Draft_2,
  JSON_Schema_Draft_2_Hyper,
  JSON_Schema_Draft_3,
  JSON_Schema_Draft_3_Hyper,
  JSON_Schema_Draft_4,
  JSON_Schema_Draft_4_Hyper,
  JSON_Schema_Draft_6,
  JSON_Schema_Draft_6_Hyper,
  JSON_Schema_Draft_7,
  JSON_Schema_Draft_7_Hyper,

  // Vocabularies introduced by 2019-09
  JSON_Schema_2019_09_Core,
  JSON_Schema_2019_09_Applicator,
  JSON_Schema_2019_09_Validation,
  JSON_Schema_2019_09_Meta_Data,
  JSON_Schema_2019_09_Format,
  JSON_Schema_2019_09_Content,
  JSON_Schema_2019_09_Hyper_Schema,

  // Vocabularies introduced by 2020-12
  JSON_Schema_2020_12_Core,
  JSON_Schema_2020_12_Applicator,
  JSON_Schema_2020_12_Unevaluated,
  JSON_Schema_2020_12_Validation,
  JSON_Schema_2020_12_Meta_Data,
  JSON_Schema_2020_12_Format_Annotation,
  JSON_Schema_2020_12_Format_Assertion,
  JSON_Schema_2020_12_Content,

  // Not a vocabulary: must stay last, as it counts the enumerators above
  COUNT
};

public:
/// Default constructor
Vocabularies() = default;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add default copy/move constructors/assignment operators too!


/// Copy constructor (compiler-generated)
Vocabularies(const Vocabularies &) = default;

/// Move constructor (compiler-generated, declared non-throwing)
Vocabularies(Vocabularies &&) noexcept = default;

/// Copy assignment operator (compiler-generated)
auto operator=(const Vocabularies &) -> Vocabularies & = default;

/// Move assignment operator (compiler-generated, declared non-throwing)
auto operator=(Vocabularies &&) noexcept -> Vocabularies & = default;

/// Destructor (compiler-generated)
~Vocabularies() = default;

/// Construct from a brace-enclosed list of `{URI, required}` pairs, so that
/// map-style initialization such as `{{"<uri>", true}}` keeps compiling
/// (for backward compatibility). Intentionally not `explicit`.
///
/// NOTE(review): `std::initializer_list` is named here, but this header does
/// not include `<initializer_list>` directly; consider adding the include
/// instead of relying on a transitive one.
Vocabularies(std::initializer_list<std::pair<JSON::String, bool>> init);

/// Construct from a brace-enclosed list of `{Known, required}` pairs
/// (optimized, as no URI strings are involved)
Vocabularies(std::initializer_list<std::pair<Known, bool>> init);

/// Check if a vocabulary is enabled, identified by its URI
[[nodiscard]] auto contains(const JSON::String &uri) const noexcept -> bool;

/// Check if a known vocabulary is enabled (optimized)
[[nodiscard]] auto contains(Known vocabulary) const noexcept -> bool;

/// Insert a vocabulary with its required/optional status. The flag is `true`
/// for a required vocabulary and `false` for an optional one.
///
/// NOTE(review): this is declared `noexcept`. If the implementation stores
/// URIs without a `Known` counterpart in the `custom` map, a failed
/// allocation would terminate the program instead of throwing; confirm that
/// this is intended.
auto insert(const JSON::String &uri, bool required) noexcept -> void;

/// Insert a known vocabulary with its required/optional status (optimized)
auto insert(Known vocabulary, bool required) noexcept -> void;

/// Get vocabulary status by URI. Presumably the result is empty when the
/// vocabulary is not present, and otherwise holds `true` (required) or
/// `false` (optional); confirm against the implementation
[[nodiscard]] auto get(const JSON::String &uri) const noexcept
-> std::optional<bool>;

/// Get known vocabulary status (optimized). Presumably follows the same
/// convention as the URI overload; confirm against the implementation
[[nodiscard]] auto get(Known vocabulary) const noexcept
-> std::optional<bool>;

/// Get the number of vocabularies (required + optional + custom)
[[nodiscard]] auto size() const noexcept -> std::size_t;

/// Check if there are no vocabularies
[[nodiscard]] auto empty() const noexcept -> bool;

private:
// Invariant: `required_known` and `optional_known` must be mutually exclusive.
// A vocabulary can be either required (true) OR optional (false), never both.
//
// MSVC warning C4251 reports data members whose type lacks a DLL interface
// while the enclosing class is exported. The members below are standard
// library types, so the warning is silenced for their declarations only.
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable : 4251)
#endif
// Both bitsets hold one bit per `Known` enumerator (`COUNT` bits in total)
std::bitset<static_cast<std::size_t>(Known::COUNT)> required_known{};
std::bitset<static_cast<std::size_t>(Known::COUNT)> optional_known{};
// Vocabularies keyed by URI along with their required/optional flag.
// Presumably only those without a `Known` enumerator end up here; confirm
// against the implementation
std::unordered_map<JSON::String, bool> custom;
#ifdef _MSC_VER
#pragma warning(pop)
#endif
};

} // namespace sourcemeta::core

#endif
111 changes: 86 additions & 25 deletions src/core/jsonschema/jsonschema.cc
Original file line number Diff line number Diff line change
Expand Up @@ -295,20 +295,69 @@ auto sourcemeta::core::base_dialect(
}

namespace {
auto core_vocabulary(std::string_view base_dialect) -> std::string {
auto core_vocabulary_known(std::string_view base_dialect)
-> sourcemeta::core::Vocabularies::Known {
if (base_dialect == "https://json-schema.org/draft/2020-12/schema" ||
base_dialect == "https://json-schema.org/draft/2020-12/hyper-schema") {
return "https://json-schema.org/draft/2020-12/vocab/core";
return sourcemeta::core::Vocabularies::Known::JSON_Schema_2020_12_Core;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I like this a lot!

} else if (base_dialect == "https://json-schema.org/draft/2019-09/schema" ||
base_dialect ==
"https://json-schema.org/draft/2019-09/hyper-schema") {
return "https://json-schema.org/draft/2019-09/vocab/core";
return sourcemeta::core::Vocabularies::Known::JSON_Schema_2019_09_Core;
} else {
std::ostringstream error;
error << "Unrecognized base dialect: " << base_dialect;
throw sourcemeta::core::SchemaError(error.str());
}
}

// Translate the identifier of a pre-vocabulary dialect (Draft 0 up to
// Draft 7, in both the plain and hyper-schema flavours) into the matching
// known vocabulary. Identifiers are compared as exact strings, and anything
// that is not listed in the table results in `std::nullopt`.
auto dialect_to_known(std::string_view dialect)
    -> std::optional<sourcemeta::core::Vocabularies::Known> {
  using Known = sourcemeta::core::Vocabularies::Known;

  struct Mapping {
    std::string_view identifier;
    Known vocabulary;
  };

  static constexpr Mapping mappings[] = {
      {"http://json-schema.org/draft-07/schema#", Known::JSON_Schema_Draft_7},
      {"http://json-schema.org/draft-07/hyper-schema#",
       Known::JSON_Schema_Draft_7_Hyper},
      {"http://json-schema.org/draft-06/schema#", Known::JSON_Schema_Draft_6},
      {"http://json-schema.org/draft-06/hyper-schema#",
       Known::JSON_Schema_Draft_6_Hyper},
      {"http://json-schema.org/draft-04/schema#", Known::JSON_Schema_Draft_4},
      {"http://json-schema.org/draft-04/hyper-schema#",
       Known::JSON_Schema_Draft_4_Hyper},
      {"http://json-schema.org/draft-03/schema#", Known::JSON_Schema_Draft_3},
      {"http://json-schema.org/draft-03/hyper-schema#",
       Known::JSON_Schema_Draft_3_Hyper},
      {"http://json-schema.org/draft-02/schema#", Known::JSON_Schema_Draft_2},
      {"http://json-schema.org/draft-02/hyper-schema#",
       Known::JSON_Schema_Draft_2_Hyper},
      {"http://json-schema.org/draft-01/schema#", Known::JSON_Schema_Draft_1},
      {"http://json-schema.org/draft-01/hyper-schema#",
       Known::JSON_Schema_Draft_1_Hyper},
      {"http://json-schema.org/draft-00/schema#", Known::JSON_Schema_Draft_0},
      {"http://json-schema.org/draft-00/hyper-schema#",
       Known::JSON_Schema_Draft_0_Hyper}};

  for (const auto &mapping : mappings) {
    if (mapping.identifier == dialect) {
      return mapping.vocabulary;
    }
  }

  return std::nullopt;
}
} // namespace

auto sourcemeta::core::vocabularies(
Expand Down Expand Up @@ -342,21 +391,22 @@ auto sourcemeta::core::vocabularies(const SchemaResolver &resolver,
// As a performance optimization shortcut
if (base_dialect == dialect) {
if (dialect == "https://json-schema.org/draft/2020-12/schema") {
return {{"https://json-schema.org/draft/2020-12/vocab/core", true},
{"https://json-schema.org/draft/2020-12/vocab/applicator", true},
{"https://json-schema.org/draft/2020-12/vocab/unevaluated", true},
{"https://json-schema.org/draft/2020-12/vocab/validation", true},
{"https://json-schema.org/draft/2020-12/vocab/meta-data", true},
{"https://json-schema.org/draft/2020-12/vocab/format-annotation",
true},
{"https://json-schema.org/draft/2020-12/vocab/content", true}};
return Vocabularies{
{Vocabularies::Known::JSON_Schema_2020_12_Core, true},
{Vocabularies::Known::JSON_Schema_2020_12_Applicator, true},
{Vocabularies::Known::JSON_Schema_2020_12_Unevaluated, true},
{Vocabularies::Known::JSON_Schema_2020_12_Validation, true},
{Vocabularies::Known::JSON_Schema_2020_12_Meta_Data, true},
{Vocabularies::Known::JSON_Schema_2020_12_Format_Annotation, true},
{Vocabularies::Known::JSON_Schema_2020_12_Content, true}};
} else if (dialect == "https://json-schema.org/draft/2019-09/schema") {
return {{"https://json-schema.org/draft/2019-09/vocab/core", true},
{"https://json-schema.org/draft/2019-09/vocab/applicator", true},
{"https://json-schema.org/draft/2019-09/vocab/validation", true},
{"https://json-schema.org/draft/2019-09/vocab/meta-data", true},
{"https://json-schema.org/draft/2019-09/vocab/format", false},
{"https://json-schema.org/draft/2019-09/vocab/content", true}};
return Vocabularies{
{Vocabularies::Known::JSON_Schema_2019_09_Core, true},
{Vocabularies::Known::JSON_Schema_2019_09_Applicator, true},
{Vocabularies::Known::JSON_Schema_2019_09_Validation, true},
{Vocabularies::Known::JSON_Schema_2019_09_Meta_Data, true},
{Vocabularies::Known::JSON_Schema_2019_09_Format, false},
{Vocabularies::Known::JSON_Schema_2019_09_Content, true}};
}
}

Expand All @@ -374,7 +424,11 @@ auto sourcemeta::core::vocabularies(const SchemaResolver &resolver,
dialect == "http://json-schema.org/draft-02/schema#" ||
dialect == "http://json-schema.org/draft-01/schema#" ||
dialect == "http://json-schema.org/draft-00/schema#") {
return {{dialect, true}};
const auto known = dialect_to_known(dialect);
if (known.has_value()) {
return Vocabularies{{known.value(), true}};
}
return Vocabularies{{dialect, true}};
}

/*
Expand All @@ -394,7 +448,11 @@ auto sourcemeta::core::vocabularies(const SchemaResolver &resolver,
base_dialect == "http://json-schema.org/draft-02/hyper-schema#" ||
base_dialect == "http://json-schema.org/draft-01/hyper-schema#" ||
base_dialect == "http://json-schema.org/draft-00/hyper-schema#") {
return {{base_dialect, true}};
const auto known = dialect_to_known(base_dialect);
if (known.has_value()) {
return Vocabularies{{known.value(), true}};
}
return Vocabularies{{base_dialect, true}};
}

/*
Expand Down Expand Up @@ -422,25 +480,28 @@ auto sourcemeta::core::vocabularies(const SchemaResolver &resolver,
*/

Vocabularies result;
const std::string core{core_vocabulary(base_dialect)};
const auto core{core_vocabulary_known(base_dialect)};
if (schema_dialect.defines("$vocabulary")) {
const sourcemeta::core::JSON &vocabularies{
schema_dialect.at("$vocabulary")};
assert(vocabularies.is_object());
for (const auto &entry : vocabularies.as_object()) {
result.insert({entry.first, entry.second.to_boolean()});
result.insert(entry.first, entry.second.to_boolean());
}
} else {
result.insert({core, true});
result.insert(core, true);
}

// The specification recommends these checks
if (!result.contains(core)) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we take advantage of your bitsets here? See that we get the core vocabulary for a dialect using core_vocabulary(). That one could return a known vocabulary enum member, and thus check containment using the bit maps?

throw sourcemeta::core::SchemaError(
"The core vocabulary must always be present");
} else if (!result.at(core)) {
throw sourcemeta::core::SchemaError(
"The core vocabulary must always be required");
} else {
const auto core_status{result.get(core)};
if (core_status.has_value() && !core_status.value()) {
throw sourcemeta::core::SchemaError(
"The core vocabulary must always be required");
}
}

return result;
Expand Down
Loading