-
-
Notifications
You must be signed in to change notification settings - Fork 9
perf: Replace unordered_map with bitset for vocabulary lookups #2040
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,138 @@ | ||
| #ifndef SOURCEMETA_CORE_JSONSCHEMA_VOCABULARIES_H_ | ||
| #define SOURCEMETA_CORE_JSONSCHEMA_VOCABULARIES_H_ | ||
|
|
||
| #ifndef SOURCEMETA_CORE_JSONSCHEMA_EXPORT | ||
| #include <sourcemeta/core/jsonschema_export.h> | ||
| #endif | ||
|
|
||
| #include <sourcemeta/core/json.h> | ||
|
|
||
| #include <bitset> // std::bitset | ||
| #include <cassert> // assert | ||
| #include <cstdint> // std::uint8_t, std::size_t | ||
| #include <optional> // std::optional | ||
| #include <stdexcept> // std::out_of_range | ||
| #include <string> // std::string | ||
| #include <string_view> // std::string_view | ||
| #include <unordered_map> // std::unordered_map | ||
| #include <utility> // std::pair | ||
| #include <vector> // std::vector | ||
|
|
||
| namespace sourcemeta::core { | ||
|
|
||
| /// @ingroup jsonschema | ||
| /// Vocabulary set in which every entry is either required or optional. | ||
| /// Known vocabularies are kept in two `std::bitset`s; custom vocabularies | ||
| /// fall back to a `std::unordered_map`. | ||
| /// TODO: To maximize performance gains, convert string-based vocabulary checks | ||
| /// throughout the codebase to use enum-based methods. | ||
| struct SOURCEMETA_CORE_JSONSCHEMA_EXPORT Vocabularies { | ||
| /// Identifiers for the JSON Schema vocabularies this type knows natively. | ||
| /// Enumerators are consecutive from 0 and `COUNT` sizes the internal bitsets, | ||
| /// so each value presumably doubles as a bit index (TODO confirm in the .cc). | ||
| enum class Known : std::uint8_t { | ||
| // Pre-vocabulary dialects (treated as vocabularies) | ||
| JSON_Schema_Draft_0 = 0, | ||
| JSON_Schema_Draft_0_Hyper = 1, | ||
| JSON_Schema_Draft_1 = 2, | ||
| JSON_Schema_Draft_1_Hyper = 3, | ||
| JSON_Schema_Draft_2 = 4, | ||
| JSON_Schema_Draft_2_Hyper = 5, | ||
| JSON_Schema_Draft_3 = 6, | ||
| JSON_Schema_Draft_3_Hyper = 7, | ||
| JSON_Schema_Draft_4 = 8, | ||
| JSON_Schema_Draft_4_Hyper = 9, | ||
| JSON_Schema_Draft_6 = 10, | ||
| JSON_Schema_Draft_6_Hyper = 11, | ||
| JSON_Schema_Draft_7 = 12, | ||
| JSON_Schema_Draft_7_Hyper = 13, | ||
| // 2019-09 vocabularies | ||
| JSON_Schema_2019_09_Core = 14, | ||
| JSON_Schema_2019_09_Applicator = 15, | ||
| JSON_Schema_2019_09_Validation = 16, | ||
| JSON_Schema_2019_09_Meta_Data = 17, | ||
| JSON_Schema_2019_09_Format = 18, | ||
| JSON_Schema_2019_09_Content = 19, | ||
| JSON_Schema_2019_09_Hyper_Schema = 20, | ||
| // 2020-12 vocabularies | ||
| JSON_Schema_2020_12_Core = 21, | ||
| JSON_Schema_2020_12_Applicator = 22, | ||
| JSON_Schema_2020_12_Unevaluated = 23, | ||
| JSON_Schema_2020_12_Validation = 24, | ||
| JSON_Schema_2020_12_Meta_Data = 25, | ||
| JSON_Schema_2020_12_Format_Annotation = 26, | ||
| JSON_Schema_2020_12_Format_Assertion = 27, | ||
| JSON_Schema_2020_12_Content = 28, | ||
| // Sentinel: number of known vocabularies (implicitly 29); keep it last so the count stays correct | ||
| COUNT | ||
| }; | ||
|
|
||
| public: | ||
| /// Default constructor (both bitsets and the custom map start out empty) | ||
| Vocabularies() = default; | ||
|
|
||
| /// Copy constructor (compiler-generated) | ||
| Vocabularies(const Vocabularies &) = default; | ||
|
|
||
| /// Move constructor (compiler-generated, declared non-throwing) | ||
| Vocabularies(Vocabularies &&) noexcept = default; | ||
|
|
||
| /// Copy assignment operator (compiler-generated) | ||
| auto operator=(const Vocabularies &) -> Vocabularies & = default; | ||
|
|
||
| /// Move assignment operator (compiler-generated, declared non-throwing) | ||
| auto operator=(Vocabularies &&) noexcept -> Vocabularies & = default; | ||
|
|
||
| /// Destructor (compiler-generated) | ||
| ~Vocabularies() = default; | ||
|
|
||
| /// Construct from an initializer list of (URI, required?) pairs (for backward compatibility) | ||
| Vocabularies(std::initializer_list<std::pair<JSON::String, bool>> init); | ||
|
|
||
| /// Construct from an initializer list of (known vocabulary, required?) pairs (optimized) | ||
| Vocabularies(std::initializer_list<std::pair<Known, bool>> init); | ||
|
|
||
| /// Check if the vocabulary identified by `uri` is enabled | ||
| [[nodiscard]] auto contains(const JSON::String &uri) const noexcept -> bool; | ||
|
|
||
| /// Check if a known vocabulary is enabled (optimized, no string lookup by the caller) | ||
| [[nodiscard]] auto contains(Known vocabulary) const noexcept -> bool; | ||
|
|
||
| /// Insert a vocabulary by URI; `required` is true for required, false for optional. NOTE(review): declared noexcept, but storing into the custom map may allocate - confirm a std::terminate on allocation failure is acceptable | ||
| auto insert(const JSON::String &uri, bool required) noexcept -> void; | ||
|
|
||
| /// Insert a known vocabulary; `required` is true for required, false for optional (optimized) | ||
| auto insert(Known vocabulary, bool required) noexcept -> void; | ||
|
|
||
| /// Get vocabulary status by URI; presumably an empty optional means the URI is absent (TODO confirm) | ||
| [[nodiscard]] auto get(const JSON::String &uri) const noexcept | ||
| -> std::optional<bool>; | ||
|
|
||
| /// Get known vocabulary status (optimized); presumably an empty optional means it is absent (TODO confirm) | ||
| [[nodiscard]] auto get(Known vocabulary) const noexcept | ||
| -> std::optional<bool>; | ||
|
|
||
| /// Get the number of vocabularies (required + optional + custom) | ||
| [[nodiscard]] auto size() const noexcept -> std::size_t; | ||
|
|
||
| /// Check if there are no vocabularies at all | ||
| [[nodiscard]] auto empty() const noexcept -> bool; | ||
|
|
||
| private: | ||
| // Invariant: required_known and optional_known must be mutually exclusive. | ||
| // A vocabulary can be either required (true) OR optional (false), never both. | ||
| #ifdef _MSC_VER | ||
| #pragma warning(push) | ||
| #pragma warning(disable : 4251) | ||
| #endif | ||
| std::bitset<static_cast<std::size_t>(Known::COUNT)> required_known{}; | ||
| std::bitset<static_cast<std::size_t>(Known::COUNT)> optional_known{}; | ||
| std::unordered_map<JSON::String, bool> custom; | ||
| #ifdef _MSC_VER | ||
| #pragma warning(pop) | ||
| #endif | ||
| }; | ||
|
|
||
| } // namespace sourcemeta::core | ||
|
|
||
| #endif | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -295,20 +295,69 @@ auto sourcemeta::core::base_dialect( | |
| } | ||
|
|
||
| namespace { | ||
| auto core_vocabulary(std::string_view base_dialect) -> std::string { | ||
| auto core_vocabulary_known(std::string_view base_dialect) | ||
| -> sourcemeta::core::Vocabularies::Known { | ||
| if (base_dialect == "https://json-schema.org/draft/2020-12/schema" || | ||
| base_dialect == "https://json-schema.org/draft/2020-12/hyper-schema") { | ||
| return "https://json-schema.org/draft/2020-12/vocab/core"; | ||
| return sourcemeta::core::Vocabularies::Known::JSON_Schema_2020_12_Core; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, I like this a lot! |
||
| } else if (base_dialect == "https://json-schema.org/draft/2019-09/schema" || | ||
| base_dialect == | ||
| "https://json-schema.org/draft/2019-09/hyper-schema") { | ||
| return "https://json-schema.org/draft/2019-09/vocab/core"; | ||
| return sourcemeta::core::Vocabularies::Known::JSON_Schema_2019_09_Core; | ||
| } else { | ||
| std::ostringstream error; | ||
| error << "Unrecognized base dialect: " << base_dialect; | ||
| throw sourcemeta::core::SchemaError(error.str()); | ||
| } | ||
| } | ||
|
|
||
| // Translate the URI of a pre-vocabulary dialect (Draft 0 through Draft 7, | ||
| // plain or hyper-schema) into its `Vocabularies::Known` enumerator. The | ||
| // comparison is exact, so the trailing `#` is significant. Any other input | ||
| // produces `std::nullopt`. | ||
| auto dialect_to_known(std::string_view dialect) | ||
|     -> std::optional<sourcemeta::core::Vocabularies::Known> { | ||
|   using Known = sourcemeta::core::Vocabularies::Known; | ||
|   struct Mapping { | ||
|     std::string_view uri; | ||
|     Known vocabulary; | ||
|   }; | ||
|   // All URIs are distinct, so the lookup order does not affect the result | ||
|   static constexpr Mapping mappings[] = { | ||
|       {"http://json-schema.org/draft-07/schema#", Known::JSON_Schema_Draft_7}, | ||
|       {"http://json-schema.org/draft-07/hyper-schema#", | ||
|        Known::JSON_Schema_Draft_7_Hyper}, | ||
|       {"http://json-schema.org/draft-06/schema#", Known::JSON_Schema_Draft_6}, | ||
|       {"http://json-schema.org/draft-06/hyper-schema#", | ||
|        Known::JSON_Schema_Draft_6_Hyper}, | ||
|       {"http://json-schema.org/draft-04/schema#", Known::JSON_Schema_Draft_4}, | ||
|       {"http://json-schema.org/draft-04/hyper-schema#", | ||
|        Known::JSON_Schema_Draft_4_Hyper}, | ||
|       {"http://json-schema.org/draft-03/schema#", Known::JSON_Schema_Draft_3}, | ||
|       {"http://json-schema.org/draft-03/hyper-schema#", | ||
|        Known::JSON_Schema_Draft_3_Hyper}, | ||
|       {"http://json-schema.org/draft-02/schema#", Known::JSON_Schema_Draft_2}, | ||
|       {"http://json-schema.org/draft-02/hyper-schema#", | ||
|        Known::JSON_Schema_Draft_2_Hyper}, | ||
|       {"http://json-schema.org/draft-01/schema#", Known::JSON_Schema_Draft_1}, | ||
|       {"http://json-schema.org/draft-01/hyper-schema#", | ||
|        Known::JSON_Schema_Draft_1_Hyper}, | ||
|       {"http://json-schema.org/draft-00/schema#", Known::JSON_Schema_Draft_0}, | ||
|       {"http://json-schema.org/draft-00/hyper-schema#", | ||
|        Known::JSON_Schema_Draft_0_Hyper}}; | ||
|   for (const auto &mapping : mappings) { | ||
|     if (mapping.uri == dialect) { | ||
|       return mapping.vocabulary; | ||
|     } | ||
|   } | ||
|   return std::nullopt; | ||
| } | ||
| } // namespace | ||
|
|
||
| auto sourcemeta::core::vocabularies( | ||
|
|
@@ -342,21 +391,22 @@ auto sourcemeta::core::vocabularies(const SchemaResolver &resolver, | |
| // As a performance optimization shortcut | ||
| if (base_dialect == dialect) { | ||
| if (dialect == "https://json-schema.org/draft/2020-12/schema") { | ||
| return {{"https://json-schema.org/draft/2020-12/vocab/core", true}, | ||
| {"https://json-schema.org/draft/2020-12/vocab/applicator", true}, | ||
| {"https://json-schema.org/draft/2020-12/vocab/unevaluated", true}, | ||
| {"https://json-schema.org/draft/2020-12/vocab/validation", true}, | ||
| {"https://json-schema.org/draft/2020-12/vocab/meta-data", true}, | ||
| {"https://json-schema.org/draft/2020-12/vocab/format-annotation", | ||
| true}, | ||
| {"https://json-schema.org/draft/2020-12/vocab/content", true}}; | ||
| return Vocabularies{ | ||
| {Vocabularies::Known::JSON_Schema_2020_12_Core, true}, | ||
| {Vocabularies::Known::JSON_Schema_2020_12_Applicator, true}, | ||
| {Vocabularies::Known::JSON_Schema_2020_12_Unevaluated, true}, | ||
| {Vocabularies::Known::JSON_Schema_2020_12_Validation, true}, | ||
| {Vocabularies::Known::JSON_Schema_2020_12_Meta_Data, true}, | ||
| {Vocabularies::Known::JSON_Schema_2020_12_Format_Annotation, true}, | ||
| {Vocabularies::Known::JSON_Schema_2020_12_Content, true}}; | ||
| } else if (dialect == "https://json-schema.org/draft/2019-09/schema") { | ||
| return {{"https://json-schema.org/draft/2019-09/vocab/core", true}, | ||
| {"https://json-schema.org/draft/2019-09/vocab/applicator", true}, | ||
| {"https://json-schema.org/draft/2019-09/vocab/validation", true}, | ||
| {"https://json-schema.org/draft/2019-09/vocab/meta-data", true}, | ||
| {"https://json-schema.org/draft/2019-09/vocab/format", false}, | ||
| {"https://json-schema.org/draft/2019-09/vocab/content", true}}; | ||
| return Vocabularies{ | ||
| {Vocabularies::Known::JSON_Schema_2019_09_Core, true}, | ||
| {Vocabularies::Known::JSON_Schema_2019_09_Applicator, true}, | ||
| {Vocabularies::Known::JSON_Schema_2019_09_Validation, true}, | ||
| {Vocabularies::Known::JSON_Schema_2019_09_Meta_Data, true}, | ||
| {Vocabularies::Known::JSON_Schema_2019_09_Format, false}, | ||
| {Vocabularies::Known::JSON_Schema_2019_09_Content, true}}; | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -374,7 +424,11 @@ auto sourcemeta::core::vocabularies(const SchemaResolver &resolver, | |
| dialect == "http://json-schema.org/draft-02/schema#" || | ||
| dialect == "http://json-schema.org/draft-01/schema#" || | ||
| dialect == "http://json-schema.org/draft-00/schema#") { | ||
| return {{dialect, true}}; | ||
| const auto known = dialect_to_known(dialect); | ||
| if (known.has_value()) { | ||
| return Vocabularies{{known.value(), true}}; | ||
| } | ||
| return Vocabularies{{dialect, true}}; | ||
| } | ||
|
|
||
| /* | ||
|
|
@@ -394,7 +448,11 @@ auto sourcemeta::core::vocabularies(const SchemaResolver &resolver, | |
| base_dialect == "http://json-schema.org/draft-02/hyper-schema#" || | ||
| base_dialect == "http://json-schema.org/draft-01/hyper-schema#" || | ||
| base_dialect == "http://json-schema.org/draft-00/hyper-schema#") { | ||
| return {{base_dialect, true}}; | ||
| const auto known = dialect_to_known(base_dialect); | ||
| if (known.has_value()) { | ||
| return Vocabularies{{known.value(), true}}; | ||
| } | ||
| return Vocabularies{{base_dialect, true}}; | ||
| } | ||
|
|
||
| /* | ||
|
|
@@ -422,25 +480,28 @@ auto sourcemeta::core::vocabularies(const SchemaResolver &resolver, | |
| */ | ||
|
|
||
| Vocabularies result; | ||
| const std::string core{core_vocabulary(base_dialect)}; | ||
| const auto core{core_vocabulary_known(base_dialect)}; | ||
| if (schema_dialect.defines("$vocabulary")) { | ||
| const sourcemeta::core::JSON &vocabularies{ | ||
| schema_dialect.at("$vocabulary")}; | ||
| assert(vocabularies.is_object()); | ||
| for (const auto &entry : vocabularies.as_object()) { | ||
| result.insert({entry.first, entry.second.to_boolean()}); | ||
| result.insert(entry.first, entry.second.to_boolean()); | ||
| } | ||
| } else { | ||
| result.insert({core, true}); | ||
| result.insert(core, true); | ||
| } | ||
|
|
||
| // The specification recommends these checks | ||
| if (!result.contains(core)) { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can we take advantage of your bitsets here? See that we get the core vocabulary for a dialect using |
||
| throw sourcemeta::core::SchemaError( | ||
| "The core vocabulary must always be present"); | ||
| } else if (!result.at(core)) { | ||
| throw sourcemeta::core::SchemaError( | ||
| "The core vocabulary must always be required"); | ||
| } else { | ||
| const auto core_status{result.get(core)}; | ||
| if (core_status.has_value() && !core_status.value()) { | ||
| throw sourcemeta::core::SchemaError( | ||
| "The core vocabulary must always be required"); | ||
| } | ||
| } | ||
|
|
||
| return result; | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please add default copy/move constructors/assignment operators too!