|
| 1 | +// Standalone vocabulary benchmark |
| 2 | +// Compares bitset-based vocabulary implementation with baseline unordered_map |
| 3 | + |
| 4 | +#include "src/core/jsonschema/include/sourcemeta/core/jsonschema_vocabularies.h" |
| 5 | +#include "src/core/json/include/sourcemeta/core/json.h" |
| 6 | + |
| 7 | +#include <chrono> |
| 8 | +#include <iostream> |
| 9 | +#include <iomanip> |
| 10 | +#include <unordered_map> |
| 11 | +#include <string> |
| 12 | +#include <vector> |
| 13 | + |
// Brings the library namespace into scope for this standalone file;
// presumably this is where the unqualified `Vocabularies` type used by the
// benchmarks below comes from (verify against the included header).
using namespace sourcemeta::core;

// Baseline: Original unordered_map implementation.
// Maps a vocabulary URI string to a bool flag. The lookup benchmark treats an
// entry as "contained" only when it is present AND its flag is true.
using VocabulariesBaseline = std::unordered_map<std::string, bool>;
| 18 | + |
// Test vocabularies (mix of known and custom).
// 12 (URI, flag) pairs in total: 9 entries in the "known" group and 3 custom
// ones. Exactly one entry (custom/vocab2) carries a `false` flag. The insert
// and merge benchmarks feed these pairs to both container types unchanged.
const std::vector<std::pair<std::string, bool>> TEST_VOCABULARIES = {
    // Known vocabularies
    {"https://json-schema.org/draft/2020-12/vocab/core", true},
    {"https://json-schema.org/draft/2020-12/vocab/applicator", true},
    {"https://json-schema.org/draft/2020-12/vocab/validation", true},
    {"https://json-schema.org/draft/2020-12/vocab/meta-data", true},
    {"https://json-schema.org/draft/2019-09/vocab/core", true},
    {"https://json-schema.org/draft/2019-09/vocab/applicator", true},
    {"http://json-schema.org/draft-07/schema#", true},
    {"http://json-schema.org/draft-06/schema#", true},
    {"http://json-schema.org/draft-04/schema#", true},
    // Custom vocabularies
    {"https://example.com/custom/vocab1", true},
    {"https://example.com/custom/vocab2", false},
    {"https://my-org.com/schemas/v1", true}
};
| 36 | + |
// Lookup workload used by the contains/find benchmarks: 10 URIs per outer
// iteration (the 2020-12 core URI appears twice).
// NOTE: every URI listed here is also present in TEST_VOCABULARIES with a
// `true` flag, so this workload is 100% hits and never exercises the miss
// path. 9 of the 10 come from the "known" group and 1 is a custom vocabulary.
const std::vector<std::string> LOOKUP_URIS = {
    "https://json-schema.org/draft/2020-12/vocab/core",
    "https://json-schema.org/draft/2020-12/vocab/applicator",
    "https://json-schema.org/draft/2020-12/vocab/validation",
    "https://json-schema.org/draft/2019-09/vocab/core",
    "http://json-schema.org/draft-07/schema#",
    "https://json-schema.org/draft/2020-12/vocab/core",  // duplicate for cache effects
    "https://example.com/custom/vocab1",
    "https://json-schema.org/draft/2020-12/vocab/meta-data",
    "http://json-schema.org/draft-04/schema#",
    "https://json-schema.org/draft/2019-09/vocab/applicator"
};
| 50 | + |
/// Times one invocation of `func(iterations)` and prints a single report row.
///
/// @param name        Label printed left-aligned in a 50-character column.
/// @param func        Callable invoked exactly once as `func(iterations)`; the
///                    callable is responsible for looping `iterations` times.
/// @param iterations  Value forwarded to `func` and used as the divisor for
///                    the per-iteration figure (default 100000).
/// @return Elapsed wall time of the call, in milliseconds.
///
/// The "ns/op" column is elapsed time divided by `iterations`, i.e. the cost
/// of one OUTER iteration. When the callable performs several container
/// operations per iteration, the figure covers all of them together.
template <typename Func>
double benchmark(const std::string& name, Func func, int iterations = 100000) {
    // steady_clock is guaranteed monotonic; high_resolution_clock may be an
    // alias of system_clock and can jump if the wall clock is adjusted.
    const auto start = std::chrono::steady_clock::now();
    func(iterations);
    const auto end = std::chrono::steady_clock::now();

    const double ms = std::chrono::duration<double, std::milli>(end - start).count();
    // Avoid printing inf/nan when called with a non-positive iteration count.
    const double ns_per_iteration =
        iterations > 0 ? (ms / iterations * 1000000) : 0.0;

    std::cout << std::setw(50) << std::left << name
              << std::setw(12) << std::right << std::fixed << std::setprecision(3)
              << ms << " ms"
              << std::setw(15) << std::right << ns_per_iteration << " ns/op"
              << std::endl;
    return ms;
}
| 65 | + |
| 66 | +void benchmark_insert() { |
| 67 | + std::cout << "\n=== INSERT BENCHMARK ===" << std::endl; |
| 68 | + |
| 69 | + double baseline_time = benchmark("Baseline (unordered_map) insert", [](int iterations) { |
| 70 | + for (int i = 0; i < iterations; i++) { |
| 71 | + VocabulariesBaseline vocabs; |
| 72 | + for (const auto& entry : TEST_VOCABULARIES) { |
| 73 | + vocabs.insert(entry); |
| 74 | + } |
| 75 | + } |
| 76 | + }); |
| 77 | + |
| 78 | + double optimized_time = benchmark("Optimized (bitset) insert", [](int iterations) { |
| 79 | + for (int i = 0; i < iterations; i++) { |
| 80 | + Vocabularies vocabs; |
| 81 | + for (const auto& entry : TEST_VOCABULARIES) { |
| 82 | + vocabs.insert(entry); |
| 83 | + } |
| 84 | + } |
| 85 | + }); |
| 86 | + |
| 87 | + double improvement = ((baseline_time - optimized_time) / baseline_time) * 100.0; |
| 88 | + std::cout << "Improvement: " << std::fixed << std::setprecision(1) |
| 89 | + << improvement << "%" << std::endl; |
| 90 | +} |
| 91 | + |
| 92 | +void benchmark_lookup() { |
| 93 | + std::cout << "\n=== LOOKUP BENCHMARK (contains) ===" << std::endl; |
| 94 | + |
| 95 | + // Prepare data |
| 96 | + VocabulariesBaseline baseline; |
| 97 | + for (const auto& entry : TEST_VOCABULARIES) { |
| 98 | + baseline.insert(entry); |
| 99 | + } |
| 100 | + |
| 101 | + Vocabularies optimized; |
| 102 | + for (const auto& entry : TEST_VOCABULARIES) { |
| 103 | + optimized.insert(entry); |
| 104 | + } |
| 105 | + |
| 106 | + double baseline_time = benchmark("Baseline (unordered_map) contains", [&](int iterations) { |
| 107 | + volatile bool result = false; |
| 108 | + for (int i = 0; i < iterations; i++) { |
| 109 | + for (const auto& uri : LOOKUP_URIS) { |
| 110 | + auto it = baseline.find(uri); |
| 111 | + result = (it != baseline.end() && it->second); |
| 112 | + } |
| 113 | + } |
| 114 | + }); |
| 115 | + |
| 116 | + double optimized_time = benchmark("Optimized (bitset) contains", [&](int iterations) { |
| 117 | + volatile bool result = false; |
| 118 | + for (int i = 0; i < iterations; i++) { |
| 119 | + for (const auto& uri : LOOKUP_URIS) { |
| 120 | + result = optimized.contains(uri); |
| 121 | + } |
| 122 | + } |
| 123 | + }); |
| 124 | + |
| 125 | + double improvement = ((baseline_time - optimized_time) / baseline_time) * 100.0; |
| 126 | + std::cout << "Improvement: " << std::fixed << std::setprecision(1) |
| 127 | + << improvement << "%" << std::endl; |
| 128 | +} |
| 129 | + |
| 130 | +void benchmark_find() { |
| 131 | + std::cout << "\n=== FIND BENCHMARK (with status check) ===" << std::endl; |
| 132 | + |
| 133 | + // Prepare data |
| 134 | + VocabulariesBaseline baseline; |
| 135 | + for (const auto& entry : TEST_VOCABULARIES) { |
| 136 | + baseline.insert(entry); |
| 137 | + } |
| 138 | + |
| 139 | + Vocabularies optimized; |
| 140 | + for (const auto& entry : TEST_VOCABULARIES) { |
| 141 | + optimized.insert(entry); |
| 142 | + } |
| 143 | + |
| 144 | + double baseline_time = benchmark("Baseline (unordered_map) find", [&](int iterations) { |
| 145 | + volatile bool result = false; |
| 146 | + for (int i = 0; i < iterations; i++) { |
| 147 | + for (const auto& uri : LOOKUP_URIS) { |
| 148 | + auto it = baseline.find(uri); |
| 149 | + if (it != baseline.end()) { |
| 150 | + result = it->second; |
| 151 | + } |
| 152 | + } |
| 153 | + } |
| 154 | + }); |
| 155 | + |
| 156 | + double optimized_time = benchmark("Optimized (bitset) find", [&](int iterations) { |
| 157 | + volatile bool result = false; |
| 158 | + for (int i = 0; i < iterations; i++) { |
| 159 | + for (const auto& uri : LOOKUP_URIS) { |
| 160 | + auto status = optimized.find(uri); |
| 161 | + if (status.has_value()) { |
| 162 | + result = status.value(); |
| 163 | + } |
| 164 | + } |
| 165 | + } |
| 166 | + }); |
| 167 | + |
| 168 | + double improvement = ((baseline_time - optimized_time) / baseline_time) * 100.0; |
| 169 | + std::cout << "Improvement: " << std::fixed << std::setprecision(1) |
| 170 | + << improvement << "%" << std::endl; |
| 171 | +} |
| 172 | + |
| 173 | +void benchmark_merge() { |
| 174 | + std::cout << "\n=== MERGE BENCHMARK ===" << std::endl; |
| 175 | + |
| 176 | + double baseline_time = benchmark("Baseline (unordered_map) merge", [](int iterations) { |
| 177 | + for (int i = 0; i < iterations; i++) { |
| 178 | + VocabulariesBaseline vocabs1; |
| 179 | + VocabulariesBaseline vocabs2; |
| 180 | + |
| 181 | + for (size_t j = 0; j < TEST_VOCABULARIES.size() / 2; j++) { |
| 182 | + vocabs1.insert(TEST_VOCABULARIES[j]); |
| 183 | + } |
| 184 | + for (size_t j = TEST_VOCABULARIES.size() / 2; j < TEST_VOCABULARIES.size(); j++) { |
| 185 | + vocabs2.insert(TEST_VOCABULARIES[j]); |
| 186 | + } |
| 187 | + |
| 188 | + vocabs1.merge(vocabs2); |
| 189 | + } |
| 190 | + }); |
| 191 | + |
| 192 | + double optimized_time = benchmark("Optimized (bitset) merge", [](int iterations) { |
| 193 | + for (int i = 0; i < iterations; i++) { |
| 194 | + Vocabularies vocabs1; |
| 195 | + Vocabularies vocabs2; |
| 196 | + |
| 197 | + for (size_t j = 0; j < TEST_VOCABULARIES.size() / 2; j++) { |
| 198 | + vocabs1.insert(TEST_VOCABULARIES[j]); |
| 199 | + } |
| 200 | + for (size_t j = TEST_VOCABULARIES.size() / 2; j < TEST_VOCABULARIES.size(); j++) { |
| 201 | + vocabs2.insert(TEST_VOCABULARIES[j]); |
| 202 | + } |
| 203 | + |
| 204 | + vocabs1.merge(vocabs2); |
| 205 | + } |
| 206 | + }); |
| 207 | + |
| 208 | + double improvement = ((baseline_time - optimized_time) / baseline_time) * 100.0; |
| 209 | + std::cout << "Improvement: " << std::fixed << std::setprecision(1) |
| 210 | + << improvement << "%" << std::endl; |
| 211 | +} |
| 212 | + |
| 213 | +void verify_correctness() { |
| 214 | + std::cout << "\n=== CORRECTNESS VERIFICATION ===" << std::endl; |
| 215 | + |
| 216 | + Vocabularies vocabs; |
| 217 | + |
| 218 | + // Test insert and contains |
| 219 | + vocabs.insert({"https://json-schema.org/draft/2020-12/vocab/core", true}); |
| 220 | + vocabs.insert({"https://json-schema.org/draft/2019-09/vocab/format", false}); |
| 221 | + vocabs.insert({"https://example.com/custom", true}); |
| 222 | + |
| 223 | + std::cout << "✓ Insert: 3 vocabularies added" << std::endl; |
| 224 | + |
| 225 | + // Test contains |
| 226 | + bool test1 = vocabs.contains("https://json-schema.org/draft/2020-12/vocab/core"); |
| 227 | + bool test2 = !vocabs.contains("https://json-schema.org/draft/2019-09/vocab/format"); |
| 228 | + bool test3 = vocabs.contains("https://example.com/custom"); |
| 229 | + bool test4 = !vocabs.contains("https://non-existent.com/vocab"); |
| 230 | + |
| 231 | + if (test1 && test2 && test3 && test4) { |
| 232 | + std::cout << "✓ Contains: All lookups correct" << std::endl; |
| 233 | + } else { |
| 234 | + std::cout << "✗ Contains: FAILED" << std::endl; |
| 235 | + } |
| 236 | + |
| 237 | + // Test find |
| 238 | + auto found1 = vocabs.find("https://json-schema.org/draft/2020-12/vocab/core"); |
| 239 | + auto found2 = vocabs.find("https://json-schema.org/draft/2019-09/vocab/format"); |
| 240 | + auto found3 = vocabs.find("https://non-existent.com/vocab"); |
| 241 | + |
| 242 | + bool find_test = found1.has_value() && found1.value() == true && |
| 243 | + found2.has_value() && found2.value() == false && |
| 244 | + !found3.has_value(); |
| 245 | + |
| 246 | + if (find_test) { |
| 247 | + std::cout << "✓ Find: All queries correct" << std::endl; |
| 248 | + } else { |
| 249 | + std::cout << "✗ Find: FAILED" << std::endl; |
| 250 | + } |
| 251 | + |
| 252 | + // Test merge |
| 253 | + Vocabularies vocabs2; |
| 254 | + vocabs2.insert({"https://json-schema.org/draft/2020-12/vocab/applicator", true}); |
| 255 | + vocabs2.insert({"https://example.com/custom2", false}); |
| 256 | + |
| 257 | + vocabs.merge(vocabs2); |
| 258 | + |
| 259 | + bool merge_test = vocabs.contains("https://json-schema.org/draft/2020-12/vocab/applicator") && |
| 260 | + !vocabs.contains("https://example.com/custom2"); |
| 261 | + |
| 262 | + if (merge_test) { |
| 263 | + std::cout << "✓ Merge: Vocabularies merged correctly" << std::endl; |
| 264 | + } else { |
| 265 | + std::cout << "✗ Merge: FAILED" << std::endl; |
| 266 | + } |
| 267 | + |
| 268 | + // Test all_vocabularies |
| 269 | + auto all = vocabs.all_vocabularies(); |
| 270 | + std::cout << "✓ All vocabularies: " << all.size() << " entries" << std::endl; |
| 271 | +} |
| 272 | + |
| 273 | +int main() { |
| 274 | + std::cout << "========================================" << std::endl; |
| 275 | + std::cout << " VOCABULARY OPTIMIZATION BENCHMARK" << std::endl; |
| 276 | + std::cout << "========================================" << std::endl; |
| 277 | + |
| 278 | + verify_correctness(); |
| 279 | + benchmark_insert(); |
| 280 | + benchmark_lookup(); |
| 281 | + benchmark_find(); |
| 282 | + benchmark_merge(); |
| 283 | + |
| 284 | + std::cout << "\n========================================" << std::endl; |
| 285 | + std::cout << " BENCHMARK COMPLETE" << std::endl; |
| 286 | + std::cout << "========================================" << std::endl; |
| 287 | + |
| 288 | + return 0; |
| 289 | +} |
0 commit comments