diff --git a/bench/various_appraches_bench.rb b/bench/various_appraches_bench.rb new file mode 100644 index 00000000..727d16d8 --- /dev/null +++ b/bench/various_appraches_bench.rb @@ -0,0 +1,770 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "bundler/inline" + +gemfile(true) do + source "https://rubygems.org" + if ENV["MSGPACK_PATH"] + gem "msgpack", path: ENV["MSGPACK_PATH"] + else + gem "msgpack", git: "https://github.com/Shopify/msgpack-ruby.git", branch: "gm/ref-tracking" + end + gem "benchmark-ips" +end + +require "msgpack" +require "benchmark" + +RubyVM::YJIT.enable + +begin + factory = MessagePack::Factory.new + test_struct = Struct.new(:a, keyword_init: true) + factory.register_type(0x01, test_struct, optimized_struct: true) + obj = test_struct.new(a: 1) + arr = [obj, obj, obj] + dump = factory.dump(arr) + loaded = factory.load(dump) +rescue StandardError + puts "ERROR: msgpack-ruby does not support optimized_struct; the benchmark cannot run." + exit(2) +end + +# +-----------------+ +# | Product | +# +-----------------+ +# | | .variants +# | .options |----------------+------------------------------------| +# v v v +# +----------------+ +--------------------+ +--------------------+ +# | ProductOption |----->| ProductVariant[1] | | ProductVariant[2] | +# | (e.g. "Color") | +--------------------+ +--------------------+ +# +----------------+ | ^ | ^ +# | | .options | .selling_plans | .options | .selling_plans +# | .values | | | | +# v v | v | +# +----------------------+<--+ | +----------------------+ | +# | ProductOptionValue A | (SHARED) | (SHARED) | ProductOptionValue B | | +# | (e.g. "Red") | | | (e.g. "Blue") | | +# +----------------------+ | +----------------------+ | +# | | +# | | +# +----------------------+ | | +# | SellingPlanGroup X |---------------+------------------------------------------| +# +----------------------+ +# | +# | .selling_plans +# v +# +----------------------+ +# | SellingPlan[1] | (SHARED, referenced by ProductVariant) +# +----------------------+ +# Metafield - matches real DataApi::Messages::Metafield (12 fields) +Metafield = Struct.new( + :id, + :namespace, + :key, + :value, + :type, + :value_type, + :definition_id, + :owner_type, + :owner_id, + :created_at, + :updated_at, + :original_type, + keyword_init: true, +) + +SellingPlanPriceAdjustment = Struct.new( + :order_count, + :position, + :value_type, + :value, + keyword_init: true, +) + +SellingPlanOption = Struct.new( + :name, + :position, + :value, + keyword_init: true, +) + +SellingPlanCheckoutCharge = Struct.new( + :value_type, + :value, + keyword_init: true, +) + +# SellingPlan is SHARED - the same selling plan instance is referenced by +# multiple products and variants that offer the same subscription option +SellingPlan = Struct.new( + :id, + :name, + :description, + :recurring_deliveries, + :options, + :price_adjustments, + :checkout_charge, + keyword_init: true, +) + +SellingPlanGroup = Struct.new( + :id, + :name, + :options, + :selling_plans, + keyword_init: true, +) + +# ProductOptionValue is SHARED - the same option value instance appears in +# both product.options[].values AND variant.options +ProductOptionValue = Struct.new( + :id, + :name, + :position, + :swatch_color, + keyword_init: true, +) + +ProductOption = Struct.new( + :id, + :name, + :position, + :values, + keyword_init: true, +) + +# ProductVariant - matches real ProductLoader::Messages::ProductVariant (37 fields) +# Many fields are nil in typical use, which affects serialization size +ProductVariant = Struct.new( + :id, + :product_id, + :title, + :uncontextualized_title, + :price, + :compare_at_price, + :barcode, + :options, # References SHARED ProductOptionValue objects + :option1, + :option2, + :option1_id, + :option2_id, + :parent_option_value_ids, + :taxable, + :unit_price_measurement, + :position, + :created_at, + :updated_at, + :fulfillment_service, + :requires_components, + :inventory_management, + :inventory_policy, + :weight_unit, + :weight_value, + :sku, + :requires_shipping, + :selling_plans, # References SHARED SellingPlan objects + :metafields, + :variant_unit_price_measurement, + keyword_init: true, +) + +# Product - matches real ProductLoader::Messages::Product (28 fields) +# Many fields are nil in typical use, which affects serialization size +Product = Struct.new( + :id, + :title, + :handle, + :description, + :type, + :vendor, + :published_at, + :created_at, + :updated_at, + :template_suffix, + :gift_card, + :is_published, + :requires_selling_plan, + :published_scope, + :variants, + :options, # Contains SHARED ProductOptionValue objects + :selling_plan_groups, # Contains SHARED SellingPlan objects + :metafields, + keyword_init: true, +) + +ALL_STRUCTS = [ + Metafield, + SellingPlanPriceAdjustment, + SellingPlanOption, + SellingPlanCheckoutCharge, + SellingPlan, + SellingPlanGroup, + ProductOptionValue, + ProductOption, + ProductVariant, + Product, +].freeze + +# Struct types that are shared and benefit from ref_tracking +# - ProductOptionValue: shared between product.options[].values and variant.options +# - SellingPlan: shared between product.selling_plan_groups[].selling_plans and variant.selling_plans +# - SellingPlanGroup: can be shared across multiple products +# - ProductVariant: shared in combined listings +SHARED_STRUCTS = [ + SellingPlan, + SellingPlanGroup, + ProductOptionValue, + ProductVariant, +].to_set.freeze + +module CodeGen + def self.build_tracked_packer(struct) + packer_body = struct.members.map { |m| "packer.write(obj.#{m})" }.join("; ") + + eval(<<~RUBY, binding, __FILE__, __LINE__ + 1) + ->(obj, packer) { + tracker = packer.ref_tracker + ref_id = tracker[obj.__id__] + if ref_id + packer.write(ref_id) + else + tracker[obj.__id__] = tracker.size + 1 + packer.write(nil) + #{packer_body} + end + } + RUBY + end + + def self.build_untracked_packer(struct) + packer_body = struct.members.map { |m| "packer.write(obj.#{m})" }.join("; ") + eval(<<~RUBY, binding, __FILE__, __LINE__ + 1) + ->(obj, packer) { + #{packer_body} + } + RUBY + end + + def self.build_tracked_unpacker(struct) + args = struct.members.map { |m| "#{m}: unpacker.read" }.join(", ") + + eval(<<~RUBY, binding, __FILE__, __LINE__ + 1) + ->(unpacker) { + ref_id = unpacker.read + if ref_id + unpacker.ref_tracker[ref_id - 1] + else + tracker = unpacker.ref_tracker + idx = tracker.size + tracker << (obj = #{struct}.allocate) + obj.send(:initialize, #{args}) + obj + end + } + RUBY + end + + def self.build_untracked_unpacker(struct) + args = struct.members.map { |m| "#{m}: unpacker.read" }.join(", ") + eval(<<~RUBY, binding, __FILE__, __LINE__ + 1) + ->(unpacker) { + #{struct}.new(#{args}) + } + RUBY + end +end + +module Coders + def self.register_time(factory) + factory.register_type( + MessagePack::Timestamp::TYPE, + Time, + packer: MessagePack::Time::Packer, + unpacker: MessagePack::Time::Unpacker, + ) + end + + module Marshal + def self.factory + ::Marshal + end + + def self.description + "Marshal is what we currently use and is the baseline for comparison" + end + end + + module PlainRubyNaive + def self.build_factory + factory = MessagePack::Factory.new + Coders.register_time(factory) + + type_id = 0x20 + ALL_STRUCTS.each do |struct| + factory.register_type( + type_id, + struct, + packer: CodeGen.build_untracked_packer(struct), + unpacker: CodeGen.build_untracked_unpacker(struct), + recursive: true, + ) + type_id += 1 + end + + factory + end + + def self.factory + @factory ||= build_factory + end + + def self.description + "A naive pure-Ruby coder without any optimizations to the msgpack-ruby gem, and no ref_tracking." + end + end + + module PlainRubyArrays + def self.build_factory + factory = MessagePack::Factory.new + Coders.register_time(factory) + + type_id = 0x20 + ALL_STRUCTS.each do |struct| + unpacker = eval(<<~RUBY, binding, __FILE__, __LINE__ + 1) + -> (unpacker) { + #{struct.members.join(", ")} = unpacker.read + #{struct}.new(#{struct.members.map{ |m| "#{m}: #{m}" }.join(", ")}) + } + RUBY + + factory.register_type( + type_id, + struct, + packer: -> (obj, packer) { + packer.write(obj.to_a) + }, + unpacker:, + recursive: true, + ) + type_id += 1 + end + + factory + end + + def self.factory + @factory ||= build_factory + end + + def self.description + "A pure-Ruby coder that uses array-based unpacking without ref_tracking." + end + end + + module PlainRubyRefTracking + def self.build_factory + factory = MessagePack::Factory.new + Coders.register_time(factory) + + type_id = 0x20 + ALL_STRUCTS.each do |struct| + factory.register_type( + type_id, + struct, + packer: CodeGen.build_tracked_packer(struct), + unpacker: CodeGen.build_tracked_unpacker(struct), + recursive: true, + ) + type_id += 1 + end + + factory + end + + def self.factory + @factory ||= build_factory + end + + def self.description + "A pure-Ruby coder without any optimizations to the msgpack-ruby gem, but with reference tracking implemented in Ruby." + end + end + + module OptimizedStruct + def self.build_factory + factory = MessagePack::Factory.new + Coders.register_time(factory) + + type_id = 0x20 + ALL_STRUCTS.each do |struct| + factory.register_type(type_id, struct, optimized_struct: true) + type_id += 1 + end + + factory + end + + def self.factory + @factory ||= build_factory + end + + def self.description + "Uses msgpack-ruby's optimized_struct without ref_tracking. The lack of ref_tracking can lead to size bloat." + end + end + + module HybridRuby + module PackerExt + def ref_tracker + @ref_tracker ||= {} + end + end + + module UnpackerExt + def ref_tracker + @ref_tracker ||= [] + end + end + + MessagePack::Packer.include(PackerExt) + MessagePack::Unpacker.include(UnpackerExt) + + def self.build_factory + factory = MessagePack::Factory.new + Coders.register_time(factory) + + type_id = 0x20 + ALL_STRUCTS.each do |struct| + if SHARED_STRUCTS.include?(struct) + # Ruby callbacks with ref_tracking for shared types + factory.register_type( + type_id, + struct, + packer: CodeGen.build_tracked_packer(struct), + unpacker: CodeGen.build_tracked_unpacker(struct), + recursive: true, + ) + else + # C-level optimized_struct for non-shared types + factory.register_type(type_id, struct, optimized_struct: true) + end + type_id += 1 + end + + factory + end + + def self.factory + @factory ||= build_factory + end + + def self.description + "Uses optimized_struct for non-referenced Structs, and Ruby-level reference tracking for shared types" + end + end + +end + +# ============================================================================= +# Data Generation +# ============================================================================= + +def create_selling_plan(id:) + SellingPlan.new( + id: id, + name: "Subscribe & Save #{id}", + description: "Save 10% with a subscription", + recurring_deliveries: true, + options: [ + SellingPlanOption.new(name: "Delivery Frequency", position: 1, value: "1 Month"), + ], + price_adjustments: [ + SellingPlanPriceAdjustment.new(order_count: nil, position: 1, value_type: "percentage", value: 10), + ], + checkout_charge: SellingPlanCheckoutCharge.new(value_type: "percentage", value: 100), + ) +end + +def create_selling_plan_group(id:, selling_plans:) + SellingPlanGroup.new( + id: id, + name: "Subscription Group #{id}", + options: [{ name: "Delivery Frequency", position: 1, values: ["1 Month", "2 Months"] }], + selling_plans: selling_plans, + ) +end + +def create_metafields(owner_id:, count:, owner_type:) + # Match real-world metafield characteristics: + # - Short, often repeated type values + # - Some nil fields (definition_id, original_type) + # - Relatively short values + (1..count).map do |i| + Metafield.new( + id: owner_id * 1000 + i, + namespace: "custom", + key: "field_#{i}", + value: "Value #{i}", + type: "single_line_text_field", # this should be an enum + value_type: "string", + definition_id: nil, + owner_type: owner_type, + owner_id: owner_id, + created_at: Time.now, + updated_at: Time.now, + original_type: nil, + ) + end +end + +def create_product(id:, num_variants:, num_options:, selling_plan_groups:, num_product_metafields:, num_variant_metafields:) + # Create shared option values + option_values_by_option = {} + options = (1..num_options).map do |opt_idx| + values = (1..3).map do |val_idx| + ProductOptionValue.new( + id: id * 1000 + opt_idx * 100 + val_idx, + name: "Option#{opt_idx} Value#{val_idx}", + position: val_idx, + swatch_color: nil, # Most products don't have swatch colors + ) + end + option_values_by_option[opt_idx] = values + + ProductOption.new( + id: id * 100 + opt_idx, + name: "Option #{opt_idx}", + position: opt_idx, + values: values, + ) + end + + # Create variants that SHARE the option values + variants = (1..num_variants).map do |var_idx| + # Each variant references the SAME ProductOptionValue objects + variant_options = option_values_by_option.values.map { |vals| vals.sample } + + # Variants also SHARE the selling plans + variant_selling_plans = selling_plan_groups.flat_map(&:selling_plans) + + # Match real ProductVariant structure with some nil fields (sparse data) + ProductVariant.new( + id: id * 1000 + var_idx, + product_id: id, + title: "Variant #{var_idx}", + uncontextualized_title: nil, + price: 1999 + var_idx * 100, + compare_at_price: nil, # Most variants don't have compare_at_price + barcode: nil, # Most variants don't have barcodes + options: variant_options, + option1: variant_options[0]&.name, + option2: variant_options[1]&.name, + option1_id: variant_options[0]&.id, + option2_id: variant_options[1]&.id, + taxable: true, + position: var_idx, + created_at: Time.now, + updated_at: Time.now, + fulfillment_service: "manual", + requires_components: false, + inventory_management: "shopify", + inventory_policy: "deny", + weight_unit: "kg", + weight_value: nil, + sku: "SKU-#{id}-#{var_idx}", + requires_shipping: true, + selling_plans: variant_selling_plans, + metafields: create_metafields(owner_id: id * 1000 + var_idx, count: num_variant_metafields, owner_type: "ProductVariant"), + variant_unit_price_measurement: nil, + ) + end + + # Match real Product structure with some nil fields (sparse data) + Product.new( + id: id, + title: "Product #{id}", + handle: "product-#{id}", + description: "Description for product #{id}", + vendor: "Vendor", + published_at: Time.now, + created_at: Time.now, + updated_at: Time.now, + template_suffix: nil, + gift_card: false, + is_published: true, + requires_selling_plan: selling_plan_groups.any?, + published_scope: :published_scope_global, + variants: variants, + options: options, + selling_plan_groups: selling_plan_groups, + metafields: create_metafields(owner_id: id, count: num_product_metafields, owner_type: "Product"), + ) +end + +def create_test_data(num_products:, num_variants:, num_selling_plan_groups:, num_selling_plans_per_group:, num_product_metafields: 0, num_variant_metafields: 0) + # Create SHARED selling plans - same instances used across all products + selling_plan_id = 1 + selling_plan_groups = (1..num_selling_plan_groups).map do |group_idx| + selling_plans = (1..num_selling_plans_per_group).map do + plan = create_selling_plan(id: selling_plan_id) + selling_plan_id += 1 + plan + end + create_selling_plan_group(id: group_idx, selling_plans: selling_plans) + end + + # Create products that share the same selling plan groups + products = (1..num_products).map do |product_id| + create_product( + id: product_id, + num_variants: num_variants, + num_options: 2, + selling_plan_groups: selling_plan_groups, + num_product_metafields: num_product_metafields, + num_variant_metafields: num_variant_metafields, + ) + end + + products +end + +# ============================================================================= +# Benchmark +# ============================================================================= + +SCENARIOS = [ + # Baseline: minimal + { products: 1, variants: 1, product_metafields: 0, variant_metafields: 0, spg: 0, sp: 0 }, + + # Scale products + { products: 50, variants: 1, product_metafields: 0, variant_metafields: 0, spg: 0, sp: 0 }, + + # Scale variants + { products: 10, variants: 50, product_metafields: 0, variant_metafields: 0, spg: 0, sp: 0 }, + + # Scale product metafields + { products: 10, variants: 5, product_metafields: 20, variant_metafields: 0, spg: 0, sp: 0 }, + + # Scale variant metafields + { products: 10, variants: 5, product_metafields: 0, variant_metafields: 10, spg: 0, sp: 0 }, + + # Scale selling plans for ref_tracking + { products: 10, variants: 5, product_metafields: 0, variant_metafields: 0, spg: 1, sp: 2 }, + { products: 10, variants: 5, product_metafields: 0, variant_metafields: 0, spg: 3, sp: 5 }, + + # Combined + { products: 10, variants: 10, product_metafields: 5, variant_metafields: 3, spg: 1, sp: 2 }, + { products: 50, variants: 250, product_metafields: 20, variant_metafields: 10, spg: 3, sp: 5 }, +].freeze + +Report = Struct.new(:scenario, :bench_results, :bytesize_results, keyword_init: true) + +def run_benchmark(coders, scenario) + data = create_test_data( + num_products: scenario[:products], + num_variants: scenario[:variants], + num_selling_plan_groups: scenario[:spg], + num_selling_plans_per_group: scenario[:sp], + num_product_metafields: scenario[:product_metafields] || 0, + num_variant_metafields: scenario[:variant_metafields] || 0, + ) + + puts "\nBenchmarking scenario: P:#{scenario[:products]} V:#{scenario[:variants]} PM:#{scenario[:product_metafields]} VM:#{scenario[:variant_metafields]} SPG:#{scenario[:spg]} SP:#{scenario[:sp]}" + + payloads = coders.map { |coder| coder.factory.dump(data) } + + result = Benchmark.ips(quiet: true) do |x| + coders.each.with_index do |coder, index| + x.report(coder.name.split("::").last) do + coder.factory.load(payloads[index]) + end + end + end + + marshal_ips = result.entries.first.stats.central_tendency + bench_results = result.entries.map.with_index do |entry, index| + entry_ips = entry.stats.central_tendency + speedup = entry_ips / marshal_ips + [coders[index], speedup] + end.to_h + + bytesize_results = coders.each_with_index.map do |coder, index| + [coder, payloads[index].bytesize] + end.to_h + + Report.new( + scenario:, + bench_results:, + bytesize_results:, + ) +end + +BOLD = "\e[1m" +RESET = "\e[0m" + +def print_results(coders, reports) + puts "=" * 80 + puts "BENCHMARK: Decode Performance & Size Comparison" + puts "=" * 80 + puts "Coders:" + coders.each do |coder| + puts " - #{coder.name.split("::").last}: #{coder.description}" + end + puts + + reports.each do |report| + s = report.scenario + scenario_str = "P:#{s[:products]} V:#{s[:variants]} PM:#{s[:product_metafields]} VM:#{s[:variant_metafields]} SPG:#{s[:spg]} SP:#{s[:sp]}" + + sorted_results = report.bench_results.sort_by { |_, v| -v } + + marshal_size = report.bytesize_results[Coders::Marshal] + + puts "Scenario: #{scenario_str}" + puts "Winner: #{sorted_results.first[0].name.split("::").last} with #{'%.2f' % sorted_results.first[1]}x speedup" + linesize = 56 + puts "-" * linesize + puts format("%-20s %12s %10s %10s", "Coder", "Size (bytes)", "Size (%)", "Speedup") + puts "-" * linesize + + sorted_results.each do |result| + coder, speedup = result + size = report.bytesize_results[coder] + size_pct = (size.to_f / marshal_size) * 100 + coder_name = coder.name.split("::").last + + line = format("%-20s %12d %9.1f%% %9.2fx", coder_name, size, size_pct, speedup) + if coder == Coders::Marshal + puts "#{BOLD}#{line}#{RESET}" + else + puts line + end + end + + puts "=" * linesize + puts + end +end + +# ============================================================================= +# Main +# ============================================================================= + +puts "Running benchmarks..." +puts + +coders = Coders.constants.map { |c| Coders.const_get(c) } +# Always put Marshal first for baseline comparison +coders.delete(Coders::Marshal) +coders.unshift(Coders::Marshal) + +results = SCENARIOS.map.with_index do |scenario, i| + print "\r[#{i + 1}/#{SCENARIOS.size}] Testing: P:#{scenario[:products]} V:#{scenario[:variants]} PM:#{scenario[:product_metafields]} VM:#{scenario[:variant_metafields]} SPG:#{scenario[:spg]} SP:#{scenario[:sp]}..." + run_benchmark(coders, scenario) +end +puts "\r" + " " * 80 + "\r" + +print_results(coders, results) diff --git a/ext/msgpack/factory_class.c b/ext/msgpack/factory_class.c index 1eeb0081..eb3b81a3 100644 --- a/ext/msgpack/factory_class.c +++ b/ext/msgpack/factory_class.c @@ -250,6 +250,27 @@ static VALUE Factory_register_type_internal(VALUE self, VALUE rb_ext_type, VALUE if(RTEST(rb_hash_aref(options, ID2SYM(rb_intern("recursive"))))) { flags |= MSGPACK_EXT_RECURSIVE; } + + /* optimized_struct: true enables C-level fast path for Struct types */ + if (RTEST(rb_hash_aref(options, ID2SYM(rb_intern("optimized_struct"))))) { + /* Verify it's actually a Struct subclass */ + if (!(rb_obj_is_kind_of(ext_module, rb_cClass) && + RTEST(rb_class_inherited_p(ext_module, rb_cStruct)))) { + rb_raise(rb_eArgError, "optimized_struct: true requires a Struct subclass"); + } + /* Verify packer/unpacker procs weren't also provided */ + if (RTEST(packer_proc) || RTEST(unpacker_proc)) { + rb_raise(rb_eArgError, "optimized_struct: true cannot be used with packer or unpacker options"); + } + /* Verify recursive wasn't also provided - optimized_struct handles recursion automatically */ + if (flags & MSGPACK_EXT_RECURSIVE) { + rb_raise(rb_eArgError, "optimized_struct: true cannot be used with recursive option"); + } + flags |= MSGPACK_EXT_STRUCT_FAST_PATH; + /* Store the class itself - C code uses it directly */ + packer_proc = ext_module; + unpacker_proc = ext_module; + } } msgpack_packer_ext_registry_put(self, &fc->pkrg, ext_module, ext_type, flags, packer_proc); diff --git a/ext/msgpack/packer.c b/ext/msgpack/packer.c index 8e8dd966..6beb8029 100644 --- a/ext/msgpack/packer.c +++ b/ext/msgpack/packer.c @@ -91,6 +91,28 @@ void msgpack_packer_write_hash_value(msgpack_packer_t* pk, VALUE v) rb_hash_foreach(v, write_hash_foreach, (VALUE) pk); } +/* Pack a Struct's fields directly using C API, bypassing Ruby callbacks */ +struct msgpack_packer_struct_args_t; +typedef struct msgpack_packer_struct_args_t msgpack_packer_struct_args_t; +struct msgpack_packer_struct_args_t { + msgpack_packer_t* pk; + VALUE v; +}; + +static VALUE msgpack_packer_write_struct_fields_protected(VALUE value) +{ + msgpack_packer_struct_args_t *args = (msgpack_packer_struct_args_t *)value; + msgpack_packer_t* pk = args->pk; + VALUE v = args->v; + + long len = RSTRUCT_LEN(v); + for (int i = 0; i < len; i++) { + VALUE field = RSTRUCT_GET(v, i); + msgpack_packer_write_value(pk, field); + } + return Qnil; +} + struct msgpack_call_proc_args_t; typedef struct msgpack_call_proc_args_t msgpack_call_proc_args_t; struct msgpack_call_proc_args_t { @@ -114,6 +136,33 @@ bool msgpack_packer_try_write_with_ext_type_lookup(msgpack_packer_t* pk, VALUE v return false; } + if(ext_flags & MSGPACK_EXT_STRUCT_FAST_PATH) { + /* Fast path for Struct: directly access fields in C, no Ruby callbacks */ + VALUE held_buffer = MessagePack_Buffer_hold(&pk->buffer); + + msgpack_buffer_t parent_buffer = pk->buffer; + msgpack_buffer_init(PACKER_BUFFER_(pk)); + + /* Write struct fields with exception handling */ + int exception_occured = 0; + msgpack_packer_struct_args_t args = { pk, v }; + rb_protect(msgpack_packer_write_struct_fields_protected, (VALUE)&args, &exception_occured); + + if (exception_occured) { + msgpack_buffer_destroy(PACKER_BUFFER_(pk)); + pk->buffer = parent_buffer; + rb_jump_tag(exception_occured); // re-raise the exception + } else { + VALUE payload = msgpack_buffer_all_as_string(PACKER_BUFFER_(pk)); + msgpack_buffer_destroy(PACKER_BUFFER_(pk)); + pk->buffer = parent_buffer; + msgpack_packer_write_ext(pk, ext_type, payload); + } + + RB_GC_GUARD(held_buffer); + return true; + } + if(ext_flags & MSGPACK_EXT_RECURSIVE) { VALUE held_buffer = MessagePack_Buffer_hold(&pk->buffer); diff --git a/ext/msgpack/packer_ext_registry.h b/ext/msgpack/packer_ext_registry.h index 779f3efa..3b5c60d9 100644 --- a/ext/msgpack/packer_ext_registry.h +++ b/ext/msgpack/packer_ext_registry.h @@ -22,6 +22,7 @@ #include "ruby.h" #define MSGPACK_EXT_RECURSIVE 0b0001 +#define MSGPACK_EXT_STRUCT_FAST_PATH 0b0010 struct msgpack_packer_ext_registry_t; typedef struct msgpack_packer_ext_registry_t msgpack_packer_ext_registry_t; diff --git a/ext/msgpack/unpacker.c b/ext/msgpack/unpacker.c index ca8d0640..b7c14c51 100644 --- a/ext/msgpack/unpacker.c +++ b/ext/msgpack/unpacker.c @@ -231,6 +231,13 @@ static inline int object_complete_ext(msgpack_unpacker_t* uk, int ext_type, VALU VALUE proc = msgpack_unpacker_ext_registry_lookup(uk->ext_registry, ext_type, &ext_flags); if(proc != Qnil) { + /* Handle struct fast path for empty structs (0 fields) */ + if (ext_flags & MSGPACK_EXT_STRUCT_FAST_PATH) { + VALUE struct_class = proc; + VALUE obj = rb_class_new_instance(0, NULL, struct_class); + return object_complete(uk, obj); + } + VALUE obj; VALUE arg = (str == Qnil ? rb_str_buf_new(0) : str); int raised; @@ -371,6 +378,70 @@ static inline int read_raw_body_begin(msgpack_unpacker_t* uk, int raw_type) if(!(raw_type == RAW_TYPE_STRING || raw_type == RAW_TYPE_BINARY)) { proc = msgpack_unpacker_ext_registry_lookup(uk->ext_registry, raw_type, &ext_flags); + + if(proc != Qnil && ext_flags & MSGPACK_EXT_STRUCT_FAST_PATH) { + /* Fast path for Struct: proc is actually the Struct class + * Read fields directly and construct struct in C */ + VALUE struct_class = proc; + uk->last_object = Qnil; + reset_head_byte(uk); + uk->reading_raw_remaining = 0; + + /* Get struct members */ + VALUE members = rb_struct_s_members(struct_class); + long num_fields = RARRAY_LEN(members); + + /* Check if this is a keyword_init struct (Ruby 2.7+) */ + VALUE keyword_init = Qfalse; + if (rb_respond_to(struct_class, rb_intern("keyword_init?"))) { + keyword_init = rb_funcall(struct_class, rb_intern("keyword_init?"), 0); + } + + /* Push a recursive marker so nested reads don't prematurely return */ + _msgpack_unpacker_stack_push(uk, STACK_TYPE_RECURSIVE, 1, Qnil); + + VALUE obj; + if (num_fields == 0) { + /* Special case for empty structs */ + obj = rb_class_new_instance(0, NULL, struct_class); + } else if (RTEST(keyword_init)) { + /* For keyword_init structs, build a hash with member names as keys */ + VALUE kwargs = rb_hash_new(); + for (long i = 0; i < num_fields; i++) { + int ret = msgpack_unpacker_read(uk, 0); + if (ret < 0) { + msgpack_unpacker_stack_pop(uk); + return ret; + } + VALUE key = rb_ary_entry(members, i); + rb_hash_aset(kwargs, key, uk->last_object); + } + /* Call new with keyword arguments */ + obj = rb_class_new_instance_kw(1, &kwargs, struct_class, RB_PASS_KEYWORDS); + } else { + /* For regular structs, use positional arguments + * Use RB_ALLOCV to avoid stack overflow with large structs */ + VALUE allocv_holder; + VALUE *values = RB_ALLOCV_N(VALUE, allocv_holder, num_fields); + for (int i = 0; i < num_fields; i++) { + int ret = msgpack_unpacker_read(uk, 0); + if (ret < 0) { + msgpack_unpacker_stack_pop(uk); + RB_ALLOCV_END(allocv_holder); + return ret; + } + values[i] = uk->last_object; + } + obj = rb_class_new_instance((int)num_fields, values, struct_class); + RB_ALLOCV_END(allocv_holder); + } + + RB_GC_GUARD(struct_class); + RB_GC_GUARD(members); + msgpack_unpacker_stack_pop(uk); + return object_complete(uk, obj); + } + if(proc != Qnil && ext_flags & MSGPACK_EXT_RECURSIVE) { VALUE obj; uk->last_object = Qnil; diff --git a/ext/msgpack/unpacker_ext_registry.h b/ext/msgpack/unpacker_ext_registry.h index e957816d..f76a0175 100644 --- a/ext/msgpack/unpacker_ext_registry.h +++ b/ext/msgpack/unpacker_ext_registry.h @@ -22,6 +22,7 @@ #include "ruby.h" #define MSGPACK_EXT_RECURSIVE 0b0001 +#define MSGPACK_EXT_STRUCT_FAST_PATH 0b0010 struct msgpack_unpacker_ext_registry_t; typedef struct msgpack_unpacker_ext_registry_t msgpack_unpacker_ext_registry_t; diff --git a/spec/optimized_struct_spec.rb b/spec/optimized_struct_spec.rb new file mode 100644 index 00000000..0d815fc3 --- /dev/null +++ b/spec/optimized_struct_spec.rb @@ -0,0 +1,294 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe 'optimized_struct' do + let(:point_struct) { Struct.new(:x, :y) } + let(:person_struct) { Struct.new(:name, :age, :email) } + let(:keyword_struct) { Struct.new(:foo, :bar, keyword_init: true) } + + describe 'basic functionality' do + it 'serializes and deserializes a simple struct' do + factory = MessagePack::Factory.new + factory.register_type(0x01, point_struct, optimized_struct: true) + + point = point_struct.new(10, 20) + packed = factory.dump(point) + restored = factory.load(packed) + + expect(restored).to be_a(point_struct) + expect(restored.x).to eq(10) + expect(restored.y).to eq(20) + end + + it 'serializes and deserializes a struct with various field types' do + factory = MessagePack::Factory.new + factory.register_type(0x01, person_struct, optimized_struct: true) + + person = person_struct.new('Alice', 30, 'alice@example.com') + packed = factory.dump(person) + restored = factory.load(packed) + + expect(restored.name).to eq('Alice') + expect(restored.age).to eq(30) + expect(restored.email).to eq('alice@example.com') + end + + it 'handles nil fields' do + factory = MessagePack::Factory.new + factory.register_type(0x01, person_struct, optimized_struct: true) + + person = person_struct.new('Bob', nil, nil) + packed = factory.dump(person) + restored = factory.load(packed) + + expect(restored.name).to eq('Bob') + expect(restored.age).to be_nil + expect(restored.email).to be_nil + end + end + + describe 'keyword_init structs' do + it 'serializes and deserializes keyword_init structs' do + factory = MessagePack::Factory.new + factory.register_type(0x01, keyword_struct, optimized_struct: true) + + obj = keyword_struct.new(foo: 'hello', bar: 42) + packed = factory.dump(obj) + restored = factory.load(packed) + + expect(restored.foo).to eq('hello') + expect(restored.bar).to eq(42) + end + end + + describe 'nested structs' do + let(:address_struct) { Struct.new(:street, :city) } + let(:employee_struct) { Struct.new(:name, :address) } + + it 'handles nested optimized structs' do + factory = MessagePack::Factory.new + factory.register_type(0x01, address_struct, optimized_struct: true) + factory.register_type(0x02, employee_struct, optimized_struct: true) + + address = address_struct.new('123 Main St', 'Springfield') + employee = employee_struct.new('John', address) + + packed = factory.dump(employee) + restored = factory.load(packed) + + expect(restored.name).to eq('John') + expect(restored.address).to be_a(address_struct) + expect(restored.address.street).to eq('123 Main St') + expect(restored.address.city).to eq('Springfield') + end + + it 'handles self-referential structs (same type nested)' do + person_struct = Struct.new(:name, :friends) + + factory = MessagePack::Factory.new + factory.register_type(0x01, person_struct, optimized_struct: true) + + alice = person_struct.new('Alice', []) + bob = person_struct.new('Bob', []) + charlie = person_struct.new('Charlie', [alice, bob]) + + packed = factory.dump(charlie) + restored = factory.load(packed) + + expect(restored.name).to eq('Charlie') + expect(restored.friends.length).to eq(2) + expect(restored.friends[0]).to be_a(person_struct) + expect(restored.friends[0].name).to eq('Alice') + expect(restored.friends[1].name).to eq('Bob') + end + end + + describe 'arrays and hashes' do + let(:container_struct) { Struct.new(:items, :metadata) } + + it 'handles structs containing arrays and hashes' do + factory = MessagePack::Factory.new + factory.register_type(0x01, container_struct, optimized_struct: true) + + obj = container_struct.new([1, 2, 3], { 'key' => 'value' }) + packed = factory.dump(obj) + restored = factory.load(packed) + + expect(restored.items).to eq([1, 2, 3]) + expect(restored.metadata).to eq({ 'key' => 'value' }) + end + + it 'handles arrays of structs' do + factory = MessagePack::Factory.new + factory.register_type(0x01, point_struct, optimized_struct: true) + + points = [point_struct.new(1, 2), point_struct.new(3, 4), point_struct.new(5, 6)] + packed = factory.dump(points) + restored = factory.load(packed) + + expect(restored.length).to eq(3) + expect(restored[0].x).to eq(1) + expect(restored[1].x).to eq(3) + expect(restored[2].x).to eq(5) + end + end + + describe 'error handling' do + it 'raises error when optimized_struct is used with non-Struct class' do + factory = MessagePack::Factory.new + + expect do + factory.register_type(0x01, String, optimized_struct: true) + end.to raise_error(ArgumentError, /optimized_struct.*requires.*Struct/) + end + + it 'raises error when optimized_struct is used with a module' do + factory = MessagePack::Factory.new + some_module = Module.new + + expect do + factory.register_type(0x01, some_module, optimized_struct: true) + end.to raise_error(ArgumentError, /optimized_struct.*requires.*Struct/) + end + + it 'raises error when optimized_struct is used with packer option' do + factory = MessagePack::Factory.new + + expect do + factory.register_type(0x01, point_struct, + optimized_struct: true, + packer: ->(obj) { obj.to_a.pack('l*') }) + end.to raise_error(ArgumentError, /optimized_struct.*cannot be used with packer or unpacker/) + end + + it 'raises error when optimized_struct is used with unpacker option' do + factory = MessagePack::Factory.new + + expect do + factory.register_type(0x01, point_struct, + optimized_struct: true, + unpacker: ->(data) { point_struct.new(*data.unpack('l*')) }) + end.to raise_error(ArgumentError, /optimized_struct.*cannot be used with packer or unpacker/) + end + + it 'raises error when optimized_struct is used with recursive option' do + factory = MessagePack::Factory.new + + expect do + factory.register_type(0x01, point_struct, + optimized_struct: true, + recursive: true) + end.to raise_error(ArgumentError, /optimized_struct.*cannot be used with recursive/) + end + + it 'propagates exceptions from nested serialization' do + error_struct = Struct.new(:value) + bad_object = Object.new + def bad_object.to_msgpack(_packer) + raise 'intentional error' + end + + factory = MessagePack::Factory.new + factory.register_type(0x01, error_struct, optimized_struct: true) + + obj = error_struct.new(bad_object) + expect { factory.dump(obj) }.to raise_error(RuntimeError, 'intentional error') + end + end + + describe 'performance comparison' do + let(:large_struct) do + Struct.new(:f1, :f2, :f3, :f4, :f5, :f6, :f7, :f8, :f9, :f10) + end + + it 'produces same results as recursive packer/unpacker' do + # Factory with optimized_struct + factory_optimized = MessagePack::Factory.new + factory_optimized.register_type(0x01, large_struct, optimized_struct: true) + + # Factory with traditional recursive packer/unpacker + factory_recursive = MessagePack::Factory.new + factory_recursive.register_type(0x01, large_struct, + packer: lambda { |obj, packer| + large_struct.members.each { |m| packer.write(obj.send(m)) } + }, + unpacker: lambda { |unpacker| + large_struct.new(*large_struct.members.map { unpacker.read }) + }, + recursive: true) + + obj = large_struct.new(1, 'two', 3.0, nil, true, false, [1, 2], { 'a' => 1 }, 'nine', 10) + + packed_optimized = factory_optimized.dump(obj) + packed_recursive = factory_recursive.dump(obj) + + # Wire format should be identical + expect(packed_optimized).to eq(packed_recursive) + + # Both should round-trip correctly + restored_optimized = factory_optimized.load(packed_optimized) + restored_recursive = factory_recursive.load(packed_recursive) + + # Compare field by field (Symbol keys become String keys in msgpack) + large_struct.members.each do |member| + expect(restored_optimized.send(member)).to eq(restored_recursive.send(member)) + end + end + end + + describe 'edge cases' do + it 'handles empty structs (0 fields)' do + empty_struct = Struct.new + + factory = MessagePack::Factory.new + factory.register_type(0x01, empty_struct, optimized_struct: true) + + obj = empty_struct.new + packed = factory.dump(obj) + restored = factory.load(packed) + + expect(restored).to be_a(empty_struct) + end + + it 'handles structs with many fields' do + # Create a struct with 100 fields to stress test RB_ALLOCV + field_names = (1..100).map { |i| :"field_#{i}" } + many_fields_struct = Struct.new(*field_names) + + factory = MessagePack::Factory.new + factory.register_type(0x01, many_fields_struct, optimized_struct: true) + + values = (1..100).to_a + obj = many_fields_struct.new(*values) + packed = factory.dump(obj) + restored = factory.load(packed) + + expect(restored).to be_a(many_fields_struct) + field_names.each_with_index do |field, i| + expect(restored.send(field)).to eq(i + 1) + end + end + + it 'works with Struct subclasses' do + base_struct = Struct.new(:x, :y) + subclass = Class.new(base_struct) do + def magnitude + Math.sqrt(x**2 + y**2) + end + end + + factory = MessagePack::Factory.new + factory.register_type(0x01, subclass, optimized_struct: true) + + obj = subclass.new(3, 4) + packed = factory.dump(obj) + restored = factory.load(packed) + + expect(restored).to be_a(subclass) + expect(restored.x).to eq(3) + expect(restored.y).to eq(4) + expect(restored.magnitude).to eq(5.0) + end + end +end