From f6d1bc6b99253c43e655fa8ac3fa77ad7d03ee1b Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Mon, 4 May 2026 16:56:31 +0200 Subject: [PATCH 1/9] [DOC] Add feature 21796 in NEWS.md --- NEWS.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/NEWS.md b/NEWS.md index bc72ba55c25831..77ec892b661406 100644 --- a/NEWS.md +++ b/NEWS.md @@ -15,6 +15,8 @@ Note: We're only listing outstanding class updates. * `Array#pack` accepts a new format `R` and `r` for unpacking unsigned and signed LEB128 encoded integers. [[Feature #21785]] + * `Array#pack` accepts a new format `^` that returns the current offset. + Useful when combined with variable width formats like LEB128. [[Feature #21796]] * ENV @@ -164,6 +166,7 @@ A lot of work has gone into making Ractors more stable, performant, and usable. [Feature #15330]: https://bugs.ruby-lang.org/issues/15330 [Feature #21390]: https://bugs.ruby-lang.org/issues/21390 [Feature #21785]: https://bugs.ruby-lang.org/issues/21785 +[Feature #21796]: https://bugs.ruby-lang.org/issues/21796 [Feature #21853]: https://bugs.ruby-lang.org/issues/21853 [Feature #21861]: https://bugs.ruby-lang.org/issues/21861 [Feature #21932]: https://bugs.ruby-lang.org/issues/21932 From b583dd68799dcd0ed73a6cbf6f61e3faf5194f0e Mon Sep 17 00:00:00 2001 From: Benoit Daloze Date: Mon, 4 May 2026 17:15:53 +0200 Subject: [PATCH 2/9] Fix typo and improve docs of Exception#exception --- error.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/error.c b/error.c index 52bd3629bf2d13..ed7394dbf5e756 100644 --- a/error.c +++ b/error.c @@ -1533,7 +1533,7 @@ exc_initialize(int argc, VALUE *argv, VALUE exc) * * x0 = StandardError.new('Boom') # => # * x1 = x0.exception # => # - * x0.__id__ == x1.__id__ # => true + * x0.equal?(x1) # => true * * With {string-convertible object}[rdoc-ref:implicit_conversion.rdoc@String-Convertible+Objects] * +message+ (even the same as the original message), @@ -1541,7 +1541,7 @@ exc_initialize(int argc, VALUE *argv, VALUE exc) * and whose message is the given +message+: * * x1 = x0.exception('Boom') # => # - * x0..equal?(x1) # => false + * x0.equal?(x1) # => false * */ From 0cbd210c3a3a9f2ed603f704d915ffc8dd51bc7f Mon Sep 17 00:00:00 2001 From: Earlopain <14981592+Earlopain@users.noreply.github.com> Date: Mon, 20 Apr 2026 19:16:56 +0200 Subject: [PATCH 3/9] [ruby/prism] Optimize ripper bounds Basically a port of https://github.com/ruby/ruby/commit/c45f781771314a71856c9b348c640ba532f54349 into ruby It's quite effective at ~97% hit rate for me. Speeds it up from ~6.77x slower to only 4.07x slower. For the lexer `on_sp` it also gives a bit of an improvement: 1.04x slower to 1.10x faster I guess the class may be universally useful but for now I just made it nodoc. 
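A rough usage sketch of the new cache (illustration only, not part of the change; it assumes the class stays reachable under its full constant path and that offsets are queried mostly in increasing order, which is the pattern the hint-window scan is tuned for):

    require "prism"

    result = Prism.parse("a = 1\nb = 2\n")
    cache  = Prism::Translation::Ripper::LineAndColumnCache.new(result.source)

    result.value.statements.body.each do |node|
      # Consecutive lookups land on the same or the next line, so the hint
      # hits and Source#find_line (the binary search) is rarely needed.
      p cache.line_and_column(node.location.start_offset) # => [1, 0], then [2, 0]
    end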
https://github.com/ruby/prism/commit/3ad9db38fe --- lib/prism/lex_compat.rb | 11 +++++- lib/prism/parse_result.rb | 4 +- lib/prism/translation/ripper.rb | 69 ++++++++++++++++++++++++++++++--- 3 files changed, 74 insertions(+), 10 deletions(-) diff --git a/lib/prism/lex_compat.rb b/lib/prism/lex_compat.rb index e1b04fc6cea1e7..7aacec037da5e0 100644 --- a/lib/prism/lex_compat.rb +++ b/lib/prism/lex_compat.rb @@ -23,6 +23,12 @@ module Prism # def self.[]: (Integer value) -> State # end # end + # + # class LineAndColumnCache + # def initialize: (Source source) -> void + # + # def line_and_column: (Integer byte_offset) -> [Integer, Integer] + # end # end # end @@ -837,6 +843,8 @@ def post_process_tokens(tokens, source, data_loc, bom, eof_token) prev_token_state = Translation::Ripper::Lexer::State[Translation::Ripper::EXPR_BEG] prev_token_end = bom ? 3 : 0 + cache = Translation::Ripper::LineAndColumnCache.new(source) + tokens.each do |token| # Skip missing heredoc ends. next if token[1] == :on_heredoc_end && token[2] == "" @@ -851,8 +859,7 @@ def post_process_tokens(tokens, source, data_loc, bom, eof_token) if start_offset > prev_token_end sp_value = source.slice(prev_token_end, start_offset - prev_token_end) - sp_line = source.line(prev_token_end) - sp_column = source.column(prev_token_end) + sp_line, sp_column = cache.line_and_column(prev_token_end) # Ripper reports columns on line 1 without counting the BOM sp_column -= 3 if sp_line == 1 && bom continuation_index = sp_value.byteindex("\\") diff --git a/lib/prism/parse_result.rb b/lib/prism/parse_result.rb index 7cf6630f4453ca..93d3c006b72b65 100644 --- a/lib/prism/parse_result.rb +++ b/lib/prism/parse_result.rb @@ -225,9 +225,7 @@ def deep_freeze freeze end - private - - # Binary search through the offsets to find the line number for the given + # Binary search through the offsets to find the index for the given # byte offset. #-- #: (Integer byte_offset) -> Integer diff --git a/lib/prism/translation/ripper.rb b/lib/prism/translation/ripper.rb index 77ee2c337d9517..cbbc0b5db8fa77 100644 --- a/lib/prism/translation/ripper.rb +++ b/lib/prism/translation/ripper.rb @@ -445,6 +445,64 @@ def self.sexp_raw(src, filename = "-", lineno = 1, raise_errors: false) autoload :SexpBuilder, "prism/translation/ripper/sexp" autoload :SexpBuilderPP, "prism/translation/ripper/sexp" + # Provides optimized access to line and column information. + # Ripper bounds are mostly accessed in a linear fashion, so + # we can try a linear scan first and fall back to binary search. + class LineAndColumnCache # :nodoc: + # How many should it look ahead/behind before falling back to binary searching. + WINDOW = 8 + private_constant :WINDOW + + #: (Source source) -> void + def initialize(source) + @source = source + @offsets = source.offsets + @hint = 0 + end + + #: (Integer byte_offset) -> [Integer, Integer] + def line_and_column(byte_offset) + @hint = new_hint(byte_offset) || @source.find_line(byte_offset) + return [@hint + @source.start_line, byte_offset - @offsets[@hint]] + end + + private + + def new_hint(byte_offset) + if @offsets[@hint] <= byte_offset + # Same line? + if (@hint + 1 >= @offsets.size || @offsets[@hint + 1] > byte_offset) + return @hint + end + + # Scan forwards + limit = [@hint + WINDOW + 1, @offsets.size].min + idx = @hint + 1 + while idx < limit + if @offsets[idx] > byte_offset + return idx - 1 + end + if @offsets[idx] == byte_offset + return idx + end + idx += 1 + end + else + # Scan backwards + limit = @hint > WINDOW ? 
@hint - WINDOW : 0 + idx = @hint + while idx >= limit + 1 + if @offsets[idx - 1] <= byte_offset + return idx - 1 + end + idx -= 1 + end + end + + nil + end + end + # :stopdoc: # This is not part of the public API but used by some gems. @@ -488,6 +546,7 @@ def initialize(source, filename = "(ripper)", lineno = 1) @lineno = lineno @column = 0 @result = nil + @line_and_column_cache = nil end ########################################################################## @@ -4033,6 +4092,10 @@ def result @result ||= Prism.parse(source, partial_script: true, version: "current") end + def line_and_column_cache + @line_and_column_cache ||= LineAndColumnCache.new(result.source) + end + ########################################################################## # Helpers ########################################################################## @@ -4133,12 +4196,8 @@ def visit_write_value(node) # This method is responsible for updating lineno and column information # to reflect the current node. - # - # This method could be drastically improved with some caching on the start - # of every line, but for now it's good enough. def bounds(location) - @lineno = location.start_line - @column = location.start_column + @lineno, @column = line_and_column_cache.line_and_column(location.start_offset) end # :startdoc: From 1367f885923eb8b9698f20a446515aa9462893c8 Mon Sep 17 00:00:00 2001 From: Earlopain <14981592+Earlopain@users.noreply.github.com> Date: Mon, 20 Apr 2026 19:26:30 +0200 Subject: [PATCH 4/9] [ruby/prism] Freeze the parse result for the ripper translator It's a small, somewhat hacky performance boost. Locations are lazy, by freezing the result they don't have to be pack/unpacked redundantly. This gives about a 4% speed boost. Other changes are to not modify the frozen AST https://github.com/ruby/prism/commit/9e93bd6bd5 --- lib/prism/translation/ripper.rb | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/lib/prism/translation/ripper.rb b/lib/prism/translation/ripper.rb index cbbc0b5db8fa77..f9fcc57784e8ee 100644 --- a/lib/prism/translation/ripper.rb +++ b/lib/prism/translation/ripper.rb @@ -1020,7 +1020,7 @@ def visit_begin_node(node) on_stmts_add(on_stmts_new, on_void_stmt) else body = node.statements.body - body.unshift(nil) if void_stmt?(location, node.statements.body[0].location, allow_newline) + body = [nil, *body] if void_stmt?(location, node.statements.body[0].location, allow_newline) bounds(node.statements.location) visit_statements_node_body(body) @@ -1037,7 +1037,7 @@ def visit_begin_node(node) [nil] else body = else_clause_node.statements.body - body.unshift(nil) if void_stmt?(else_clause_node.else_keyword_loc, else_clause_node.statements.body[0].location, allow_newline) + body = [nil, *body] if void_stmt?(else_clause_node.else_keyword_loc, else_clause_node.statements.body[0].location, allow_newline) body end @@ -1059,7 +1059,7 @@ def visit_begin_node(node) on_bodystmt(visit_statements_node_body([nil]), nil, nil, nil) when StatementsNode body = [*node.body] - body.unshift(nil) if void_stmt?(location, body[0].location, allow_newline) + body = [nil, *body] if void_stmt?(location, body[0].location, allow_newline) stmts = visit_statements_node_body(body) bounds(node.body.first.location) @@ -1108,7 +1108,7 @@ def visit_block_node(node) braces ? 
stmts : on_bodystmt(stmts, nil, nil, nil) when StatementsNode stmts = node.body.body - stmts.unshift(nil) if void_stmt?(node.parameters&.location || node.opening_loc, node.body.location, false) + stmts = [nil, *stmts] if void_stmt?(node.parameters&.location || node.opening_loc, node.body.location, false) stmts = visit_statements_node_body(stmts) bounds(node.body.location) @@ -2035,7 +2035,7 @@ def visit_else_node(node) [nil] else body = node.statements.body - body.unshift(nil) if void_stmt?(node.else_keyword_loc, node.statements.body[0].location, false) + body = [nil, *body] if void_stmt?(node.else_keyword_loc, node.statements.body[0].location, false) body end @@ -2090,7 +2090,7 @@ def visit_ensure_node(node) [nil] else body = node.statements.body - body.unshift(nil) if void_stmt?(node.ensure_keyword_loc, body[0].location, false) + body = [nil, *body] if void_stmt?(node.ensure_keyword_loc, body[0].location, false) body end @@ -2873,7 +2873,7 @@ def visit_lambda_node(node) braces ? stmts : on_bodystmt(stmts, nil, nil, nil) when StatementsNode stmts = node.body.body - stmts.unshift(nil) if void_stmt?(node.parameters&.location || node.opening_loc, node.body.location, false) + stmts = [nil, *stmts] if void_stmt?(node.parameters&.location || node.opening_loc, node.body.location, false) stmts = visit_statements_node_body(stmts) bounds(node.body.location) @@ -3367,7 +3367,7 @@ def visit_pre_execution_node(node) # The top-level program node. def visit_program_node(node) body = node.statements.body - body << nil if body.empty? + body = [nil] if body.empty? statements = visit_statements_node_body(body) bounds(node.location) @@ -4089,7 +4089,7 @@ def visit_yield_node(node) # Lazily initialize the parse result. def result - @result ||= Prism.parse(source, partial_script: true, version: "current") + @result ||= Prism.parse(source, partial_script: true, version: "current", freeze: true) end def line_and_column_cache From 1aa62e897c344a68dae16511429daefdf6df4ecb Mon Sep 17 00:00:00 2001 From: Earlopain <14981592+Earlopain@users.noreply.github.com> Date: Tue, 21 Apr 2026 19:38:06 +0200 Subject: [PATCH 5/9] [ruby/prism] Optimize ripper `visit_token` It was showing up in profiles. So: * Don't splat `KEYWORDS` (also did the same for `BINARY_OPERATORS`) * Use `start_with?` if possible Overall gives a ~5% speed boost https://github.com/ruby/prism/commit/d611aa9d11 --- lib/prism/translation/ripper.rb | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/lib/prism/translation/ripper.rb b/lib/prism/translation/ripper.rb index f9fcc57784e8ee..0d79b244a6322b 100644 --- a/lib/prism/translation/ripper.rb +++ b/lib/prism/translation/ripper.rb @@ -346,7 +346,7 @@ def self.coerce_source(source) # :nodoc: "__ENCODING__", "__FILE__", "__LINE__" - ] + ].to_set # A list of all of the Ruby binary operators. BINARY_OPERATORS = [ @@ -371,7 +371,7 @@ def self.coerce_source(source) # :nodoc: :/, :*, :** - ] + ].to_set private_constant :KEYWORDS, :BINARY_OPERATORS @@ -1308,7 +1308,7 @@ def visit_call_node(node) bounds(node.location) on_unary(:!, receiver) end - when *BINARY_OPERATORS + when BINARY_OPERATORS receiver = visit(node.receiver) bounds(node.message_loc) @@ -4114,24 +4114,23 @@ def void_stmt?(left, right, allow_newline) # Visit the string content of a particular node. This method is used to # split into the various token types. def visit_token(token, allow_keywords = true) - case token - when "." + if token == "." 
on_period(token) - when "`" + elsif token == "`" on_backtick(token) - when *(allow_keywords ? KEYWORDS : []) + elsif allow_keywords && KEYWORDS.include?(token) on_kw(token) - when /^_/ + elsif token.start_with?("_") on_ident(token) - when /^[[:upper:]]\w*$/ + elsif token.match?(/^[[:upper:]]\w*$/) on_const(token) - when /^@@/ + elsif token.start_with?("@@") on_cvar(token) - when /^@/ + elsif token.start_with?("@") on_ivar(token) - when /^\$/ + elsif token.start_with?("$") on_gvar(token) - when /^[[:punct:]]/ + elsif token.match?(/^[[:punct:]]/) on_op(token) else on_ident(token) From 8d40d481482d7ca38457862f64feb9e1668bbd81 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Mon, 4 May 2026 14:48:59 +0200 Subject: [PATCH 6/9] vm_insnhelper.c: share the cache revalidation logic It's the same code duplicated in 3 places. --- shape.h | 10 +++++- vm_insnhelper.c | 85 ++++++++++++++++++++----------------------------- 2 files changed, 43 insertions(+), 52 deletions(-) diff --git a/shape.h b/shape.h index 1e27c15fd6d574..1f4d7db256da77 100644 --- a/shape.h +++ b/shape.h @@ -55,10 +55,12 @@ enum shape_id_mask { SHAPE_ID_HAS_IVAR_MASK = SHAPE_ID_FL_TOO_COMPLEX | (SHAPE_ID_OFFSET_MASK - 1), }; -// The interpreter doesn't care about frozen status or slot size when reading ivars. +// The interpreter doesn't care about frozen status, slot size or object id when reading ivars. // So we normalize shape_id by clearing these bits to improve cache hits. // JITs however might care about some of it. #define SHAPE_ID_READ_ONLY_MASK (~(SHAPE_ID_FL_FROZEN | SHAPE_ID_HEAP_INDEX_MASK | SHAPE_ID_FL_HAS_OBJECT_ID)) +// For write it's the same, but there we do care about frozen status +#define SHAPE_ID_WRITE_MASK (~(SHAPE_ID_HEAP_INDEX_MASK | SHAPE_ID_FL_HAS_OBJECT_ID)) typedef uint32_t redblack_id_t; @@ -207,6 +209,12 @@ shape_id_t rb_shape_rebuild(shape_id_t initial_shape_id, shape_id_t dest_shape_i void rb_shape_copy_fields(VALUE dest, VALUE *dest_buf, shape_id_t dest_shape_id, VALUE *src_buf, shape_id_t src_shape_id); void rb_shape_copy_complex_ivars(VALUE dest, VALUE obj, shape_id_t src_shape_id, st_table *fields_table); +static inline bool +rb_shape_frozen_p(shape_id_t shape_id) +{ + return shape_id & SHAPE_ID_FL_FROZEN; +} + static inline bool rb_shape_too_complex_p(shape_id_t shape_id) { diff --git a/vm_insnhelper.c b/vm_insnhelper.c index eeb8888f18c04a..ce1e20032d68e4 100644 --- a/vm_insnhelper.c +++ b/vm_insnhelper.c @@ -1424,6 +1424,30 @@ populate_cache(attr_index_t index, shape_id_t next_shape_id, ID id, const rb_ise } } +static inline shape_id_t +revalidate_cache(shape_id_t shape_id, shape_id_t dest_shape_id, ID id, attr_index_t index) +{ + RUBY_ASSERT(shape_id != INVALID_SHAPE_ID); + + if (shape_id != dest_shape_id) { + if (!(dest_shape_id != INVALID_SHAPE_ID && + RSHAPE_DIRECT_CHILD_P(shape_id, dest_shape_id) && + RSHAPE_EDGE_NAME(dest_shape_id) == id && + index < RSHAPE_CAPACITY(shape_id))) { + return INVALID_SHAPE_ID; + } + } + + // Cache hit case + RUBY_ASSERT(!rb_shape_frozen_p(dest_shape_id)); + RUBY_ASSERT(!rb_shape_too_complex_p(dest_shape_id)); + RUBY_ASSERT(!rb_shape_frozen_p(shape_id)); + RUBY_ASSERT(!rb_shape_too_complex_p(shape_id)); + RUBY_ASSERT(RSHAPE_CAPACITY(shape_id) >= RSHAPE_LEN(dest_shape_id)); + + return dest_shape_id; +} + ALWAYS_INLINE(static VALUE vm_setivar_slowpath(VALUE obj, ID id, VALUE val, const rb_iseq_t *iseq, IVC ic, const struct rb_callcache *cc, int is_attr)); NOINLINE(static VALUE vm_setivar_slowpath_ivar(VALUE obj, ID id, VALUE val, const rb_iseq_t *iseq, IVC ic)); 
NOINLINE(static VALUE vm_setivar_slowpath_attr(VALUE obj, ID id, VALUE val, const struct rb_callcache *cc)); @@ -1476,20 +1500,8 @@ vm_setivar_class(VALUE obj, ID id, VALUE val, shape_id_t dest_shape_id, attr_ind } shape_id_t shape_id = RBASIC_SHAPE_ID(fields_obj); - - // Cache hit case - if (shape_id == dest_shape_id) { - RUBY_ASSERT(dest_shape_id != INVALID_SHAPE_ID && shape_id != INVALID_SHAPE_ID); - } - else if (dest_shape_id != INVALID_SHAPE_ID) { - if (RSHAPE_DIRECT_CHILD_P(shape_id, dest_shape_id) && RSHAPE_EDGE_NAME(dest_shape_id) == id && RSHAPE_CAPACITY(shape_id) == RSHAPE_CAPACITY(dest_shape_id)) { - RUBY_ASSERT(index < RSHAPE_CAPACITY(dest_shape_id)); - } - else { - return Qundef; - } - } - else { + dest_shape_id = revalidate_cache(shape_id, dest_shape_id, id, index); + if (UNLIKELY(dest_shape_id == INVALID_SHAPE_ID)) { return Qundef; } @@ -1510,20 +1522,8 @@ static VALUE vm_setivar_default(VALUE obj, ID id, VALUE val, shape_id_t dest_shape_id, attr_index_t index) { shape_id_t shape_id = RBASIC_SHAPE_ID(obj); - - // Cache hit case - if (shape_id == dest_shape_id) { - RUBY_ASSERT(dest_shape_id != INVALID_SHAPE_ID && shape_id != INVALID_SHAPE_ID); - } - else if (dest_shape_id != INVALID_SHAPE_ID) { - if (RSHAPE_DIRECT_CHILD_P(shape_id, dest_shape_id) && RSHAPE_EDGE_NAME(dest_shape_id) == id && RSHAPE_CAPACITY(shape_id) == RSHAPE_CAPACITY(dest_shape_id)) { - RUBY_ASSERT(index < RSHAPE_CAPACITY(dest_shape_id)); - } - else { - return Qundef; - } - } - else { + dest_shape_id = revalidate_cache(shape_id, dest_shape_id, id, index); + if (UNLIKELY(dest_shape_id == INVALID_SHAPE_ID)) { return Qundef; } @@ -1551,32 +1551,15 @@ vm_setivar(VALUE obj, ID id, VALUE val, shape_id_t dest_shape_id, attr_index_t i VM_ASSERT(!rb_ractor_shareable_p(obj) || rb_obj_frozen_p(obj)); shape_id_t shape_id = RBASIC_SHAPE_ID(obj); - RUBY_ASSERT(dest_shape_id == INVALID_SHAPE_ID || !rb_shape_too_complex_p(dest_shape_id)); - - if (LIKELY(shape_id == dest_shape_id)) { - RUBY_ASSERT(dest_shape_id != INVALID_SHAPE_ID && shape_id != INVALID_SHAPE_ID); - VM_ASSERT(!rb_ractor_shareable_p(obj)); - } - else if (dest_shape_id != INVALID_SHAPE_ID) { - if (RSHAPE_DIRECT_CHILD_P(shape_id, dest_shape_id) && RSHAPE_EDGE_NAME(dest_shape_id) == id && RSHAPE_CAPACITY(shape_id) == RSHAPE_CAPACITY(dest_shape_id)) { - RUBY_ASSERT(dest_shape_id != INVALID_SHAPE_ID && shape_id != INVALID_SHAPE_ID); - - RBASIC_SET_SHAPE_ID(obj, dest_shape_id); - - RUBY_ASSERT(index < RSHAPE_CAPACITY(dest_shape_id)); - } - else { - break; - } - } - else { + dest_shape_id = revalidate_cache(shape_id, dest_shape_id, id, index); + if (UNLIKELY(dest_shape_id == INVALID_SHAPE_ID)) { break; } - VALUE *ptr = ROBJECT_FIELDS(obj); - - RUBY_ASSERT(!rb_shape_obj_too_complex_p(obj)); - RB_OBJ_WRITE(obj, &ptr[index], val); + RB_OBJ_WRITE(obj, &ROBJECT_FIELDS(obj)[index], val); + if (shape_id != dest_shape_id) { + RBASIC_SET_SHAPE_ID(obj, dest_shape_id); + } RB_DEBUG_COUNTER_INC(ivar_set_ic_hit); RB_DEBUG_COUNTER_INC(ivar_set_obj_hit); From a2ba1ed43b9eb66a8c2d100644cd7bda7116efd6 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Mon, 4 May 2026 15:55:41 +0200 Subject: [PATCH 7/9] vm_insnhelper.c: refactor and optimize setivar cache revalidation By only storing shape offsets in the cache, we're able to still match two equal objects that happen to be in different heaps, as well as to validate the object is neither frozen nor complex. 
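A standalone sketch of why a single masked comparison performs all three checks at once (illustration only, not part of the patch; the literal bit values simply mirror the shape_id layout after this change, and 42 stands in for an arbitrary shape offset):

    #include <stdint.h>
    #include <stdio.h>

    typedef uint32_t shape_id_t;

    #define SHAPE_ID_HEAP_INDEX_MASK  0x00780000u
    #define SHAPE_ID_FL_TOO_COMPLEX   0x00800000u
    #define SHAPE_ID_FL_FROZEN        0x01000000u
    #define SHAPE_ID_FL_HAS_OBJECT_ID 0x02000000u
    #define SHAPE_ID_WRITE_MASK (~(SHAPE_ID_HEAP_INDEX_MASK | SHAPE_ID_FL_HAS_OBJECT_ID))

    int main(void)
    {
        shape_id_t cached_offset = 42;                      /* what the inline cache now stores */
        shape_id_t other_heap    = 42 | 0x00080000u;        /* same ivars, different heap index */
        shape_id_t frozen        = 42 | SHAPE_ID_FL_FROZEN; /* same ivars, but frozen */

        /* heap index and object-id bits are cleared by the mask: still a hit */
        printf("%d\n", (other_heap & SHAPE_ID_WRITE_MASK) == cached_offset); /* 1 */
        /* frozen (and too-complex) bits survive the mask: treated as a miss */
        printf("%d\n", (frozen & SHAPE_ID_WRITE_MASK) == cached_offset);     /* 0 */
        return 0;
    }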
--- shape.h | 16 ++++++++++++---- vm_insnhelper.c | 35 +++++++++++++++++++++++++++------- yjit/src/cruby_bindings.inc.rs | 2 +- zjit/src/cruby_bindings.inc.rs | 8 ++++---- 4 files changed, 45 insertions(+), 16 deletions(-) diff --git a/shape.h b/shape.h index 1f4d7db256da77..e41efde797e348 100644 --- a/shape.h +++ b/shape.h @@ -39,9 +39,9 @@ enum shape_id_fl_type { SHAPE_ID_HEAP_INDEX_MASK = ((1 << SHAPE_ID_HEAP_INDEX_BITS) - 1) << SHAPE_ID_HEAP_INDEX_OFFSET, - SHAPE_ID_FL_FROZEN = RBIMPL_SHAPE_ID_FL(0), - SHAPE_ID_FL_HAS_OBJECT_ID = RBIMPL_SHAPE_ID_FL(1), - SHAPE_ID_FL_TOO_COMPLEX = RBIMPL_SHAPE_ID_FL(2), + SHAPE_ID_FL_TOO_COMPLEX = RBIMPL_SHAPE_ID_FL(0), + SHAPE_ID_FL_FROZEN = RBIMPL_SHAPE_ID_FL(1), + SHAPE_ID_FL_HAS_OBJECT_ID = RBIMPL_SHAPE_ID_FL(2), SHAPE_ID_FL_NON_CANONICAL_MASK = SHAPE_ID_FL_FROZEN | SHAPE_ID_FL_HAS_OBJECT_ID, SHAPE_ID_FLAGS_MASK = SHAPE_ID_HEAP_INDEX_MASK | SHAPE_ID_FL_NON_CANONICAL_MASK | SHAPE_ID_FL_TOO_COMPLEX, @@ -59,7 +59,7 @@ enum shape_id_mask { // So we normalize shape_id by clearing these bits to improve cache hits. // JITs however might care about some of it. #define SHAPE_ID_READ_ONLY_MASK (~(SHAPE_ID_FL_FROZEN | SHAPE_ID_HEAP_INDEX_MASK | SHAPE_ID_FL_HAS_OBJECT_ID)) -// For write it's the same, but there we do care about frozen status +// For write it's the same idea, but here we do care about frozen status. #define SHAPE_ID_WRITE_MASK (~(SHAPE_ID_HEAP_INDEX_MASK | SHAPE_ID_FL_HAS_OBJECT_ID)) typedef uint32_t redblack_id_t; @@ -475,6 +475,14 @@ rb_shape_transition_complex(shape_id_t shape_id) return next_shape_id; } +static inline shape_id_t +rb_shape_transition_offset(shape_id_t shape_id, shape_id_t offset) +{ + offset = RSHAPE_OFFSET(offset); + RUBY_ASSERT(RSHAPE_OFFSET(shape_id) == offset || RSHAPE_DIRECT_CHILD_P(shape_id, offset)); + return RSHAPE_FLAGS(shape_id) | offset; +} + static inline shape_id_t rb_shape_transition_heap(shape_id_t shape_id, size_t heap_index) { diff --git a/vm_insnhelper.c b/vm_insnhelper.c index ce1e20032d68e4..846fd34f4ce93c 100644 --- a/vm_insnhelper.c +++ b/vm_insnhelper.c @@ -1414,6 +1414,10 @@ static void populate_cache(attr_index_t index, shape_id_t next_shape_id, ID id, const rb_iseq_t *iseq, IVC ic, const struct rb_callcache *cc, bool is_attr) { RUBY_ASSERT(!rb_shape_too_complex_p(next_shape_id)); + RUBY_ASSERT(next_shape_id != INVALID_SHAPE_ID); + + // We only care about the shape offset. + next_shape_id = RSHAPE_OFFSET(next_shape_id); // Cache population code if (is_attr) { @@ -1425,25 +1429,42 @@ populate_cache(attr_index_t index, shape_id_t next_shape_id, ID id, const rb_ise } static inline shape_id_t -revalidate_cache(shape_id_t shape_id, shape_id_t dest_shape_id, ID id, attr_index_t index) +revalidate_cache(shape_id_t shape_id, shape_id_t dest_shape_offset, ID id, attr_index_t index) { RUBY_ASSERT(shape_id != INVALID_SHAPE_ID); + RUBY_ASSERT(dest_shape_offset == INVALID_SHAPE_ID || dest_shape_offset == RSHAPE_OFFSET(dest_shape_offset)); - if (shape_id != dest_shape_id) { - if (!(dest_shape_id != INVALID_SHAPE_ID && - RSHAPE_DIRECT_CHILD_P(shape_id, dest_shape_id) && + shape_id_t dest_shape_id = shape_id; + shape_id_t normalized_shape_id = shape_id & SHAPE_ID_WRITE_MASK; + + if (normalized_shape_id == dest_shape_offset) { + // Perfect hit case, we're reassigning an existing ivar, the shape doesn't change. + // Also since `SHAPE_ID_WRITE_MASK` contains COMPLEX and FROZEN flags, by matching + // `dest_shape_offset` we also checked that the object is neither complex nor frozen. 
+ } + else { + if (dest_shape_offset == INVALID_SHAPE_ID) { + // The cache is cold. + return INVALID_SHAPE_ID; + } + + // Child hit case, the cache offset is a direct child of the current shape + // and the ivar name matches. + // We additionally need to ensure that the object has sufficient capacity. + if (!(RSHAPE_DIRECT_CHILD_P(shape_id, dest_shape_id) && RSHAPE_EDGE_NAME(dest_shape_id) == id && index < RSHAPE_CAPACITY(shape_id))) { return INVALID_SHAPE_ID; } + + // We use the cached offset, but combined with the current shape flags. + dest_shape_id = rb_shape_transition_offset(shape_id, dest_shape_offset); } // Cache hit case - RUBY_ASSERT(!rb_shape_frozen_p(dest_shape_id)); - RUBY_ASSERT(!rb_shape_too_complex_p(dest_shape_id)); RUBY_ASSERT(!rb_shape_frozen_p(shape_id)); RUBY_ASSERT(!rb_shape_too_complex_p(shape_id)); - RUBY_ASSERT(RSHAPE_CAPACITY(shape_id) >= RSHAPE_LEN(dest_shape_id)); + RUBY_ASSERT(RSHAPE_CAPACITY(shape_id) >= RSHAPE_LEN(dest_shape_offset)); return dest_shape_id; } diff --git a/yjit/src/cruby_bindings.inc.rs b/yjit/src/cruby_bindings.inc.rs index eccee5852b03f6..52a05155149c51 100644 --- a/yjit/src/cruby_bindings.inc.rs +++ b/yjit/src/cruby_bindings.inc.rs @@ -636,7 +636,7 @@ pub const VM_ENV_FLAG_ISOLATED: vm_frame_env_flags = 16; pub type vm_frame_env_flags = u32; pub type attr_index_t = u16; pub type shape_id_t = u32; -pub const SHAPE_ID_HAS_IVAR_MASK: shape_id_mask = 34078718; +pub const SHAPE_ID_HAS_IVAR_MASK: shape_id_mask = 8912894; pub type shape_id_mask = u32; #[repr(C)] pub struct rb_cvar_class_tbl_entry { diff --git a/zjit/src/cruby_bindings.inc.rs b/zjit/src/cruby_bindings.inc.rs index 0bb4e275128e5b..bd6a4afa25aa6f 100644 --- a/zjit/src/cruby_bindings.inc.rs +++ b/zjit/src/cruby_bindings.inc.rs @@ -1485,10 +1485,10 @@ pub type vm_frame_env_flags = u32; pub type attr_index_t = u16; pub type shape_id_t = u32; pub const SHAPE_ID_HEAP_INDEX_MASK: shape_id_fl_type = 7864320; -pub const SHAPE_ID_FL_FROZEN: shape_id_fl_type = 8388608; -pub const SHAPE_ID_FL_HAS_OBJECT_ID: shape_id_fl_type = 16777216; -pub const SHAPE_ID_FL_TOO_COMPLEX: shape_id_fl_type = 33554432; -pub const SHAPE_ID_FL_NON_CANONICAL_MASK: shape_id_fl_type = 25165824; +pub const SHAPE_ID_FL_TOO_COMPLEX: shape_id_fl_type = 8388608; +pub const SHAPE_ID_FL_FROZEN: shape_id_fl_type = 16777216; +pub const SHAPE_ID_FL_HAS_OBJECT_ID: shape_id_fl_type = 33554432; +pub const SHAPE_ID_FL_NON_CANONICAL_MASK: shape_id_fl_type = 50331648; pub const SHAPE_ID_FLAGS_MASK: shape_id_fl_type = 66584576; pub type shape_id_fl_type = u32; pub const CONST_DEPRECATED: rb_const_flag_t = 256; From 880901afbb4687d6410e616fbc55e123141d34ca Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Mon, 4 May 2026 16:37:27 +0200 Subject: [PATCH 8/9] vm_insnhelper.c: Unify `populate_cache` and `fill_ivar_cache` They're the same code. 
--- vm_insnhelper.c | 41 +++++++++++++++-------------------------- 1 file changed, 15 insertions(+), 26 deletions(-) diff --git a/vm_insnhelper.c b/vm_insnhelper.c index 846fd34f4ce93c..8fc3f5be68f3d0 100644 --- a/vm_insnhelper.c +++ b/vm_insnhelper.c @@ -1233,10 +1233,17 @@ vm_get_cvar_base(const rb_cref_t *cref, const rb_control_frame_t *cfp, int top_l return klass; } -ALWAYS_INLINE(static void fill_ivar_cache(const rb_iseq_t *iseq, IVC ic, const struct rb_callcache *cc, int is_attr, attr_index_t index, shape_id_t shape_id)); +ALWAYS_INLINE(static void fill_ivar_cache(attr_index_t index, shape_id_t shape_id, const rb_iseq_t *iseq, IVC ic, const struct rb_callcache *cc, bool is_attr)); static inline void -fill_ivar_cache(const rb_iseq_t *iseq, IVC ic, const struct rb_callcache *cc, int is_attr, attr_index_t index, shape_id_t shape_id) +fill_ivar_cache(attr_index_t index, shape_id_t shape_id, const rb_iseq_t *iseq, IVC ic, const struct rb_callcache *cc, bool is_attr) { + RUBY_ASSERT(!rb_shape_too_complex_p(shape_id)); + RUBY_ASSERT(shape_id != INVALID_SHAPE_ID); + + // We only care about the shape offset. + shape_id = RSHAPE_OFFSET(shape_id); + + // Cache population code if (is_attr) { vm_cc_attr_index_set(cc, index, shape_id); } @@ -1367,7 +1374,7 @@ vm_getivar(VALUE obj, ID id, const rb_iseq_t *iseq, IVC ic, const struct rb_call // This fills in the cache with the shared cache object. // "ent" is the shared cache object if (cached_id != previous_cached_id) { - fill_ivar_cache(iseq, ic, cc, is_attr, index, cached_id); + fill_ivar_cache(index, cached_id, iseq, ic, cc, is_attr); } if (index == ATTR_INDEX_NOT_SET) { @@ -1410,26 +1417,8 @@ vm_getivar(VALUE obj, ID id, const rb_iseq_t *iseq, IVC ic, const struct rb_call } } -static void -populate_cache(attr_index_t index, shape_id_t next_shape_id, ID id, const rb_iseq_t *iseq, IVC ic, const struct rb_callcache *cc, bool is_attr) -{ - RUBY_ASSERT(!rb_shape_too_complex_p(next_shape_id)); - RUBY_ASSERT(next_shape_id != INVALID_SHAPE_ID); - - // We only care about the shape offset. 
- next_shape_id = RSHAPE_OFFSET(next_shape_id); - - // Cache population code - if (is_attr) { - vm_cc_attr_index_set(cc, index, next_shape_id); - } - else { - vm_ic_attr_index_set(iseq, ic, index, next_shape_id); - } -} - static inline shape_id_t -revalidate_cache(shape_id_t shape_id, shape_id_t dest_shape_offset, ID id, attr_index_t index) +revalidate_setivar_cache(shape_id_t shape_id, shape_id_t dest_shape_offset, ID id, attr_index_t index) { RUBY_ASSERT(shape_id != INVALID_SHAPE_ID); RUBY_ASSERT(dest_shape_offset == INVALID_SHAPE_ID || dest_shape_offset == RSHAPE_OFFSET(dest_shape_offset)); @@ -1485,7 +1474,7 @@ vm_setivar_slowpath(VALUE obj, ID id, VALUE val, const rb_iseq_t *iseq, IVC ic, shape_id_t next_shape_id = RBASIC_SHAPE_ID(obj); if (!rb_shape_too_complex_p(next_shape_id)) { - populate_cache(index, next_shape_id, id, iseq, ic, cc, is_attr); + fill_ivar_cache(index, next_shape_id, iseq, ic, cc, is_attr); } RB_DEBUG_COUNTER_INC(ivar_set_obj_miss); @@ -1521,7 +1510,7 @@ vm_setivar_class(VALUE obj, ID id, VALUE val, shape_id_t dest_shape_id, attr_ind } shape_id_t shape_id = RBASIC_SHAPE_ID(fields_obj); - dest_shape_id = revalidate_cache(shape_id, dest_shape_id, id, index); + dest_shape_id = revalidate_setivar_cache(shape_id, dest_shape_id, id, index); if (UNLIKELY(dest_shape_id == INVALID_SHAPE_ID)) { return Qundef; } @@ -1543,7 +1532,7 @@ static VALUE vm_setivar_default(VALUE obj, ID id, VALUE val, shape_id_t dest_shape_id, attr_index_t index) { shape_id_t shape_id = RBASIC_SHAPE_ID(obj); - dest_shape_id = revalidate_cache(shape_id, dest_shape_id, id, index); + dest_shape_id = revalidate_setivar_cache(shape_id, dest_shape_id, id, index); if (UNLIKELY(dest_shape_id == INVALID_SHAPE_ID)) { return Qundef; } @@ -1572,7 +1561,7 @@ vm_setivar(VALUE obj, ID id, VALUE val, shape_id_t dest_shape_id, attr_index_t i VM_ASSERT(!rb_ractor_shareable_p(obj) || rb_obj_frozen_p(obj)); shape_id_t shape_id = RBASIC_SHAPE_ID(obj); - dest_shape_id = revalidate_cache(shape_id, dest_shape_id, id, index); + dest_shape_id = revalidate_setivar_cache(shape_id, dest_shape_id, id, index); if (UNLIKELY(dest_shape_id == INVALID_SHAPE_ID)) { break; } From a440f23c708cf0e1ac97497fd16f6dc893e45bbd Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Mon, 4 May 2026 16:42:11 +0200 Subject: [PATCH 9/9] Always inline `revalidate_setivar_cache` --- vm_insnhelper.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vm_insnhelper.c b/vm_insnhelper.c index 8fc3f5be68f3d0..6b51f88aa61d16 100644 --- a/vm_insnhelper.c +++ b/vm_insnhelper.c @@ -1417,7 +1417,8 @@ vm_getivar(VALUE obj, ID id, const rb_iseq_t *iseq, IVC ic, const struct rb_call } } -static inline shape_id_t +ALWAYS_INLINE(static shape_id_t revalidate_setivar_cache(shape_id_t shape_id, shape_id_t dest_shape_offset, ID id, attr_index_t index)); +static shape_id_t revalidate_setivar_cache(shape_id_t shape_id, shape_id_t dest_shape_offset, ID id, attr_index_t index) { RUBY_ASSERT(shape_id != INVALID_SHAPE_ID);