From 2f5b7464543be76be032e0cfa69bf34993ea222c Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Tue, 5 May 2026 17:06:42 +0200 Subject: [PATCH 1/5] Refactor setivar caches to include the source and destination shapes This is how caches used to work prior to 913979bede2a1b79109fa2072352882560d55fe0, however since inline caches need to be set atomically, they are limited to 64 bits, which wasn't enough to store an `attr_index_t` and two `shape_id_t`. But now that we only cache the shape's offset, which is only 19 bits, we can store both the source and destination offsets along with the field index. Thanks to this, the cache validation is now much simpler, we only have to do an exact match on the source offset, whereas before if it wasn't an exact match, we had to try to match the parent, then check the field name. We however still have to validate the shape capacity, as the fast path doesn't handle resizes, but that could be handled in a followup. --- compile.c | 8 +-- shape.h | 114 +++++++++++++++++++++++++++++++--- vm_callinfo.h | 83 ++++++++++--------------- vm_core.h | 2 +- vm_insnhelper.c | 162 +++++++++++++----------------------------------- 5 files changed, 185 insertions(+), 184 deletions(-) diff --git a/compile.c b/compile.c index 7fc5e379e3296b..1d5381b5feb6df 100644 --- a/compile.c +++ b/compile.c @@ -2781,12 +2781,12 @@ iseq_set_sequence(rb_iseq_t *iseq, LINK_ANCHOR *const anchor) if (insn == BIN(setinstancevariable)) { cache->iv_set_name = SYM2ID(operands[j - 1]); + cache->value = IVAR_CACHE_INIT; } else { cache->iv_set_name = 0; + cache->value = rb_getivar_cache_pack(ROOT_SHAPE_ID, ATTR_INDEX_NOT_SET); } - - vm_ic_attr_index_initialize(cache, INVALID_SHAPE_ID); } case TS_ISE: /* inline storage entry: `once` insn */ case TS_ICVARC: /* inline cvar cache */ @@ -13124,12 +13124,12 @@ ibf_load_code(const struct ibf_load *load, rb_iseq_t *iseq, ibf_offset_t bytecod if (insn == BIN(setinstancevariable)) { ID iv_name = (ID)code[code_index - 1]; cache->iv_set_name = 
iv_name; + cache->value = IVAR_CACHE_INIT; } else { cache->iv_set_name = 0; + cache->value = rb_getivar_cache_pack(ROOT_SHAPE_ID, ATTR_INDEX_NOT_SET); } - - vm_ic_attr_index_initialize(cache, INVALID_SHAPE_ID); } } diff --git a/shape.h b/shape.h index b066bf3a15e05a..61b161e8dd9ed6 100644 --- a/shape.h +++ b/shape.h @@ -117,14 +117,6 @@ RUBY_SYMBOL_EXPORT_END size_t rb_shapes_cache_size(void); size_t rb_shapes_count(void); -union rb_attr_index_cache { - uint64_t pack; - struct { - shape_id_t shape_id; - attr_index_t index; - } unpack; -}; - static inline shape_id_t RBASIC_SHAPE_ID(VALUE obj) { @@ -267,8 +259,7 @@ RSHAPE_PARENT_OFFSET(shape_id_t shape_id) static inline bool RSHAPE_DIRECT_CHILD_P(shape_id_t parent_offset, shape_id_t child_id) { - return (RSHAPE_FLAGS(parent_offset) == RSHAPE_FLAGS(child_id) && - RSHAPE_PARENT_OFFSET(child_id) == RSHAPE_OFFSET(parent_offset)); + return RSHAPE_PARENT_OFFSET(child_id) == RSHAPE_OFFSET(parent_offset); } static inline enum shape_type @@ -529,4 +520,107 @@ size_t rb_shape_edges_count(shape_id_t shape_id); size_t rb_shape_depth(shape_id_t shape_id); RUBY_SYMBOL_EXPORT_END +// Inline cache helpers + +typedef struct { + attr_index_t index; + shape_id_t shape_offset; +} rb_getivar_cache; + +union rb_getivar_cache { + uint64_t pack; + rb_getivar_cache unpack; +}; +STATIC_ASSERT(rb_getivar_cache_size, sizeof(union rb_getivar_cache) <= sizeof(uint64_t)); + +#define IVAR_CACHE_INIT ((uint64_t)-1) +#define ATTR_INDEX_T_NUM_BITS (sizeof(attr_index_t) * CHAR_BIT) + +static inline rb_getivar_cache +rb_getivar_cache_unpack(uint64_t packed) +{ + union rb_getivar_cache cache = { + .pack = packed, + }; + + // Because caches may be initialized with all bits set (IVAR_CACHE_INIT), and `shape_offset` is 32 bits, + // we need to remove any potential extra bits set in the "padding". 
+ cache.unpack.shape_offset &= SHAPE_ID_OFFSET_MASK; + return cache.unpack; +} + +static inline uint64_t +rb_getivar_cache_pack(shape_id_t shape_offset, attr_index_t index) +{ + RUBY_ASSERT(shape_offset == RSHAPE_OFFSET(shape_offset)); + RUBY_ASSERT(shape_offset != INVALID_SHAPE_ID); + + union rb_getivar_cache cache = { + .unpack = { + .shape_offset = shape_offset, + .index = index, + }, + }; + return cache.pack; +} + +typedef struct { + attr_index_t index; + shape_id_t source_shape_offset; + shape_id_t dest_shape_offset; +} rb_setivar_cache; + +static inline rb_setivar_cache +rb_setivar_cache_unpack(uint64_t packed) +{ + rb_setivar_cache cache = { + .index = (attr_index_t)packed, + .source_shape_offset = RSHAPE_OFFSET((shape_id_t)(packed >> ATTR_INDEX_T_NUM_BITS)), + .dest_shape_offset = RSHAPE_OFFSET((shape_id_t)(packed >> (ATTR_INDEX_T_NUM_BITS + SHAPE_ID_OFFSET_NUM_BITS))), + }; + return cache; +} + +static inline uint64_t +rb_setivar_cache_pack(shape_id_t shape_offset, shape_id_t dest_shape_offset, attr_index_t index) +{ + RUBY_ASSERT(shape_offset == RSHAPE_OFFSET(shape_offset)); + RUBY_ASSERT(dest_shape_offset == RSHAPE_OFFSET(dest_shape_offset)); + RUBY_ASSERT(shape_offset == dest_shape_offset || RSHAPE_DIRECT_CHILD_P(shape_offset, dest_shape_offset)); + + uint64_t packed_cache = (uint64_t)dest_shape_offset << (ATTR_INDEX_T_NUM_BITS + SHAPE_ID_OFFSET_NUM_BITS); + packed_cache |= (uint64_t)shape_offset << ATTR_INDEX_T_NUM_BITS; + packed_cache |= (uint64_t)index; + return packed_cache; +} + + +ALWAYS_INLINE(static shape_id_t rb_setivar_cache_revalidate(shape_id_t shape_id, rb_setivar_cache cache)); +static shape_id_t +rb_setivar_cache_revalidate(shape_id_t shape_id, rb_setivar_cache cache) +{ + RUBY_ASSERT(shape_id != INVALID_SHAPE_ID); + RUBY_ASSERT(cache.dest_shape_offset == INVALID_SHAPE_ID || cache.dest_shape_offset == RSHAPE_OFFSET(cache.dest_shape_offset)); + + shape_id_t normalized_shape_id = shape_id & SHAPE_ID_WRITE_MASK; + if 
(UNLIKELY(normalized_shape_id != cache.source_shape_offset)) { + return INVALID_SHAPE_ID; + } + + if (UNLIKELY(cache.index >= RSHAPE_CAPACITY(shape_id))) { + // That's still a hit in term of layout, but the object will need to be resized, + // so unfortunately we'll have to go through the slow path regardless... + return INVALID_SHAPE_ID; + } + + // Cache hit case + RUBY_ASSERT(cache.source_shape_offset == cache.dest_shape_offset || RSHAPE_DIRECT_CHILD_P(shape_id, cache.dest_shape_offset)); + RUBY_ASSERT(cache.index < RSHAPE_CAPACITY(shape_id)); + RUBY_ASSERT(!rb_shape_frozen_p(shape_id)); + RUBY_ASSERT(!rb_shape_too_complex_p(shape_id)); + + // We use the cached offset, but combined with the current shape flags. + return rb_shape_transition_offset(shape_id, cache.dest_shape_offset); +} + #endif diff --git a/vm_callinfo.h b/vm_callinfo.h index aad0d5f5849bf8..06215978d658f8 100644 --- a/vm_callinfo.h +++ b/vm_callinfo.h @@ -315,14 +315,6 @@ extern const struct rb_callcache *rb_vm_empty_cc_for_super(void); #define vm_cc_empty() rb_vm_empty_cc() -static inline void vm_cc_attr_index_set(const struct rb_callcache *cc, attr_index_t index, shape_id_t dest_shape_id); - -static inline void -vm_cc_attr_index_initialize(const struct rb_callcache *cc, shape_id_t shape_id) -{ - vm_cc_attr_index_set(cc, (attr_index_t)-1, shape_id); -} - static inline VALUE cc_check_class(VALUE klass) { @@ -334,6 +326,8 @@ VALUE rb_vm_cc_table_create(size_t capa); VALUE rb_vm_cc_table_dup(VALUE old_table); void rb_vm_cc_table_delete(VALUE table, ID mid); +static inline void vm_cc_attr_index_set(const struct rb_callcache *cc, uint64_t packed_cache); + static inline const struct rb_callcache * vm_cc_new(VALUE klass, const struct rb_callable_method_entry_struct *cme, @@ -360,8 +354,11 @@ vm_cc_new(VALUE klass, } if (cme) { - if (cme->def->type == VM_METHOD_TYPE_ATTRSET || cme->def->type == VM_METHOD_TYPE_IVAR) { - vm_cc_attr_index_initialize(cc, INVALID_SHAPE_ID); + if (cme->def->type == 
VM_METHOD_TYPE_ATTRSET) { + vm_cc_attr_index_set(cc, IVAR_CACHE_INIT); + } + else if (cme->def->type == VM_METHOD_TYPE_IVAR) { + vm_cc_attr_index_set(cc, rb_getivar_cache_pack(ROOT_SHAPE_ID, ATTR_INDEX_NOT_SET)); } } else { @@ -452,26 +449,27 @@ vm_cc_call(const struct rb_callcache *cc) return cc->call_; } -static inline void -vm_unpack_shape_and_index(const uint64_t cache_value, shape_id_t *shape_id, attr_index_t *index) +static inline uint64_t +vm_cc_atomic_cache_read(const struct rb_callcache *cc) { - union rb_attr_index_cache cache = { - .pack = cache_value, - }; - *shape_id = cache.unpack.shape_id; - *index = cache.unpack.index; + return ATOMIC_U64_LOAD_RELAXED(cc->aux_.attr.value); } -static inline void -vm_cc_atomic_shape_and_index(const struct rb_callcache *cc, shape_id_t *shape_id, attr_index_t *index) +static inline uint64_t +vm_ic_atomic_cache_read(const struct iseq_inline_iv_cache_entry *ic) { - vm_unpack_shape_and_index(ATOMIC_U64_LOAD_RELAXED(cc->aux_.attr.value), shape_id, index); + return ATOMIC_U64_LOAD_RELAXED(ic->value); } -static inline void -vm_ic_atomic_shape_and_index(const struct iseq_inline_iv_cache_entry *ic, shape_id_t *shape_id, attr_index_t *index) +static inline uint64_t +vm_cache_attr_index_atomic_read(bool is_attr, const struct iseq_inline_iv_cache_entry *ic, const struct rb_callcache *cc) { - vm_unpack_shape_and_index(ATOMIC_U64_LOAD_RELAXED(ic->value), shape_id, index); + if (is_attr) { + return vm_cc_atomic_cache_read(cc); + } + else { + return vm_ic_atomic_cache_read(ic); + } } static inline unsigned int @@ -508,48 +506,35 @@ set_vm_cc_ivar(const struct rb_callcache *cc) *(VALUE *)&cc->flags |= VM_CALLCACHE_IVAR; } -static inline uint64_t -vm_pack_shape_and_index(shape_id_t shape_id, attr_index_t index) +static inline bool +vm_cc_ivar_p(const struct rb_callcache *cc) { - union rb_attr_index_cache cache = { - .unpack = { - .shape_id = shape_id, - .index = index, - } - }; - return cache.pack; + return (cc->flags & 
VM_CALLCACHE_IVAR) != 0; } static inline void -vm_cc_attr_index_set(const struct rb_callcache *cc, attr_index_t index, shape_id_t dest_shape_id) +vm_cc_attr_index_set(const struct rb_callcache *cc, uint64_t packed_cache) { uint64_t *attr_value = (uint64_t *)&cc->aux_.attr.value; if (!vm_cc_markable(cc)) { - *attr_value = vm_pack_shape_and_index(INVALID_SHAPE_ID, ATTR_INDEX_NOT_SET); + *attr_value = IVAR_CACHE_INIT; return; } VM_ASSERT(IMEMO_TYPE_P(cc, imemo_callcache)); VM_ASSERT(cc != vm_cc_empty()); - *attr_value = vm_pack_shape_and_index(dest_shape_id, index); + *attr_value = packed_cache; set_vm_cc_ivar(cc); } -static inline bool -vm_cc_ivar_p(const struct rb_callcache *cc) -{ - return (cc->flags & VM_CALLCACHE_IVAR) != 0; -} - -static inline void -vm_ic_attr_index_set(const rb_iseq_t *iseq, struct iseq_inline_iv_cache_entry *ic, attr_index_t index, shape_id_t dest_shape_id) -{ - ATOMIC_U64_SET_RELAXED(ic->value, vm_pack_shape_and_index(dest_shape_id, index)); -} - static inline void -vm_ic_attr_index_initialize(struct iseq_inline_iv_cache_entry *ic, shape_id_t shape_id) +vm_cache_attr_index_set(bool is_attr, struct iseq_inline_iv_cache_entry *ic, const struct rb_callcache *cc, uint64_t packed_cache) { - ATOMIC_U64_SET_RELAXED(ic->value, vm_pack_shape_and_index(shape_id, ATTR_INDEX_NOT_SET)); + if (is_attr) { + vm_cc_attr_index_set(cc, packed_cache); + } + else { + ATOMIC_U64_SET_RELAXED(ic->value, packed_cache); + } } static inline void diff --git a/vm_core.h b/vm_core.h index a0044ccc852da4..89f80b52c75a37 100644 --- a/vm_core.h +++ b/vm_core.h @@ -286,7 +286,7 @@ struct iseq_inline_constant_cache { }; struct iseq_inline_iv_cache_entry { - uint64_t value; // dest_shape_id in former half, attr_index in latter half + uint64_t value; // Either rb_setivar_cache or rb_getivar_cache packed in a uint64_t. 
ID iv_set_name; }; diff --git a/vm_insnhelper.c b/vm_insnhelper.c index 6b51f88aa61d16..966a7a93eab35c 100644 --- a/vm_insnhelper.c +++ b/vm_insnhelper.c @@ -1233,25 +1233,6 @@ vm_get_cvar_base(const rb_cref_t *cref, const rb_control_frame_t *cfp, int top_l return klass; } -ALWAYS_INLINE(static void fill_ivar_cache(attr_index_t index, shape_id_t shape_id, const rb_iseq_t *iseq, IVC ic, const struct rb_callcache *cc, bool is_attr)); -static inline void -fill_ivar_cache(attr_index_t index, shape_id_t shape_id, const rb_iseq_t *iseq, IVC ic, const struct rb_callcache *cc, bool is_attr) -{ - RUBY_ASSERT(!rb_shape_too_complex_p(shape_id)); - RUBY_ASSERT(shape_id != INVALID_SHAPE_ID); - - // We only care about the shape offset. - shape_id = RSHAPE_OFFSET(shape_id); - - // Cache population code - if (is_attr) { - vm_cc_attr_index_set(cc, index, shape_id); - } - else { - vm_ic_attr_index_set(iseq, ic, index, shape_id); - } -} - #define ractor_incidental_shareable_p(cond, val) \ (!(cond) || rb_ractor_shareable_p(val)) #define ractor_object_incidental_shareable_p(obj, val) \ @@ -1306,24 +1287,14 @@ vm_getivar(VALUE obj, ID id, const rb_iseq_t *iseq, IVC ic, const struct rb_call shape_id_t shape_id = RBASIC_SHAPE_ID_FOR_READ(fields_obj); VALUE *ivar_list = rb_imemo_fields_ptr(fields_obj); - shape_id_t cached_id; - attr_index_t index; + rb_getivar_cache cache = rb_getivar_cache_unpack(vm_cache_attr_index_atomic_read(is_attr, ic, cc)); - if (is_attr) { - vm_cc_atomic_shape_and_index(cc, &cached_id, &index); - } - else { - vm_ic_atomic_shape_and_index(ic, &cached_id, &index); - } - - if (LIKELY(cached_id == shape_id)) { - RUBY_ASSERT(!rb_shape_too_complex_p(cached_id)); - - if (index == ATTR_INDEX_NOT_SET) { + if (LIKELY(cache.shape_offset == shape_id)) { + if (cache.index == ATTR_INDEX_NOT_SET) { return default_value; } - val = ivar_list[index]; + val = ivar_list[cache.index]; #if USE_DEBUG_COUNTER RB_DEBUG_COUNTER_INC(ivar_get_ic_hit); @@ -1336,7 +1307,7 @@ vm_getivar(VALUE 
obj, ID id, const rb_iseq_t *iseq, IVC ic, const struct rb_call else { // cache miss case #if USE_DEBUG_COUNTER if (is_attr) { - if (cached_id != INVALID_SHAPE_ID) { + if (cache.shape_offset != INVALID_SHAPE_ID) { RB_DEBUG_COUNTER_INC(ivar_get_cc_miss_set); } else { @@ -1344,7 +1315,7 @@ vm_getivar(VALUE obj, ID id, const rb_iseq_t *iseq, IVC ic, const struct rb_call } } else { - if (cached_id != INVALID_SHAPE_ID) { + if (cache.shape_offset != INVALID_SHAPE_ID) { RB_DEBUG_COUNTER_INC(ivar_get_ic_miss_set); } else { @@ -1369,31 +1340,27 @@ vm_getivar(VALUE obj, ID id, const rb_iseq_t *iseq, IVC ic, const struct rb_call } } else { - shape_id_t previous_cached_id = cached_id; - if (rb_shape_get_iv_index_with_hint(shape_id, id, &index, &cached_id)) { - // This fills in the cache with the shared cache object. - // "ent" is the shared cache object - if (cached_id != previous_cached_id) { - fill_ivar_cache(index, cached_id, iseq, ic, cc, is_attr); + shape_id_t previous_cached_offset = cache.shape_offset; + if (rb_shape_get_iv_index_with_hint(shape_id, id, &cache.index, &cache.shape_offset)) { + if (cache.shape_offset != previous_cached_offset) { + RUBY_ASSERT(!rb_shape_too_complex_p(cache.shape_offset)); + RUBY_ASSERT(cache.shape_offset != INVALID_SHAPE_ID); + + uint64_t packed_cache = rb_getivar_cache_pack(cache.shape_offset, cache.index); + vm_cache_attr_index_set(is_attr, ic, cc, packed_cache); } - if (index == ATTR_INDEX_NOT_SET) { + if (cache.index == ATTR_INDEX_NOT_SET) { val = default_value; } else { // We fetched the ivar list above - val = ivar_list[index]; + val = ivar_list[cache.index]; RUBY_ASSERT(!UNDEF_P(val)); } } else { - if (is_attr) { - vm_cc_attr_index_initialize(cc, shape_id); - } - else { - vm_ic_attr_index_initialize(ic, shape_id); - } - + vm_cache_attr_index_set(is_attr, ic, cc, rb_getivar_cache_pack(shape_id, ATTR_INDEX_NOT_SET)); val = default_value; } } @@ -1417,48 +1384,6 @@ vm_getivar(VALUE obj, ID id, const rb_iseq_t *iseq, IVC ic, const 
struct rb_call } } -ALWAYS_INLINE(static shape_id_t revalidate_setivar_cache(shape_id_t shape_id, shape_id_t dest_shape_offset, ID id, attr_index_t index)); -static shape_id_t -revalidate_setivar_cache(shape_id_t shape_id, shape_id_t dest_shape_offset, ID id, attr_index_t index) -{ - RUBY_ASSERT(shape_id != INVALID_SHAPE_ID); - RUBY_ASSERT(dest_shape_offset == INVALID_SHAPE_ID || dest_shape_offset == RSHAPE_OFFSET(dest_shape_offset)); - - shape_id_t dest_shape_id = shape_id; - shape_id_t normalized_shape_id = shape_id & SHAPE_ID_WRITE_MASK; - - if (normalized_shape_id == dest_shape_offset) { - // Perfect hit case, we're reassigning an existing ivar, the shape doesn't change. - // Also since `SHAPE_ID_WRITE_MASK` contains COMPLEX and FROZEN flags, by matching - // `dest_shape_offset` we also checked that the object is neither complex nor frozen. - } - else { - if (dest_shape_offset == INVALID_SHAPE_ID) { - // The cache is cold. - return INVALID_SHAPE_ID; - } - - // Child hit case, the cache offset is a direct child of the current shape - // and the ivar name matches. - // We additionally need to ensure that the object has sufficient capacity. - if (!(RSHAPE_DIRECT_CHILD_P(shape_id, dest_shape_id) && - RSHAPE_EDGE_NAME(dest_shape_id) == id && - index < RSHAPE_CAPACITY(shape_id))) { - return INVALID_SHAPE_ID; - } - - // We use the cached offset, but combined with the current shape flags. 
- dest_shape_id = rb_shape_transition_offset(shape_id, dest_shape_offset); - } - - // Cache hit case - RUBY_ASSERT(!rb_shape_frozen_p(shape_id)); - RUBY_ASSERT(!rb_shape_too_complex_p(shape_id)); - RUBY_ASSERT(RSHAPE_CAPACITY(shape_id) >= RSHAPE_LEN(dest_shape_offset)); - - return dest_shape_id; -} - ALWAYS_INLINE(static VALUE vm_setivar_slowpath(VALUE obj, ID id, VALUE val, const rb_iseq_t *iseq, IVC ic, const struct rb_callcache *cc, int is_attr)); NOINLINE(static VALUE vm_setivar_slowpath_ivar(VALUE obj, ID id, VALUE val, const rb_iseq_t *iseq, IVC ic)); NOINLINE(static VALUE vm_setivar_slowpath_attr(VALUE obj, ID id, VALUE val, const struct rb_callcache *cc)); @@ -1471,11 +1396,13 @@ vm_setivar_slowpath(VALUE obj, ID id, VALUE val, const rb_iseq_t *iseq, IVC ic, rb_check_frozen(obj); + shape_id_t previous_shape_id = RBASIC_SHAPE_ID(obj); attr_index_t index = rb_ivar_set_index(obj, id, val); shape_id_t next_shape_id = RBASIC_SHAPE_ID(obj); if (!rb_shape_too_complex_p(next_shape_id)) { - fill_ivar_cache(index, next_shape_id, iseq, ic, cc, is_attr); + uint64_t packed_cache = rb_setivar_cache_pack(RSHAPE_OFFSET(previous_shape_id), RSHAPE_OFFSET(next_shape_id), index); + vm_cache_attr_index_set(is_attr, ic, cc, packed_cache); } RB_DEBUG_COUNTER_INC(ivar_set_obj_miss); @@ -1497,9 +1424,9 @@ vm_setivar_slowpath_attr(VALUE obj, ID id, VALUE val, const struct rb_callcache return vm_setivar_slowpath(obj, id, val, NULL, NULL, cc, true); } -NOINLINE(static VALUE vm_setivar_class(VALUE obj, ID id, VALUE val, shape_id_t dest_shape_id, attr_index_t index)); +NOINLINE(static VALUE vm_setivar_class(VALUE obj, VALUE val, rb_setivar_cache cache)); static VALUE -vm_setivar_class(VALUE obj, ID id, VALUE val, shape_id_t dest_shape_id, attr_index_t index) +vm_setivar_class(VALUE obj, VALUE val, rb_setivar_cache cache) { if (UNLIKELY(!rb_ractor_main_p())) { return Qundef; @@ -1511,12 +1438,12 @@ vm_setivar_class(VALUE obj, ID id, VALUE val, shape_id_t dest_shape_id, attr_ind } 
shape_id_t shape_id = RBASIC_SHAPE_ID(fields_obj); - dest_shape_id = revalidate_setivar_cache(shape_id, dest_shape_id, id, index); + shape_id_t dest_shape_id = rb_setivar_cache_revalidate(shape_id, cache); if (UNLIKELY(dest_shape_id == INVALID_SHAPE_ID)) { return Qundef; } - RB_OBJ_WRITE(fields_obj, &rb_imemo_fields_ptr(fields_obj)[index], val); + RB_OBJ_WRITE(fields_obj, &rb_imemo_fields_ptr(fields_obj)[cache.index], val); if (shape_id != dest_shape_id) { RBASIC_SET_SHAPE_ID(obj, dest_shape_id); @@ -1528,19 +1455,19 @@ vm_setivar_class(VALUE obj, ID id, VALUE val, shape_id_t dest_shape_id, attr_ind return val; } -NOINLINE(static VALUE vm_setivar_default(VALUE obj, ID id, VALUE val, shape_id_t dest_shape_id, attr_index_t index)); +NOINLINE(static VALUE vm_setivar_default(VALUE obj, ID id, VALUE val, rb_setivar_cache cache)); static VALUE -vm_setivar_default(VALUE obj, ID id, VALUE val, shape_id_t dest_shape_id, attr_index_t index) +vm_setivar_default(VALUE obj, ID id, VALUE val, rb_setivar_cache cache) { shape_id_t shape_id = RBASIC_SHAPE_ID(obj); - dest_shape_id = revalidate_setivar_cache(shape_id, dest_shape_id, id, index); + shape_id_t dest_shape_id = rb_setivar_cache_revalidate(shape_id, cache); if (UNLIKELY(dest_shape_id == INVALID_SHAPE_ID)) { return Qundef; } VALUE fields_obj = rb_obj_fields(obj, id); RUBY_ASSERT(fields_obj); - RB_OBJ_WRITE(fields_obj, &rb_imemo_fields_ptr(fields_obj)[index], val); + RB_OBJ_WRITE(fields_obj, &rb_imemo_fields_ptr(fields_obj)[cache.index], val); if (shape_id != dest_shape_id) { RBASIC_SET_SHAPE_ID(obj, dest_shape_id); @@ -1553,7 +1480,7 @@ vm_setivar_default(VALUE obj, ID id, VALUE val, shape_id_t dest_shape_id, attr_i } static inline VALUE -vm_setivar(VALUE obj, ID id, VALUE val, shape_id_t dest_shape_id, attr_index_t index) +vm_setivar(VALUE obj, VALUE val, rb_setivar_cache cache) { #if OPT_IC_FOR_IVAR switch (BUILTIN_TYPE(obj)) { @@ -1562,12 +1489,12 @@ vm_setivar(VALUE obj, ID id, VALUE val, shape_id_t dest_shape_id, 
attr_index_t i VM_ASSERT(!rb_ractor_shareable_p(obj) || rb_obj_frozen_p(obj)); shape_id_t shape_id = RBASIC_SHAPE_ID(obj); - dest_shape_id = revalidate_setivar_cache(shape_id, dest_shape_id, id, index); + shape_id_t dest_shape_id = rb_setivar_cache_revalidate(shape_id, cache); if (UNLIKELY(dest_shape_id == INVALID_SHAPE_ID)) { break; } - RB_OBJ_WRITE(obj, &ROBJECT_FIELDS(obj)[index], val); + RB_OBJ_WRITE(obj, &ROBJECT_FIELDS(obj)[cache.index], val); if (shape_id != dest_shape_id) { RBASIC_SET_SHAPE_ID(obj, dest_shape_id); } @@ -1686,22 +1613,19 @@ vm_setinstancevariable(const rb_iseq_t *iseq, VALUE obj, ID id, VALUE val, IVC i return; } - shape_id_t dest_shape_id; - attr_index_t index; - vm_ic_atomic_shape_and_index(ic, &dest_shape_id, &index); - - if (UNLIKELY(UNDEF_P(vm_setivar(obj, id, val, dest_shape_id, index)))) { + rb_setivar_cache cache = rb_setivar_cache_unpack(vm_ic_atomic_cache_read(ic)); + if (UNLIKELY(UNDEF_P(vm_setivar(obj, val, cache)))) { switch (BUILTIN_TYPE(obj)) { case T_OBJECT: break; case T_CLASS: case T_MODULE: - if (!UNDEF_P(vm_setivar_class(obj, id, val, dest_shape_id, index))) { + if (!UNDEF_P(vm_setivar_class(obj, val, cache))) { return; } break; default: - if (!UNDEF_P(vm_setivar_default(obj, id, val, dest_shape_id, index))) { + if (!UNDEF_P(vm_setivar_default(obj, id, val, cache))) { return; } } @@ -4075,12 +3999,10 @@ vm_call_attrset_direct(rb_execution_context_t *ec, rb_control_frame_t *cfp, cons RB_DEBUG_COUNTER_INC(ccf_attrset); VALUE val = *(cfp->sp - 1); cfp->sp -= 2; - attr_index_t index; - shape_id_t dest_shape_id; - vm_cc_atomic_shape_and_index(cc, &dest_shape_id, &index); + rb_setivar_cache cache = rb_setivar_cache_unpack(vm_cc_atomic_cache_read(cc)); ID id = vm_cc_cme(cc)->def->body.attr.id; rb_check_frozen(obj); - VALUE res = vm_setivar(obj, id, val, dest_shape_id, index); + VALUE res = vm_setivar(obj, val, cache); if (UNDEF_P(res)) { switch (BUILTIN_TYPE(obj)) { case T_OBJECT: @@ -4088,7 +4010,7 @@ 
vm_call_attrset_direct(rb_execution_context_t *ec, rb_control_frame_t *cfp, cons case T_CLASS: case T_MODULE: { - res = vm_setivar_class(obj, id, val, dest_shape_id, index); + res = vm_setivar_class(obj, val, cache); if (!UNDEF_P(res)) { return res; } @@ -4096,7 +4018,7 @@ vm_call_attrset_direct(rb_execution_context_t *ec, rb_control_frame_t *cfp, cons break; default: { - res = vm_setivar_default(obj, id, val, dest_shape_id, index); + res = vm_setivar_default(obj, id, val, cache); if (!UNDEF_P(res)) { return res; } @@ -4898,7 +4820,7 @@ vm_call_method_each_type(rb_execution_context_t *ec, rb_control_frame_t *cfp, st const unsigned int aset_mask = (VM_CALL_ARGS_SPLAT | VM_CALL_KW_SPLAT | VM_CALL_KWARG | VM_CALL_FORWARDING); if (vm_cc_markable(cc)) { - vm_cc_attr_index_initialize(cc, INVALID_SHAPE_ID); + vm_cc_attr_index_set(cc, IVAR_CACHE_INIT); VM_CALL_METHOD_ATTR(v, vm_call_attrset_direct(ec, cfp, cc, calling->recv), CC_SET_FASTPATH(cc, vm_call_attrset, !(vm_ci_flag(ci) & aset_mask))); @@ -4914,7 +4836,7 @@ vm_call_method_each_type(rb_execution_context_t *ec, rb_control_frame_t *cfp, st .call_ = cc->call_, .aux_ = { .attr = { - .value = vm_pack_shape_and_index(INVALID_SHAPE_ID, ATTR_INDEX_NOT_SET), + .value = IVAR_CACHE_INIT, } }, }); @@ -4928,7 +4850,7 @@ vm_call_method_each_type(rb_execution_context_t *ec, rb_control_frame_t *cfp, st case VM_METHOD_TYPE_IVAR: CALLER_SETUP_ARG(cfp, calling, ci, 0); rb_check_arity(calling->argc, 0, 0); - vm_cc_attr_index_initialize(cc, INVALID_SHAPE_ID); + vm_cc_attr_index_set(cc, rb_getivar_cache_pack(ROOT_SHAPE_ID, ATTR_INDEX_NOT_SET)); const unsigned int ivar_mask = (VM_CALL_ARGS_SPLAT | VM_CALL_KW_SPLAT | VM_CALL_FORWARDING); VM_CALL_METHOD_ATTR(v, vm_call_ivar(ec, cfp, calling), From 9cee5c73cbeda67ba81a2e0997871bf9fe4285b0 Mon Sep 17 00:00:00 2001 From: Burdette Lamar Date: Tue, 5 May 2026 16:33:00 -0500 Subject: [PATCH 2/5] [DOC] Add Japanese and data to IO.read --- io.c | 37 +++++++++++++++++++++++++++++++------ 1 file 
changed, 31 insertions(+), 6 deletions(-) diff --git a/io.c b/io.c index 39131ee11115e1..f0fb4cf808012d 100644 --- a/io.c +++ b/io.c @@ -12250,8 +12250,12 @@ seek_before_access(VALUE argp) * With only argument +path+ given, reads in text mode and returns the entire content * of the file at the given path: * - * IO.read('t.txt') - * # => "First line\nSecond line\n\nThird line\nFourth line\n" + * File.read('t.txt') + * # => "First line\nSecond line\n\nFourth line\nFifth line\n" + * File.read('t.ja') + * # => "こんにちは" + * File.read('t.dat') + * # => "\xFE\xFF\x99\x90\x99\x91\x99\x92\x99\x93\x99\x94" * * On Windows, text mode can terminate reading and leave bytes in the file * unread when encountering certain special bytes. Consider using @@ -12259,15 +12263,36 @@ seek_before_access(VALUE argp) * * With argument +length+, returns +length+ bytes if available: * - * IO.read('t.txt', 7) # => "First l" - * IO.read('t.txt', 700) + * File.read('t.txt', 7) + * # => "First l" + * File.read('t.ja', 7) + * # => "\xE3\x81\x93\xE3\x82\x93\xE3" + * File.read('t.dat', 7) + * # => "\xFE\xFF\x99\x90\x99\x91\x99" + * + * Returns all bytes if +length+ is larger than the file's size: + * + * File.read('t.txt', 700) * # => "First line\r\nSecond line\r\n\r\nFourth line\r\nFifth line\r\n" + * File.read('t.ja', 700) + * # => "\xE3\x81\x93\xE3\x82\x93\xE3\x81\xAB\xE3\x81\xA1\xE3\x81\xAF" + * File.read('t.dat', 700) + * # => "\xFE\xFF\x99\x90\x99\x91\x99\x92\x99\x93\x99\x94" * * With arguments +length+ and +offset+, returns +length+ bytes * if available, beginning at the given +offset+: * - * IO.read('t.txt', 10, 2) # => "rst line\nS" - * IO.read('t.txt', 10, 200) # => nil + * File.read('t.txt', 10, 2) + * # => "rst line\r\n" + * File.read('t.ja', 10, 2) + * # => "\x93\xE3\x82\x93\xE3\x81\xAB\xE3\x81\xA1" + * File.read('t.dat', 10, 2) + * # => "\x99\x90\x99\x91\x99\x92\x99\x93\x99\x94" + * + * Returns +nil+ if +offset+ is past the end of the stream: + * + * File.read('t.txt', 10, 200) + * # => 
nil * * Optional keyword arguments +opts+ specify: * From f56ee25ca99c6e16e835830ca3dae04771e1f4d8 Mon Sep 17 00:00:00 2001 From: BurdetteLamar Date: Tue, 5 May 2026 15:30:14 +0100 Subject: [PATCH 3/5] [DOC] Doc for Pathname#write --- pathname_builtin.rb | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/pathname_builtin.rb b/pathname_builtin.rb index 015113bbfe6673..03b71f79868aac 100644 --- a/pathname_builtin.rb +++ b/pathname_builtin.rb @@ -1115,7 +1115,41 @@ def readlines(...) File.readlines(@path, ...) end # See File.sysopen. def sysopen(...) File.sysopen(@path, ...) end - # Writes +contents+ to the file. See File.write. + # call-seq: + # write(data, offset = 0, **opts) -> nonnegative_integer + # + # Opens the file at +self.to_s+, writes the given +data+ to it, + # and closes the file; returns the number of bytes written. + # + # With only argument +data+ given, writes the given data to the file: + # + # path = 't.tmp' + # pn = Pathname.new(path) + # pn.write('foo') # => 3 + # File.read(path) # => "foo" + # + # If +offset+ is zero (the default), the file is overwritten: + # + # pn.write('bar') + # File.read(path) # => "bar" + # + # If +offset+ is within the file content, the file is partly overwritten: + # + # pn.write('foobarbaz') + # pn.write('BAR', 3) + # File.read(path) # => "fooBARbaz" + # + # If +offset+ is outside the file content, + # the file is padded with null characters "\u0000": + # + # pn.write('bat', 12) + # File.read(path) # => "fooBARbaz\u0000\u0000\u0000bat" + # + # Optional keyword arguments +opts+ specify: + # + # - {Open Options}[rdoc-ref:IO@Open+Options]. + # - {Encoding options}[rdoc-ref:encodings.rdoc@Encoding+Options]. + # def write(...) File.write(@path, ...) end # Writes +contents+ to the file, opening it in binary mode. 
From d3c770dd67b6bf11efa9122659044e1b3661917f Mon Sep 17 00:00:00 2001 From: Burdette Lamar Date: Tue, 5 May 2026 16:43:53 -0500 Subject: [PATCH 4/5] [DOC] for IO.write (#16857) --- io.c | 44 +++++++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 15 deletions(-) diff --git a/io.c b/io.c index f0fb4cf808012d..9903693aeba538 100644 --- a/io.c +++ b/io.c @@ -12432,36 +12432,50 @@ io_s_write(int argc, VALUE *argv, VALUE klass, int binary) /* * call-seq: - * IO.write(path, data, offset = 0, **opts) -> integer + * IO.write(path, data, offset = 0, **opts) -> nonnegative_integer * * Opens the stream, writes the given +data+ to it, * and closes the stream; returns the number of bytes written. * * The first argument must be a string that is the path to a file. * - * With only argument +path+ given, writes the given +data+ to the file at that path: + * With only arguments +path+ and +data+ given, + * writes the given data to the file at that path: * - * IO.write('t.tmp', 'abc') # => 3 - * File.read('t.tmp') # => "abc" + * path = 't.tmp' + * File.write(path, "First line\nSecond line\n\nFourth line\nFifth line\n") # => 47 + * File.write(path, 'こんにちは') # => 15 + * File.write(path, "\xFE\xFF\x99\x90\x99\x91\x99\x92\x99\x93\x99\x94") # => 12 + * + * When +offset+ is zero (the default), the entire file content is overwritten: + * + * File.read(path) # => "\xFE\xFF\x99\x90\x99\x91\x99\x92\x99\x93\x99\x94" + * File.write(path, 'foo') + * File.read(path) # => "foo" * - * If +offset+ is zero (the default), the file is overwritten: + * When +offset+ is within the file content, the file content is partly overwritten, + * beginning at byte +offset+: * - * IO.write('t.tmp', 'A') # => 1 - * File.read('t.tmp') # => "A" + * File.write(path, "First line\nSecond line\n\nFourth line\nFifth line\n") + * File.write(path, 'LINE', 6) + * File.read(path) # => "First LINE\nSecond line\n\nFourth line\nFifth line\n" * - * If +offset+ in within the file content, the file is partly 
overwritten: + * When the file contains multi-byte characters, + * the effect of writing may disturb some characters: * - * IO.write('t.tmp', 'abcdef') # => 3 - * File.read('t.tmp') # => "abcdef" - * # Offset within content. - * IO.write('t.tmp', '012', 2) # => 3 - * File.read('t.tmp') # => "ab012f" + * File.write(path, "こんにちは") + * File.write(path, 'FOO', 3) # Replace one 3-byte character. + * File.read(path) # => "こFOOにちは" + * File.write(path, 'BAR', 7) # Replace bytes in two different 3-byte characters. + * File.read(path) # => "こFOO\xE3BAR\x81\xA1は" * * If +offset+ is outside the file content, * the file is padded with null characters "\u0000": * - * IO.write('t.tmp', 'xyz', 10) # => 3 - * File.read('t.tmp') # => "ab012f\u0000\u0000\u0000\u0000xyz" + * File.write(path, "First line\nSecond line\n\nFourth line\nFifth line\n") + * File.write(path, 'FOO', 50) + * File.read(path) + * # => "First line\nSecond line\n\nFourth line\nFifth line\n\u0000\u0000\u0000FOO" * * Optional keyword arguments +opts+ specify: * From 7ced2c881018d57208d0474789ce929ca8514353 Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Tue, 5 May 2026 17:40:09 -0400 Subject: [PATCH 5/5] [DOC] Add missing opts argument to IO.binwrite --- io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/io.c b/io.c index 9903693aeba538..cb1c95460adc0b 100644 --- a/io.c +++ b/io.c @@ -12492,7 +12492,7 @@ rb_io_s_write(int argc, VALUE *argv, VALUE io) /* * call-seq: - * IO.binwrite(path, string, offset = 0) -> integer + * IO.binwrite(path, string, offset = 0, **opts) -> integer * * Behaves like IO.write, except that the stream is opened in binary mode * with ASCII-8BIT encoding.