turkdevops · pull · May 6, 2026 · May 5, 2026 · May 5, 2026 · May 5, 2026
diff --git a/compile.c b/compile.c
@@ -2781,12 +2781,12 @@ iseq_set_sequence(rb_iseq_t *iseq, LINK_ANCHOR *const anchor)
 
                             if (insn == BIN(setinstancevariable)) {
                                 cache->iv_set_name = SYM2ID(operands[j - 1]);
+                                cache->value = IVAR_CACHE_INIT;
                             }
                             else {
                                 cache->iv_set_name = 0;
+                                cache->value = rb_getivar_cache_pack(ROOT_SHAPE_ID, ATTR_INDEX_NOT_SET);
                             }
-
-                            vm_ic_attr_index_initialize(cache, INVALID_SHAPE_ID);
                         }
                       case TS_ISE: /* inline storage entry: `once` insn */
                       case TS_ICVARC: /* inline cvar cache */
@@ -13124,12 +13124,12 @@ ibf_load_code(const struct ibf_load *load, rb_iseq_t *iseq, ibf_offset_t bytecod
                         if (insn == BIN(setinstancevariable)) {
                             ID iv_name = (ID)code[code_index - 1];
                             cache->iv_set_name = iv_name;
+                            cache->value = IVAR_CACHE_INIT;
                         }
                         else {
                             cache->iv_set_name = 0;
+                            cache->value = rb_getivar_cache_pack(ROOT_SHAPE_ID, ATTR_INDEX_NOT_SET);
                         }
-
-                        vm_ic_attr_index_initialize(cache, INVALID_SHAPE_ID);
                     }
 
                 }

diff --git a/io.c b/io.c
@@ -12250,24 +12250,49 @@ seek_before_access(VALUE argp)
  *  With only argument +path+ given, reads in text mode and returns the entire content
  *  of the file at the given path:
  *
- *    IO.read('t.txt')
- *    # => "First line\nSecond line\n\nThird line\nFourth line\n"
+ *    File.read('t.txt')
+ *    # => "First line\nSecond line\n\nFourth line\nFifth line\n"
+ *    File.read('t.ja')
+ *    # => "こんにちは"
+ *    File.read('t.dat')
+ *    # => "\xFE\xFF\x99\x90\x99\x91\x99\x92\x99\x93\x99\x94"
  *
  *  On Windows, text mode can terminate reading and leave bytes in the file
  *  unread when encountering certain special bytes. Consider using
  *  IO.binread if all bytes in the file should be read.
  *
  *  With argument +length+, returns +length+ bytes if available:
  *
- *    IO.read('t.txt', 7) # => "First l"
- *    IO.read('t.txt', 700)
+ *    File.read('t.txt', 7)
+ *    # => "First l"
+ *    File.read('t.ja', 7)
+ *    # => "\xE3\x81\x93\xE3\x82\x93\xE3"
+ *    File.read('t.dat', 7)
+ *    # => "\xFE\xFF\x99\x90\x99\x91\x99"
+ *
+ *  Returns all bytes if +length+ is larger than the file's size:
+ *
+ *    File.read('t.txt', 700)
  *    # => "First line\r\nSecond line\r\n\r\nFourth line\r\nFifth line\r\n"
+ *    File.read('t.ja', 700)
+ *    # => "\xE3\x81\x93\xE3\x82\x93\xE3\x81\xAB\xE3\x81\xA1\xE3\x81\xAF"
+ *    File.read('t.dat', 700)
+ *    # => "\xFE\xFF\x99\x90\x99\x91\x99\x92\x99\x93\x99\x94"
  *
  *  With arguments +length+ and +offset+, returns +length+ bytes
  *  if available, beginning at the given +offset+:
  *
- *    IO.read('t.txt', 10, 2)   # => "rst line\nS"
- *    IO.read('t.txt', 10, 200) # => nil
+ *    File.read('t.txt', 10, 2)
+ *    # => "rst line\r\n"
+ *    File.read('t.ja', 10, 2)
+ *    # => "\x93\xE3\x82\x93\xE3\x81\xAB\xE3\x81\xA1"
+ *    File.read('t.dat', 10, 2)
+ *    # => "\x99\x90\x99\x91\x99\x92\x99\x93\x99\x94"
+ *
+ *  Returns +nil+ if +offset+ is past the end of the stream:
+ *
+ *    File.read('t.txt', 10, 200)
+ *    # => nil
  *
  *  Optional keyword arguments +opts+ specify:
  *
@@ -12407,36 +12432,50 @@ io_s_write(int argc, VALUE *argv, VALUE klass, int binary)
 
 /*
  *  call-seq:
- *    IO.write(path, data, offset = 0, **opts)    -> integer
+ *    IO.write(path, data, offset = 0, **opts) -> nonnegative_integer
  *
  *  Opens the stream, writes the given +data+ to it,
  *  and closes the stream; returns the number of bytes written.
  *
  *  The first argument must be a string that is the path to a file.
  *
- *  With only argument +path+ given, writes the given +data+ to the file at that path:
+ *  With only arguments +path+ and +data+ given,
+ *  writes the given data to the file at that path:
+ *
+ *    path = 't.tmp'
+ *    File.write(path, "First line\nSecond line\n\nFourth line\nFifth line\n") # => 47
+ *    File.write(path, 'こんにちは')                                             # => 15
+ *    File.write(path, "\xFE\xFF\x99\x90\x99\x91\x99\x92\x99\x93\x99\x94")     # => 12
+ *
+ *  When +offset+ is zero (the default), the entire file content is overwritten:
  *
- *    IO.write('t.tmp', 'abc')    # => 3
- *    File.read('t.tmp')          # => "abc"
+ *    File.read(path) # => "\xFE\xFF\x99\x90\x99\x91\x99\x92\x99\x93\x99\x94"
+ *    File.write(path, 'foo')
+ *    File.read(path) # => "foo"
  *
- *  If +offset+ is zero (the default), the file is overwritten:
+ *  When +offset+ is within the file content, the file content is partly overwritten,
+ *  beginning at byte +offset+:
  *
- *    IO.write('t.tmp', 'A')      # => 1
- *    File.read('t.tmp')          # => "A"
+ *    File.write(path, "First line\nSecond line\n\nFourth line\nFifth line\n")
+ *    File.write(path, 'LINE', 6)
+ *    File.read(path) # => "First LINE\nSecond line\n\nFourth line\nFifth line\n"
  *
- *  If +offset+ in within the file content, the file is partly overwritten:
+ *  When the file contains multi-byte characters,
+ *  the effect of writing may disturb some characters:
  *
- *    IO.write('t.tmp', 'abcdef') # => 3
- *    File.read('t.tmp')          # => "abcdef"
- *    # Offset within content.
- *    IO.write('t.tmp', '012', 2) # => 3
- *    File.read('t.tmp')          # => "ab012f"
+ *    File.write(path, "こんにちは")
+ *    File.write(path, 'FOO', 3)  # Replace one 3-byte character.
+ *    File.read(path) # => "こFOOにちは"
+ *    File.write(path, 'BAR', 7)  # Replace bytes in two different 3-byte characters.
+ *    File.read(path) # => "こFOO\xE3BAR\x81\xA1は"
  *
  *  If +offset+ is outside the file content,
  *  the file is padded with null characters <tt>"\u0000"</tt>:
  *
- *    IO.write('t.tmp', 'xyz', 10) # => 3
- *    File.read('t.tmp')           # => "ab012f\u0000\u0000\u0000\u0000xyz"
+ *    File.write(path, "First line\nSecond line\n\nFourth line\nFifth line\n")
+ *    File.write(path, 'FOO', 55)
+ *    File.read(path)
+ *    # => "First line\nSecond line\n\nFourth line\nFifth line\n\u0000\u0000\u0000FOO"
  *
  *  Optional keyword arguments +opts+ specify:
  *
@@ -12453,7 +12492,7 @@ rb_io_s_write(int argc, VALUE *argv, VALUE io)
 
 /*
  *  call-seq:
- *    IO.binwrite(path, string, offset = 0)    -> integer
+ *    IO.binwrite(path, string, offset = 0, **opts)    -> integer
  *
  *  Behaves like IO.write, except that the stream is opened in binary mode
  *  with ASCII-8BIT encoding.

diff --git a/pathname_builtin.rb b/pathname_builtin.rb
@@ -1115,7 +1115,41 @@ def readlines(...) File.readlines(@path, ...) end
   # See <tt>File.sysopen</tt>.
   def sysopen(...) File.sysopen(@path, ...) end
 
-  # Writes +contents+ to the file. See <tt>File.write</tt>.
+  # call-seq:
+  #   write(data, offset = 0, **opts) -> nonnegative_integer
+  #
+  # Opens the file at +self.to_s+, writes the given +data+ to it,
+  # and closes the file; returns the number of bytes written.
+  #
+  # With only argument +data+ given, writes the given data to the file:
+  #
+  #   path = 't.tmp'
+  #   pn = Pathname.new(path)
+  #   pn.write('foo') # => 3
+  #   File.read(path) # => "foo"
+  #
+  # If +offset+ is zero (the default), the file is overwritten:
+  #
+  #   pn.write('bar')
+  #   File.read(path) # => "bar"
+  #
+  # If +offset+ is within the file content, the file is partly overwritten:
+  #
+  #   pn.write('foobarbaz')
+  #   pn.write('BAR', 3)
+  #   File.read(path) # => "fooBARbaz"
+  #
+  # If +offset+ is outside the file content,
+  # the file is padded with null characters <tt>"\u0000"</tt>:
+  #
+  #   pn.write('bat', 12)
+  #   File.read(path) # => "fooBARbaz\u0000\u0000\u0000bat"
+  #
+  # Optional keyword arguments +opts+ specify:
+  #
+  # - {Open Options}[rdoc-ref:IO@Open+Options].
+  # - {Encoding options}[rdoc-ref:encodings.rdoc@Encoding+Options].
+  #
   def write(...) File.write(@path, ...) end
 
   # Writes +contents+ to the file, opening it in binary mode.

diff --git a/shape.h b/shape.h
@@ -117,14 +117,6 @@ RUBY_SYMBOL_EXPORT_END
 size_t rb_shapes_cache_size(void);
 size_t rb_shapes_count(void);
 
-union rb_attr_index_cache {
-    uint64_t pack;
-    struct {
-        shape_id_t shape_id;
-        attr_index_t index;
-    } unpack;
-};
-
 static inline shape_id_t
 RBASIC_SHAPE_ID(VALUE obj)
 {
@@ -267,8 +259,7 @@ RSHAPE_PARENT_OFFSET(shape_id_t shape_id)
 static inline bool
 RSHAPE_DIRECT_CHILD_P(shape_id_t parent_offset, shape_id_t child_id)
 {
-    return (RSHAPE_FLAGS(parent_offset) == RSHAPE_FLAGS(child_id) &&
-        RSHAPE_PARENT_OFFSET(child_id) == RSHAPE_OFFSET(parent_offset));
+    return RSHAPE_PARENT_OFFSET(child_id) == RSHAPE_OFFSET(parent_offset);
 }
 
 static inline enum shape_type
@@ -529,4 +520,107 @@ size_t rb_shape_edges_count(shape_id_t shape_id);
 size_t rb_shape_depth(shape_id_t shape_id);
 RUBY_SYMBOL_EXPORT_END
 
+// Inline cache helpers
+
+typedef struct {
+    attr_index_t index;
+    shape_id_t shape_offset;
+} rb_getivar_cache;
+
+union rb_getivar_cache {
+    uint64_t pack;
+    rb_getivar_cache unpack;
+};
+STATIC_ASSERT(rb_getivar_cache_size, sizeof(union rb_getivar_cache) <= sizeof(uint64_t));
+
+#define IVAR_CACHE_INIT ((uint64_t)-1)
+#define ATTR_INDEX_T_NUM_BITS (sizeof(attr_index_t) * CHAR_BIT)
+
+static inline rb_getivar_cache
+rb_getivar_cache_unpack(uint64_t packed)
+{
+    union rb_getivar_cache cache = {
+        .pack = packed,
+    };
+
+    // Because caches may be initialized with all bits set (IVAR_CACHE_INIT), and `shape_offset` is 32 bits,
+    // we need to remove any potential extra bits set in the "padding".
+    cache.unpack.shape_offset &= SHAPE_ID_OFFSET_MASK;
+    return cache.unpack;
+}
+
+static inline uint64_t
+rb_getivar_cache_pack(shape_id_t shape_offset, attr_index_t index)
+{
+    RUBY_ASSERT(shape_offset == RSHAPE_OFFSET(shape_offset));
+    RUBY_ASSERT(shape_offset != INVALID_SHAPE_ID);
+
+    union rb_getivar_cache cache = {
+        .unpack = {
+            .shape_offset = shape_offset,
+            .index = index,
+        },
+    };
+    return cache.pack;
+}
+
+typedef struct {
+    attr_index_t index;
+    shape_id_t source_shape_offset;
+    shape_id_t dest_shape_offset;
+} rb_setivar_cache;
+
+static inline rb_setivar_cache
+rb_setivar_cache_unpack(uint64_t packed)
+{
+    rb_setivar_cache cache = {
+        .index = (attr_index_t)packed,
+        .source_shape_offset = RSHAPE_OFFSET((shape_id_t)(packed >> ATTR_INDEX_T_NUM_BITS)),
+        .dest_shape_offset = RSHAPE_OFFSET((shape_id_t)(packed >> (ATTR_INDEX_T_NUM_BITS + SHAPE_ID_OFFSET_NUM_BITS))),
+    };
+    return cache;
+}
+
+static inline uint64_t
+rb_setivar_cache_pack(shape_id_t shape_offset, shape_id_t dest_shape_offset, attr_index_t index)
+{
+    RUBY_ASSERT(shape_offset == RSHAPE_OFFSET(shape_offset));
+    RUBY_ASSERT(dest_shape_offset == RSHAPE_OFFSET(dest_shape_offset));
+    RUBY_ASSERT(shape_offset == dest_shape_offset || RSHAPE_DIRECT_CHILD_P(shape_offset, dest_shape_offset));
+
+    uint64_t packed_cache = (uint64_t)dest_shape_offset << (ATTR_INDEX_T_NUM_BITS + SHAPE_ID_OFFSET_NUM_BITS);
+    packed_cache |= (uint64_t)shape_offset << ATTR_INDEX_T_NUM_BITS;
+    packed_cache |= (uint64_t)index;
+    return packed_cache;
+}
+
+
+ALWAYS_INLINE(static shape_id_t rb_setivar_cache_revalidate(shape_id_t shape_id, rb_setivar_cache cache));
+static shape_id_t
+rb_setivar_cache_revalidate(shape_id_t shape_id, rb_setivar_cache cache)
+{
+    RUBY_ASSERT(shape_id != INVALID_SHAPE_ID);
+    RUBY_ASSERT(cache.dest_shape_offset == INVALID_SHAPE_ID || cache.dest_shape_offset == RSHAPE_OFFSET(cache.dest_shape_offset));
+
+    shape_id_t normalized_shape_id = shape_id & SHAPE_ID_WRITE_MASK;
+    if (UNLIKELY(normalized_shape_id != cache.source_shape_offset)) {
+        return INVALID_SHAPE_ID;
+    }
+
+    if (UNLIKELY(cache.index >= RSHAPE_CAPACITY(shape_id))) {
+        // That's still a hit in terms of layout, but the object will need to be resized,
+        // so unfortunately we'll have to go through the slow path regardless...
+        return INVALID_SHAPE_ID;
+    }
+
+    // Cache hit case
+    RUBY_ASSERT(cache.source_shape_offset == cache.dest_shape_offset || RSHAPE_DIRECT_CHILD_P(shape_id, cache.dest_shape_offset));
+    RUBY_ASSERT(cache.index < RSHAPE_CAPACITY(shape_id));
+    RUBY_ASSERT(!rb_shape_frozen_p(shape_id));
+    RUBY_ASSERT(!rb_shape_too_complex_p(shape_id));
+
+    // We use the cached offset, but combined with the current shape flags.
+    return rb_shape_transition_offset(shape_id, cache.dest_shape_offset);
+}
+
 #endif