Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions compile.c
Original file line number Diff line number Diff line change
Expand Up @@ -2781,12 +2781,12 @@ iseq_set_sequence(rb_iseq_t *iseq, LINK_ANCHOR *const anchor)

if (insn == BIN(setinstancevariable)) {
cache->iv_set_name = SYM2ID(operands[j - 1]);
cache->value = IVAR_CACHE_INIT;
}
else {
cache->iv_set_name = 0;
cache->value = rb_getivar_cache_pack(ROOT_SHAPE_ID, ATTR_INDEX_NOT_SET);
}

vm_ic_attr_index_initialize(cache, INVALID_SHAPE_ID);
}
case TS_ISE: /* inline storage entry: `once` insn */
case TS_ICVARC: /* inline cvar cache */
Expand Down Expand Up @@ -13124,12 +13124,12 @@ ibf_load_code(const struct ibf_load *load, rb_iseq_t *iseq, ibf_offset_t bytecod
if (insn == BIN(setinstancevariable)) {
ID iv_name = (ID)code[code_index - 1];
cache->iv_set_name = iv_name;
cache->value = IVAR_CACHE_INIT;
}
else {
cache->iv_set_name = 0;
cache->value = rb_getivar_cache_pack(ROOT_SHAPE_ID, ATTR_INDEX_NOT_SET);
}

vm_ic_attr_index_initialize(cache, INVALID_SHAPE_ID);
}

}
Expand Down
83 changes: 61 additions & 22 deletions io.c
Original file line number Diff line number Diff line change
Expand Up @@ -12250,24 +12250,49 @@ seek_before_access(VALUE argp)
* With only argument +path+ given, reads in text mode and returns the entire content
* of the file at the given path:
*
* IO.read('t.txt')
* # => "First line\nSecond line\n\nThird line\nFourth line\n"
* File.read('t.txt')
* # => "First line\nSecond line\n\nFourth line\nFifth line\n"
* File.read('t.ja')
* # => "こんにちは"
* File.read('t.dat')
* # => "\xFE\xFF\x99\x90\x99\x91\x99\x92\x99\x93\x99\x94"
*
* On Windows, text mode can terminate reading and leave bytes in the file
* unread when encountering certain special bytes. Consider using
* IO.binread if all bytes in the file should be read.
*
* With argument +length+, returns +length+ bytes if available:
*
* IO.read('t.txt', 7) # => "First l"
* IO.read('t.txt', 700)
* File.read('t.txt', 7)
* # => "First l"
* File.read('t.ja', 7)
* # => "\xE3\x81\x93\xE3\x82\x93\xE3"
* File.read('t.dat', 7)
* # => "\xFE\xFF\x99\x90\x99\x91\x99"
*
* Returns all bytes if +length+ is larger than the file's size:
*
* File.read('t.txt', 700)
* # => "First line\r\nSecond line\r\n\r\nFourth line\r\nFifth line\r\n"
* File.read('t.ja', 700)
* # => "\xE3\x81\x93\xE3\x82\x93\xE3\x81\xAB\xE3\x81\xA1\xE3\x81\xAF"
* File.read('t.dat', 700)
* # => "\xFE\xFF\x99\x90\x99\x91\x99\x92\x99\x93\x99\x94"
*
* With arguments +length+ and +offset+, returns +length+ bytes
* if available, beginning at the given +offset+:
*
* IO.read('t.txt', 10, 2) # => "rst line\nS"
* IO.read('t.txt', 10, 200) # => nil
* File.read('t.txt', 10, 2)
* # => "rst line\r\n"
* File.read('t.ja', 10, 2)
* # => "\x93\xE3\x82\x93\xE3\x81\xAB\xE3\x81\xA1"
* File.read('t.dat', 10, 2)
* # => "\x99\x90\x99\x91\x99\x92\x99\x93\x99\x94"
*
* Returns +nil+ if +offset+ is past the end of the stream:
*
* File.read('t.txt', 10, 200)
* # => nil
*
* Optional keyword arguments +opts+ specify:
*
Expand Down Expand Up @@ -12407,36 +12432,50 @@ io_s_write(int argc, VALUE *argv, VALUE klass, int binary)

/*
* call-seq:
* IO.write(path, data, offset = 0, **opts) -> integer
* IO.write(path, data, offset = 0, **opts) -> nonnegative_integer
*
* Opens the stream, writes the given +data+ to it,
* and closes the stream; returns the number of bytes written.
*
* The first argument must be a string that is the path to a file.
*
* With only argument +path+ given, writes the given +data+ to the file at that path:
* With only arguments +path+ and +data+ given,
* writes the given data to the file at that path:
*
* path = 't.tmp'
* File.write(path, "First line\nSecond line\n\nFourth line\nFifth line\n") # => 47
* File.write(path, 'こんにちは') # => 15
* File.write(path, "\xFE\xFF\x99\x90\x99\x91\x99\x92\x99\x93\x99\x94") # => 12
*
* When +offset+ is zero (the default), the entire file content is overwritten:
*
* IO.write('t.tmp', 'abc') # => 3
* File.read('t.tmp') # => "abc"
* File.read(path) # => "\xFE\xFF\x99\x90\x99\x91\x99\x92\x99\x93\x99\x94"
* File.write(path, 'foo')
* File.read(path) # => "foo"
*
* If +offset+ is zero (the default), the file is overwritten:
* When +offset+ is within the file content, the file content is partly overwritten,
* beginning at byte +offset+:
*
* IO.write('t.tmp', 'A') # => 1
* File.read('t.tmp') # => "A"
* File.write(path, "First line\nSecond line\n\nFourth line\nFifth line\n")
* File.write(path, 'LINE', 6)
* File.read(path) # => "First LINE\nSecond line\n\nFourth line\nFifth line\n"
*
* If +offset+ is within the file content, the file is partly overwritten:
* When the file contains multi-byte characters,
* the effect of writing may disturb some characters:
*
* IO.write('t.tmp', 'abcdef') # => 3
* File.read('t.tmp') # => "abcdef"
* # Offset within content.
* IO.write('t.tmp', '012', 2) # => 3
* File.read('t.tmp') # => "ab012f"
* File.write(path, "こんにちは")
* File.write(path, 'FOO', 3) # Replace one 3-byte character.
* File.read(path) # => "こFOOにちは"
* File.write(path, 'BAR', 7) # Replace bytes in two different 3-byte characters.
* File.read(path) # => "こFOO\xE3BAR\x81\xA1は"
*
* If +offset+ is outside the file content,
* the file is padded with null characters <tt>"\u0000"</tt>:
*
* IO.write('t.tmp', 'xyz', 10) # => 3
* File.read('t.tmp') # => "ab012f\u0000\u0000\u0000\u0000xyz"
* File.write(path, "First line\nSecond line\n\nFourth line\nFifth line\n")
* File.write(path, 'FOO', 55)
* File.read(path)
* # => "First line\nSecond line\n\nFourth line\nFifth line\n\u0000\u0000\u0000FOO"
*
* Optional keyword arguments +opts+ specify:
*
Expand All @@ -12453,7 +12492,7 @@ rb_io_s_write(int argc, VALUE *argv, VALUE io)

/*
* call-seq:
* IO.binwrite(path, string, offset = 0) -> integer
* IO.binwrite(path, string, offset = 0, **opts) -> integer
*
* Behaves like IO.write, except that the stream is opened in binary mode
* with ASCII-8BIT encoding.
Expand Down
36 changes: 35 additions & 1 deletion pathname_builtin.rb
Original file line number Diff line number Diff line change
Expand Up @@ -1115,7 +1115,41 @@ def readlines(...) File.readlines(@path, ...) end
# See <tt>File.sysopen</tt>.
def sysopen(...)
  # Forward every argument to File.sysopen, using @path as the path.
  File.sysopen(@path, ...)
end

# Writes +contents+ to the file. See <tt>File.write</tt>.
# call-seq:
# write(data, offset = 0, **opts) -> nonnegative_integer
#
# Opens the file at +self.to_s+, writes the given +data+ to it,
# and closes the file; returns the number of bytes written.
#
# With only argument +data+ given, writes the given data to the file:
#
# path = 't.tmp'
# pn = Pathname.new(path)
# pn.write('foo') # => 3
# File.read(path) # => "foo"
#
# If +offset+ is zero (the default), the file is overwritten:
#
# pn.write('bar')
# File.read(path) # => "bar"
#
# If +offset+ is within the file content, the file is partly overwritten:
#
# pn.write('foobarbaz')
# pn.write('BAR', 3)
# File.read(path) # => "fooBARbaz"
#
# If +offset+ is outside the file content,
# the file is padded with null characters <tt>"\u0000"</tt>:
#
# pn.write('bat', 12)
# File.read(path) # => "fooBARbaz\u0000\u0000\u0000bat"
#
# Optional keyword arguments +opts+ specify:
#
# - {Open Options}[rdoc-ref:IO@Open+Options].
# - {Encoding options}[rdoc-ref:encodings.rdoc@Encoding+Options].
#
def write(...)
  # Forward every argument to File.write, using @path as the path.
  File.write(@path, ...)
end

# Writes +contents+ to the file, opening it in binary mode.
Expand Down
114 changes: 104 additions & 10 deletions shape.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,14 +117,6 @@ RUBY_SYMBOL_EXPORT_END
size_t rb_shapes_cache_size(void);
size_t rb_shapes_count(void);

// Overlays a single 64-bit value on a (shape_id, index) pair so the same
// storage can be read either as one integer or as its separate fields.
union rb_attr_index_cache {
// The whole entry viewed as one 64-bit integer.
uint64_t pack;
// The same storage viewed as individual fields.
struct {
shape_id_t shape_id;
attr_index_t index;
} unpack;
};

static inline shape_id_t
RBASIC_SHAPE_ID(VALUE obj)
{
Expand Down Expand Up @@ -267,8 +259,7 @@ RSHAPE_PARENT_OFFSET(shape_id_t shape_id)
static inline bool
RSHAPE_DIRECT_CHILD_P(shape_id_t parent_offset, shape_id_t child_id)
{
return (RSHAPE_FLAGS(parent_offset) == RSHAPE_FLAGS(child_id) &&
RSHAPE_PARENT_OFFSET(child_id) == RSHAPE_OFFSET(parent_offset));
return RSHAPE_PARENT_OFFSET(child_id) == RSHAPE_OFFSET(parent_offset);
}

static inline enum shape_type
Expand Down Expand Up @@ -529,4 +520,107 @@ size_t rb_shape_edges_count(shape_id_t shape_id);
size_t rb_shape_depth(shape_id_t shape_id);
RUBY_SYMBOL_EXPORT_END

// Inline cache helpers

// Unpacked form of a getivar inline cache entry, as produced by
// rb_getivar_cache_unpack() and consumed by rb_getivar_cache_pack().
typedef struct {
// Cached attribute index.
attr_index_t index;
// Cached shape offset. rb_getivar_cache_pack() asserts that it is already
// an offset (equal to RSHAPE_OFFSET() of itself) and not INVALID_SHAPE_ID.
shape_id_t shape_offset;
} rb_getivar_cache;

// Overlays the unpacked fields on one 64-bit word so an entry can be
// converted between its packed and unpacked representations.
union rb_getivar_cache {
uint64_t pack;
rb_getivar_cache unpack;
};
// The union must not be larger than the 64-bit packed value.
STATIC_ASSERT(rb_getivar_cache_size, sizeof(union rb_getivar_cache) <= sizeof(uint64_t));

// Packed cache value with every bit set; a cache may be initialized to this
// value (rb_getivar_cache_unpack() masks the shape offset for that reason).
#define IVAR_CACHE_INIT ((uint64_t)-1)
// Width of attr_index_t in bits; used as the shift for the fields packed
// above the index in a setivar cache.
#define ATTR_INDEX_T_NUM_BITS (sizeof(attr_index_t) * CHAR_BIT)

static inline rb_getivar_cache
rb_getivar_cache_unpack(uint64_t packed)
{
union rb_getivar_cache cache = {
.pack = packed,
};

// Because caches may initialized with all bits set (IVAR_CACHE_INIT), and `shape_offset` if 32bits,
// we need to remove any potential extra bits set in the "padding".
cache.unpack.shape_offset &= SHAPE_ID_OFFSET_MASK;
return cache.unpack;
}

// Encode a (shape_offset, index) pair as a packed getivar cache word.
// `shape_offset` must already be an offset (RSHAPE_OFFSET() is a no-op on it)
// and must not be INVALID_SHAPE_ID; both conditions are asserted.
static inline uint64_t
rb_getivar_cache_pack(shape_id_t shape_offset, attr_index_t index)
{
    RUBY_ASSERT(shape_offset == RSHAPE_OFFSET(shape_offset));
    RUBY_ASSERT(shape_offset != INVALID_SHAPE_ID);

    // Fill in the unpacked view and read the same storage back as one word.
    return ((union rb_getivar_cache){
        .unpack = {
            .index = index,
            .shape_offset = shape_offset,
        },
    }).pack;
}

// Unpacked form of a setivar inline cache entry, as produced by
// rb_setivar_cache_unpack().
typedef struct {
// Cached attribute index; stored in the lowest bits of the packed value.
attr_index_t index;
// Shape offset the entry was recorded for; rb_setivar_cache_revalidate()
// compares it against the masked shape id of the object being written.
shape_id_t source_shape_offset;
// Shape offset used for the transition on a cache hit. When packing it is
// asserted to be either the source offset or a direct child of it.
shape_id_t dest_shape_offset;
} rb_setivar_cache;

static inline rb_setivar_cache
rb_setivar_cache_unpack(uint64_t packed)
{
rb_setivar_cache cache = {
.index = (attr_index_t)packed,
.source_shape_offset = RSHAPE_OFFSET((shape_id_t)(packed >> ATTR_INDEX_T_NUM_BITS)),
.dest_shape_offset = RSHAPE_OFFSET((shape_id_t)(packed >> (ATTR_INDEX_T_NUM_BITS + SHAPE_ID_OFFSET_NUM_BITS))),
};
return cache;
}

// Encode a setivar cache entry as a single 64-bit word.
//
// `shape_offset` and `dest_shape_offset` must already be offsets, and the
// destination must be the source itself or a direct child of it (asserted).
// The index occupies the lowest ATTR_INDEX_T_NUM_BITS bits, the source offset
// sits directly above it, and the destination offset is shifted a further
// SHAPE_ID_OFFSET_NUM_BITS.
static inline uint64_t
rb_setivar_cache_pack(shape_id_t shape_offset, shape_id_t dest_shape_offset, attr_index_t index)
{
    RUBY_ASSERT(shape_offset == RSHAPE_OFFSET(shape_offset));
    RUBY_ASSERT(dest_shape_offset == RSHAPE_OFFSET(dest_shape_offset));
    RUBY_ASSERT(shape_offset == dest_shape_offset || RSHAPE_DIRECT_CHILD_P(shape_offset, dest_shape_offset));

    const uint64_t index_bits = (uint64_t)index;
    const uint64_t source_bits = (uint64_t)shape_offset << ATTR_INDEX_T_NUM_BITS;
    const uint64_t dest_bits = (uint64_t)dest_shape_offset << (ATTR_INDEX_T_NUM_BITS + SHAPE_ID_OFFSET_NUM_BITS);

    return dest_bits | source_bits | index_bits;
}


// Check an unpacked setivar cache entry against the shape of the object being
// written.
//
// Returns INVALID_SHAPE_ID when the entry cannot be used: either the masked
// shape id differs from the cached source offset, or the cached index does not
// fit in the shape's capacity. Otherwise returns the result of
// rb_shape_transition_offset() for the cached destination offset.
ALWAYS_INLINE(static shape_id_t rb_setivar_cache_revalidate(shape_id_t shape_id, rb_setivar_cache cache));
static shape_id_t
rb_setivar_cache_revalidate(shape_id_t shape_id, rb_setivar_cache cache)
{
    RUBY_ASSERT(shape_id != INVALID_SHAPE_ID);
    RUBY_ASSERT(cache.dest_shape_offset == INVALID_SHAPE_ID || cache.dest_shape_offset == RSHAPE_OFFSET(cache.dest_shape_offset));

    // The comparison is done on the shape id masked with SHAPE_ID_WRITE_MASK.
    const shape_id_t masked_shape_id = shape_id & SHAPE_ID_WRITE_MASK;
    if (UNLIKELY(cache.source_shape_offset != masked_shape_id)) {
        return INVALID_SHAPE_ID;
    }

    if (UNLIKELY(RSHAPE_CAPACITY(shape_id) <= cache.index)) {
        // Still a hit in terms of layout, but the object would need to be
        // resized, so the slow path has to be taken regardless.
        return INVALID_SHAPE_ID;
    }

    // From here on the cache entry is known to be usable.
    RUBY_ASSERT(cache.source_shape_offset == cache.dest_shape_offset || RSHAPE_DIRECT_CHILD_P(shape_id, cache.dest_shape_offset));
    RUBY_ASSERT(cache.index < RSHAPE_CAPACITY(shape_id));
    RUBY_ASSERT(!rb_shape_frozen_p(shape_id));
    RUBY_ASSERT(!rb_shape_too_complex_p(shape_id));

    // Use the cached offset, combined with the current shape's flags.
    return rb_shape_transition_offset(shape_id, cache.dest_shape_offset);
}

#endif
Loading