diff --git a/mypy/typeshed/stubs/librt/librt/strings.pyi b/mypy/typeshed/stubs/librt/librt/strings.pyi index 241f6a6fba5b..448b06bdebcf 100644 --- a/mypy/typeshed/stubs/librt/librt/strings.pyi +++ b/mypy/typeshed/stubs/librt/librt/strings.pyi @@ -1,13 +1,21 @@ from typing import final -from mypy_extensions import i64, u8 +from mypy_extensions import i64, i32, u8 @final class BytesWriter: def append(self, /, x: int) -> None: ... - def write(self, /, b: bytes | bytearray) -> None: ... + def write(self, b: bytes | bytearray, /) -> None: ... def getvalue(self) -> bytes: ... - def truncate(self, /, size: i64) -> None: ... + def truncate(self, size: i64, /) -> None: ... def __len__(self) -> i64: ... - def __getitem__(self, /, i: i64) -> u8: ... - def __setitem__(self, /, i: i64, x: u8) -> None: ... + def __getitem__(self, i: i64, /) -> u8: ... + def __setitem__(self, i: i64, x: u8, /) -> None: ... + +@final +class StringWriter: + def append(self, x: int, /) -> None: ... + def write(self, s: str, /) -> None: ... + def getvalue(self) -> str: ... + def __len__(self) -> i64: ... + def __getitem__(self, i: i64, /) -> i32: ... 
diff --git a/mypyc/lib-rt/librt_strings.c b/mypyc/lib-rt/librt_strings.c index 1acaefa77fef..788d27070114 100644 --- a/mypyc/lib-rt/librt_strings.c +++ b/mypyc/lib-rt/librt_strings.c @@ -12,7 +12,7 @@ // BytesWriter // -#define _WRITE(data, type, v) \ +#define _WRITE_BYTES(data, type, v) \ do { \ *(type *)(((BytesWriterObject *)data)->buf + ((BytesWriterObject *)data)->len) = v; \ ((BytesWriterObject *)data)->len += sizeof(type); \ @@ -290,7 +290,7 @@ static inline char BytesWriter_append_internal(BytesWriterObject *self, uint8_t value) { if (!ensure_bytes_writer_size(self, 1)) return CPY_NONE_ERROR; - _WRITE(self, uint8_t, value); + _WRITE_BYTES(self, uint8_t, value); return CPY_NONE; } @@ -379,6 +379,457 @@ BytesWriter_len_internal(PyObject *self) { return writer->len << 1; } +// +// StringWriter +// + +static PyTypeObject StringWriterType; + +static void convert_string_data_in_place(char *buf, Py_ssize_t len, + char old_kind, char new_kind); + +// Helper to grow string buffer and optionally convert to new kind +// Returns true on success, false on failure (with PyErr set) +// Updates self->buf, self->capacity, and self->kind +static bool +grow_string_buffer_helper(StringWriterObject *self, Py_ssize_t target_capacity, char new_kind) { + char old_kind = self->kind; + Py_ssize_t new_capacity = self->capacity; + + while (target_capacity >= new_capacity) { + new_capacity *= 2; + } + + Py_ssize_t size_bytes = new_capacity * new_kind; + char *new_buf; + bool from_embedded = (self->buf == self->data); + + if (from_embedded) { + // Move from embedded buffer to heap-allocated buffer + new_buf = PyMem_Malloc(size_bytes); + if (new_buf != NULL) { + // Copy existing data from embedded buffer + memcpy(new_buf, self->data, self->len * old_kind); + } + } else { + // Realloc existing heap buffer + new_buf = PyMem_Realloc(self->buf, size_bytes); + } + + if (unlikely(new_buf == NULL)) { + PyErr_NoMemory(); + return false; + } + + // Convert data if kind changed + if (old_kind != 
new_kind) { + convert_string_data_in_place(new_buf, self->len, old_kind, new_kind); + } + + self->buf = new_buf; + self->capacity = new_capacity; + self->kind = new_kind; + return true; +} + +static bool grow_string_buffer(StringWriterObject *data, Py_ssize_t n) { + return grow_string_buffer_helper(data, data->len + n, data->kind); +} + +static inline bool +ensure_string_writer_size(StringWriterObject *data, Py_ssize_t n) { + if (likely(data->capacity - data->len >= n)) { + return true; + } else { + // Don't inline the grow function since this is slow path and we + // want to keep this as short as possible for better inlining + return grow_string_buffer(data, n); + } +} + +static inline void +StringWriter_init_internal(StringWriterObject *self) { + self->buf = self->data; + self->kind = 1; + self->len = 0; + self->capacity = WRITER_EMBEDDED_BUF_LEN; +} + +static PyObject* +StringWriter_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + if (type != &StringWriterType) { + PyErr_SetString(PyExc_TypeError, "StringWriter cannot be subclassed"); + return NULL; + } + + StringWriterObject *self = (StringWriterObject *)type->tp_alloc(type, 0); + if (self != NULL) { + StringWriter_init_internal(self); + } + return (PyObject *)self; +} + +static PyObject * +StringWriter_internal(void) { + StringWriterObject *self = (StringWriterObject *)StringWriterType.tp_alloc(&StringWriterType, 0); + if (self == NULL) + return NULL; + StringWriter_init_internal(self); + return (PyObject *)self; +} + +static int +StringWriter_init(StringWriterObject *self, PyObject *args, PyObject *kwds) +{ + if (!PyArg_ParseTuple(args, "")) { + return -1; + } + if (kwds != NULL && PyDict_Size(kwds) > 0) { + PyErr_SetString(PyExc_TypeError, "StringWriter() takes no keyword arguments"); + return -1; + } + // __init__ can run again on a live object: release any heap buffer before the reset below + if (self->buf != self->data) + PyMem_Free(self->buf); + StringWriter_init_internal(self); + return 0; +} + +static void +StringWriter_dealloc(StringWriterObject *self) +{ + if (self->buf != self->data) { + PyMem_Free(self->buf); + self->buf = 
NULL; + } + Py_TYPE(self)->tp_free((PyObject *)self); +} + +static PyObject* +StringWriter_getvalue_internal(PyObject *self) +{ + StringWriterObject *obj = (StringWriterObject *)self; + return PyUnicode_FromKindAndData(obj->kind, obj->buf, obj->len); +} + +static PyObject* +StringWriter_repr(StringWriterObject *self) +{ + PyObject *value = StringWriter_getvalue_internal((PyObject *)self); + if (value == NULL) { + return NULL; + } + PyObject *value_repr = PyObject_Repr(value); + Py_DECREF(value); + if (value_repr == NULL) { + return NULL; + } + PyObject *result = PyUnicode_FromFormat("StringWriter(%U)", value_repr); + Py_DECREF(value_repr); + return result; +} + +static PyObject* +StringWriter_getvalue(StringWriterObject *self, PyObject *Py_UNUSED(ignored)) +{ + return PyUnicode_FromKindAndData(self->kind, self->buf, self->len); +} + +static Py_ssize_t +StringWriter_length(StringWriterObject *self) +{ + return self->len; +} + +static PyObject* +StringWriter_item(StringWriterObject *self, Py_ssize_t index) +{ + Py_ssize_t length = self->len; + + // Check bounds + if (index < 0 || index >= length) { + PyErr_SetString(PyExc_IndexError, "StringWriter index out of range"); + return NULL; + } + + // Read the character at the given index based on kind using memcpy + uint32_t value; + if (self->kind == 1) { + uint8_t val; + memcpy(&val, self->buf + index, 1); + value = val; + } else if (self->kind == 2) { + uint16_t val; + memcpy(&val, self->buf + index * 2, 2); + value = val; + } else { + memcpy(&value, self->buf + index * 4, 4); + } + return PyLong_FromLong(value); +} + +static PySequenceMethods StringWriter_as_sequence = { + .sq_length = (lenfunc)StringWriter_length, + .sq_item = (ssizeargfunc)StringWriter_item, +}; + +static PyObject* StringWriter_append(PyObject *self, PyObject *const *args, size_t nargs, PyObject *kwnames); +static PyObject* StringWriter_write(PyObject *self, PyObject *const *args, size_t nargs, PyObject *kwnames); + +static PyMethodDef 
StringWriter_methods[] = { + {"append", (PyCFunction) StringWriter_append, METH_FASTCALL | METH_KEYWORDS, + PyDoc_STR("Append a single character (as int codepoint) to the buffer") + }, + {"write", (PyCFunction) StringWriter_write, METH_FASTCALL | METH_KEYWORDS, + PyDoc_STR("Append a string to the buffer") + }, + {"getvalue", (PyCFunction) StringWriter_getvalue, METH_NOARGS, + "Return the buffer content as str object" + }, + {NULL} /* Sentinel */ +}; + +static PyTypeObject StringWriterType = { + .ob_base = PyVarObject_HEAD_INIT(NULL, 0) + .tp_name = "StringWriter", + .tp_doc = PyDoc_STR("Memory buffer for building string objects from parts"), + .tp_basicsize = sizeof(StringWriterObject), + .tp_itemsize = 0, + .tp_flags = Py_TPFLAGS_DEFAULT, + .tp_new = StringWriter_new, + .tp_init = (initproc) StringWriter_init, + .tp_dealloc = (destructor) StringWriter_dealloc, + .tp_methods = StringWriter_methods, + .tp_as_sequence = &StringWriter_as_sequence, + .tp_repr = (reprfunc)StringWriter_repr, +}; + +static inline bool +check_string_writer(PyObject *data) { + if (unlikely(Py_TYPE(data) != &StringWriterType)) { + PyErr_Format( + PyExc_TypeError, "data must be a StringWriter object, got %s", Py_TYPE(data)->tp_name + ); + return false; + } + return true; +} + +static char string_writer_switch_kind(StringWriterObject *self, int32_t value); + +static char +StringWriter_write_internal(StringWriterObject *self, PyObject *value) { + Py_ssize_t str_len = PyUnicode_GET_LENGTH(value); + if (str_len == 0) { + return CPY_NONE; + } + + int src_kind = PyUnicode_KIND(value); + void *src_data = PyUnicode_DATA(value); + int self_kind = self->kind; + + // Switch kind if source requires wider characters + if (src_kind > self_kind) { + // Use value in the source kind range to trigger proper kind switch + int32_t codepoint = (src_kind == 2) ? 
0x100 : 0x10000; + if (string_writer_switch_kind(self, codepoint) == CPY_NONE_ERROR) { + return CPY_NONE_ERROR; + } + self_kind = self->kind; + } + + // Ensure we have enough space + if (!ensure_string_writer_size(self, str_len)) { + return CPY_NONE_ERROR; + } + + // Copy data - ASCII/Latin1 (kind 1) are handled uniformly + if (self_kind == src_kind) { + // Same kind, direct copy + memcpy(self->buf + self->len * self_kind, src_data, str_len * src_kind); + } else { + // Different kinds, convert character by character + for (Py_ssize_t i = 0; i < str_len; i++) { + Py_UCS4 ch = PyUnicode_READ(src_kind, src_data, i); + PyUnicode_WRITE(self_kind, self->buf, self->len + i, ch); + } + } + + self->len += str_len; + return CPY_NONE; +} + +static PyObject* +StringWriter_write(PyObject *self, PyObject *const *args, size_t nargs, PyObject *kwnames) { + static const char * const kwlist[] = {"value", 0}; + static CPyArg_Parser parser = {"O:write", kwlist, 0}; + PyObject *value; + if (unlikely(!CPyArg_ParseStackAndKeywordsSimple(args, nargs, kwnames, &parser, &value))) { + return NULL; + } + if (!check_string_writer(self)) { + return NULL; + } + if (unlikely(!PyUnicode_Check(value))) { + PyErr_SetString(PyExc_TypeError, "value must be a str object"); + return NULL; + } + if (unlikely(StringWriter_write_internal((StringWriterObject *)self, value) == CPY_NONE_ERROR)) { + return NULL; + } + Py_INCREF(Py_None); + return Py_None; +} + +// Convert string data to next larger kind (1->2 or 2->4) +static void convert_string_data_in_place(char *buf, Py_ssize_t len, + char old_kind, char new_kind) { + if (old_kind == 1 && new_kind == 2) { + // Convert backwards to avoid overwriting + for (Py_ssize_t i = len - 1; i >= 0; i--) { + uint8_t val = (uint8_t)buf[i]; + uint16_t expanded = val; + memcpy(buf + i * 2, &expanded, 2); + } + } else if (old_kind == 2 && new_kind == 4) { + // Convert backwards to avoid overwriting + for (Py_ssize_t i = len - 1; i >= 0; i--) { + uint16_t val; + memcpy(&val, 
buf + i * 2, 2); + uint32_t expanded = val; + memcpy(buf + i * 4, &expanded, 4); + } + } else { + assert(false); + } +} + +static char convert_string_buffer_kind(StringWriterObject *self, char old_kind, char new_kind) { + // Current buffer size in bytes + Py_ssize_t current_buf_size = (self->buf == self->data) ? WRITER_EMBEDDED_BUF_LEN : (self->capacity * old_kind); + // Needed buffer size in bytes for new kind + Py_ssize_t needed_size = self->len * new_kind; + + if (current_buf_size >= needed_size) { + // Convert in place + convert_string_data_in_place(self->buf, self->len, old_kind, new_kind); + self->kind = new_kind; + self->capacity = current_buf_size / new_kind; + } else { + // Need to allocate new buffer + if (!grow_string_buffer_helper(self, self->len, new_kind)) { + return CPY_NONE_ERROR; + } + } + return CPY_NONE; +} + +static char string_writer_switch_kind(StringWriterObject *self, int32_t value) { + if (self->kind == 1) { + // Either kind 1 -> 2 or 1 -> 4. First switch to kind 2. 
+ if (convert_string_buffer_kind(self, 1, 2) == CPY_NONE_ERROR) + return CPY_NONE_ERROR; + if ((uint32_t)value > 0xffff) { + // Call recursively to switch from kind 2 to 4 + return string_writer_switch_kind(self, value); + } + return CPY_NONE; + } else { + // Must be kind 2 -> 4 + assert(self->kind == 2); + assert((uint32_t)value > 0xffff); + return convert_string_buffer_kind(self, 2, 4); + } +} + +// Handle all append cases except for append that stays within kind 1 +static char string_append_slow_path(StringWriterObject *self, int32_t value) { + if (unlikely((uint32_t)value > 0x10FFFF)) { + // Reject invalid code points (incl. negatives) first so a failed append never widens the buffer + PyErr_Format(PyExc_ValueError, "code point %d is outside valid Unicode range (0-1114111)", value); + return CPY_NONE_ERROR; + } + if (self->kind == 2) { + if ((uint32_t)value <= 0xffff) { + // Kind stays the same + if (!ensure_string_writer_size(self, 1)) + return CPY_NONE_ERROR; + // Copy 2-byte character to buffer + uint16_t val16 = (uint16_t)value; + memcpy(self->buf + self->len * 2, &val16, 2); + self->len++; + return CPY_NONE; + } + if (string_writer_switch_kind(self, value) == CPY_NONE_ERROR) + return CPY_NONE_ERROR; + return string_append_slow_path(self, value); + } else if (self->kind == 1) { + // Check precondition -- this must only be used on slow path + assert((uint32_t)value > 0xff); + if (string_writer_switch_kind(self, value) == CPY_NONE_ERROR) + return CPY_NONE_ERROR; + return string_append_slow_path(self, value); + } + assert(self->kind == 4); + if (!ensure_string_writer_size(self, 1)) + return CPY_NONE_ERROR; + // Copy 4-byte character to buffer + uint32_t val32 = (uint32_t)value; + memcpy(self->buf + self->len * 4, &val32, 4); + self->len++; + return CPY_NONE; +} + +static inline char +StringWriter_append_internal(StringWriterObject *self, int32_t value) { + char kind = self->kind; + if (kind == 1 && (uint32_t)value < 256) { + if (!ensure_string_writer_size(self, 1)) + return CPY_NONE_ERROR; + self->buf[self->len++] = 
value; + return CPY_NONE; + } + return string_append_slow_path(self, value); +} + +static PyObject* +StringWriter_append(PyObject *self, PyObject *const *args, size_t nargs, PyObject *kwnames) { + static const char * const kwlist[] = {"value", 0}; + static CPyArg_Parser parser = {"O:append", kwlist, 0}; + PyObject *value; + if (unlikely(!CPyArg_ParseStackAndKeywordsSimple(args, nargs, kwnames, &parser, &value))) { + return NULL; + } + if (!check_string_writer(self)) { + return NULL; + } + int32_t unboxed = CPyLong_AsInt32(value); + if (unlikely(unboxed == CPY_LL_INT_ERROR && PyErr_Occurred())) { + CPy_TypeError("i32", value); + return NULL; + } + if (unlikely(StringWriter_append_internal((StringWriterObject *)self, unboxed) == CPY_NONE_ERROR)) { + return NULL; + } + Py_INCREF(Py_None); + return Py_None; +} + +static PyTypeObject * +StringWriter_type_internal(void) { + return &StringWriterType; // Return borrowed reference +} + +// Return the length as a CPyTagged value (len shifted left by one bit) +static CPyTagged +StringWriter_len_internal(PyObject *self) { + StringWriterObject *writer = (StringWriterObject *)self; + return writer->len << 1; +} + +// End of StringWriter + #endif static PyMethodDef librt_strings_module_methods[] = { @@ -406,9 +857,15 @@ librt_strings_module_exec(PyObject *m) if (PyType_Ready(&BytesWriterType) < 0) { return -1; } + if (PyType_Ready(&StringWriterType) < 0) { + return -1; + } if (PyModule_AddObjectRef(m, "BytesWriter", (PyObject *) &BytesWriterType) < 0) { return -1; } + if (PyModule_AddObjectRef(m, "StringWriter", (PyObject *) &StringWriterType) < 0) { + return -1; + } // Export mypy internal C API, be careful with the order! 
static void *librt_strings_api[LIBRT_STRINGS_API_LEN] = { diff --git a/mypyc/lib-rt/librt_strings.h b/mypyc/lib-rt/librt_strings.h index 069bec84a311..c45e3d3e9875 100644 --- a/mypyc/lib-rt/librt_strings.h +++ b/mypyc/lib-rt/librt_strings.h @@ -41,6 +41,15 @@ typedef struct { char data[WRITER_EMBEDDED_BUF_LEN]; // Default buffer } BytesWriterObject; +typedef struct { + PyObject_HEAD + char *buf; // Beginning of the buffer + char kind; // Bytes per code point (1, 2 or 4) + Py_ssize_t len; // Current length (number of code points written) + Py_ssize_t capacity; // Total capacity of the buffer (number of code points) + char data[WRITER_EMBEDDED_BUF_LEN]; // Default buffer +} StringWriterObject; + #define LibRTStrings_ABIVersion (*(int (*)(void)) LibRTStrings_API[0]) #define LibRTStrings_APIVersion (*(int (*)(void)) LibRTStrings_API[1]) #define LibRTStrings_BytesWriter_internal (*(PyObject* (*)(void)) LibRTStrings_API[2]) diff --git a/mypyc/test-data/run-librt-strings.test b/mypyc/test-data/run-librt-strings.test index f3e0b7b13100..251466ff6d44 100644 --- a/mypyc/test-data/run-librt-strings.test +++ b/mypyc/test-data/run-librt-strings.test @@ -4,7 +4,7 @@ import base64 import binascii import random -from librt.strings import BytesWriter +from librt.strings import BytesWriter, StringWriter from testutil import assertRaises @@ -189,6 +189,316 @@ def test_bytes_writer_wrapper_functions() -> None: with assertRaises(TypeError): b[0] = 256 +def test_string_writer_basics() -> None: + w = StringWriter() + assert w.getvalue() == "" + +def test_string_writer_repr() -> None: + # Kind 1 (ASCII) + w = StringWriter() + assert repr(w) == "StringWriter('')" + w.append(ord('h')) + w.append(ord('i')) + assert repr(w) == "StringWriter('hi')" + + # Kind 2 (UCS-2) + w2 = StringWriter() + w2.append(0x100) + w2.append(0x200) + assert repr(w2) == "StringWriter('" + chr(0x100) + chr(0x200) + "')" + + # Kind 4 (UCS-4) + w3 = StringWriter() + w3.append(0x10000) + expected = "StringWriter('" + 
chr(0x10000) + "')" + assert repr(w3) == expected + +def test_string_writer_repr_escaping() -> None: + # Kind 1: Test escaping of newline, nul, tab, backslash + w = StringWriter() + w.append(ord('a')) + w.append(ord('\n')) + w.append(0) + w.append(ord('\t')) + w.append(ord('\\')) + assert repr(w) == "StringWriter('a\\n\\x00\\t\\\\')" + + # Kind 2: escaping with UCS-2 + w2 = StringWriter() + w2.append(0x100) + w2.append(ord('\n')) + assert repr(w2) == "StringWriter('" + chr(0x100) + "\\n')" + + # Kind 4: escaping with UCS-4 + w3 = StringWriter() + w3.append(0x10000) + w3.append(0) + assert repr(w3) == "StringWriter('" + chr(0x10000) + "\\x00')" + +def test_string_writer_len() -> None: + # Kind 1 (ASCII) + w = StringWriter() + assert len(w) == 0 + w.append(ord('a')) + assert len(w) == 1 + w.append(ord('b')) + w.append(ord('c')) + assert len(w) == 3 + + # Kind 2 (UCS-2) + w2 = StringWriter() + w2.append(0x100) + assert len(w2) == 1 + for i in range(10): + w2.append(0x200 + i) + assert len(w2) == 11 + + # Kind 4 (UCS-4) + w3 = StringWriter() + w3.append(0x10000) + assert len(w3) == 1 + w3.append(0x10001) + w3.append(0x10002) + assert len(w3) == 3 + + # Test len after growing buffer + w4 = StringWriter() + for i in range(500): + w4.append(ord('X')) + assert len(w4) == 500 + +def test_string_writer_get_item() -> None: + # Kind 1 (ASCII) + w = StringWriter() + w.append(ord('f')) + w.append(ord('o')) + w.append(ord('o')) + assert w[0 + int()] == ord('f') + assert w[1 + int()] == ord('o') + assert w[2 + int()] == ord('o') + assert w[-1 + int()] == ord('o') + assert w[-2 + int()] == ord('o') + assert w[-3 + int()] == ord('f') + + with assertRaises(IndexError): + w[3 + int()] + with assertRaises(IndexError): + w[-4 + int()] + with assertRaises(IndexError): + w[1 << 50] + with assertRaises(IndexError): + w[-(1 << 50)] + + # Kind 2 (UCS-2) + w2 = StringWriter() + w2.append(0x100) + w2.append(0x200) + w2.append(0x300) + assert w2[0 + int()] == 0x100 + assert w2[1 + int()] == 
0x200 + assert w2[2 + int()] == 0x300 + assert w2[-1 + int()] == 0x300 + assert w2[-2 + int()] == 0x200 + assert w2[-3 + int()] == 0x100 + + with assertRaises(IndexError): + w2[3 + int()] + with assertRaises(IndexError): + w2[-4 + int()] + + # Kind 4 (UCS-4) + w3 = StringWriter() + w3.append(0x10000) + w3.append(0x10001) + w3.append(0x10002) + assert w3[0 + int()] == 0x10000 + assert w3[1 + int()] == 0x10001 + assert w3[2 + int()] == 0x10002 + assert w3[-1 + int()] == 0x10002 + assert w3[-2 + int()] == 0x10001 + assert w3[-3 + int()] == 0x10000 + + with assertRaises(IndexError): + w3[3 + int()] + with assertRaises(IndexError): + w3[-4 + int()] + + # Test get_item after buffer growth + w4 = StringWriter() + for i in range(1000): + w4.append(ord('a') + (i % 26)) + assert w4[0 + int()] == ord('a') + assert w4[999 + int()] == ord('a') + (999 % 26) + assert w4[500 + int()] == ord('a') + (500 % 26) + assert w4[-1 + int()] == ord('a') + (999 % 26) + assert w4[-1000 + int()] == ord('a') + +def test_string_writer_append() -> None: + w = StringWriter() + w.append(ord('a')) + assert w.getvalue() == "a" + w.append(0xff) + assert w.getvalue() == "a\xff" + + # Switch kind 1->2 + w.append(0x100) + assert w.getvalue() == "a\xff\u0100", w.getvalue() + w.append(0xffff) + assert w.getvalue() == "a\xff\u0100\uffff" + + # Switch kind 2->4 + w.append(0x10000) + assert w.getvalue() == "a\xff\u0100\uffff" + chr(0x10000) + + # Maximum valid Unicode code point (0x10FFFF = 1114111) + w2 = StringWriter() + w2.append(0x10FFFF) + assert w2.getvalue() == chr(0x10FFFF) + + # Invalid code points + w3 = StringWriter() + with assertRaises(ValueError, "code point 1114112 is outside valid Unicode range (0-1114111)"): + w3.append(0x110000) + + w4 = StringWriter() + with assertRaises(ValueError, "code point -1 is outside valid Unicode range (0-1114111)"): + w4.append(-1) + + w5 = StringWriter() + with assertRaises(ValueError, "code point 2097152 is outside valid Unicode range (0-1114111)"): + 
w5.append(0x200000) + +def test_string_writer_write() -> None: + # Kind 1: Write ASCII strings + w = StringWriter() + w.write("hello") + assert w.getvalue() == "hello" + w.write(" world") + assert w.getvalue() == "hello world" + + # Write empty string + w.write("") + assert w.getvalue() == "hello world" + + # Kind 1 -> Kind 2: Write string with UCS-2 characters + w2 = StringWriter() + w2.write("abc") + assert w2.getvalue() == "abc" + w2.write(chr(0x100) + chr(0x200)) + assert w2.getvalue() == "abc" + chr(0x100) + chr(0x200) + w2.write("xyz") + assert w2.getvalue() == "abc" + chr(0x100) + chr(0x200) + "xyz" + + # Kind 2: Write all UCS-2 + w3 = StringWriter() + w3.append(0x100) + w3.write(chr(0x200) + chr(0x300)) + assert w3.getvalue() == chr(0x100) + chr(0x200) + chr(0x300) + + # Kind 2 -> Kind 4: Write string with UCS-4 characters + w4 = StringWriter() + w4.write(chr(0x100)) + w4.write(chr(0x10000)) + assert w4.getvalue() == chr(0x100) + chr(0x10000) + + # Kind 4: Write mixed + w5 = StringWriter() + w5.append(0x10000) + w5.write("abc") + w5.write(chr(0x200)) + w5.write(chr(0x10001)) + assert w5.getvalue() == chr(0x10000) + "abc" + chr(0x200) + chr(0x10001) + + # Test with longer strings to trigger buffer growth + w6 = StringWriter() + for _ in range(100): + w6.write("hello") + assert w6.getvalue() == "hello" * 100 + assert len(w6) == 500 + +def test_string_writer_append_grow_same_kind() -> None: + # Test growing buffer while staying in kind 1 (ASCII) + w = StringWriter() + # Append enough ASCII characters to grow beyond embedded buffer + for i in range(1000): + w.append(ord('a') + (i % 26)) + assert len(w) == i + 1 + result = w.getvalue() + assert len(result) == 1000 + for i in range(1000): + assert result[i] == chr(ord('a') + (i % 26)) + + # Test growing buffer while staying in kind 2 + w2 = StringWriter() + w2.append(0x100) # Switch to kind 2 + for i in range(1000): + w2.append(0x100 + (i % 100)) + assert len(w2) == i + 2 + result2 = w2.getvalue() + assert 
len(result2) == 1001 + assert result2[0] == chr(0x100) + for i in range(1000): + assert result2[i + 1] == chr(0x100 + (i % 100)) + + # Test growing buffer while staying in kind 4 + w3 = StringWriter() + w3.append(0x10000) # Switch to kind 4 + for i in range(500): + w3.append(0x10000 + (i % 100)) + assert len(w3) == i + 2 + result3 = w3.getvalue() + assert len(result3) == 501 + assert result3[0] == chr(0x10000) + for i in range(500): + assert result3[i + 1] == chr(0x10000 + (i % 100)) + +def test_string_writer_append_grow_and_switch_kind() -> None: + # Test growing buffer AND switching from kind 1 to kind 2 + w = StringWriter() + # Fill with ASCII to grow buffer + for i in range(500): + w.append(ord('A')) + assert len(w) == 500 + # Now append non-ASCII that requires kind 2, triggering both grow and kind switch + for i in range(500): + w.append(0x100 + i) + result = w.getvalue() + assert len(result) == 1000 + for i in range(500): + assert result[i] == 'A' + for i in range(500): + assert result[500 + i] == chr(0x100 + i) + + # Test growing buffer AND switching from kind 2 to kind 4 + w2 = StringWriter() + w2.append(0x100) # Switch to kind 2 + # Fill with kind 2 characters to grow buffer + for i in range(300): + w2.append(0x200 + (i % 100)) + assert len(w2) == 301 + # Now append characters that require kind 4, triggering both grow and kind switch + for i in range(300): + w2.append(0x10000 + i) + result2 = w2.getvalue() + assert len(result2) == 601 + assert result2[0] == chr(0x100) + for i in range(300): + assert result2[1 + i] == chr(0x200 + (i % 100)) + for i in range(300): + assert result2[301 + i] == chr(0x10000 + i) + + # Test switching kind 1->4 with buffer growth + w3 = StringWriter() + for i in range(300): + w3.append(ord('X')) + # Jump directly to kind 4 + w3.append(0x10000) + result3 = w3.getvalue() + assert len(result3) == 301 + for i in range(300): + assert result3[i] == 'X' + assert result3[300] == chr(0x10000) + [case 
testStringsFeaturesNotAvailableInNonExperimentalBuild_librt] # This also ensures librt.strings can be built without experimental features import librt.strings