diff --git a/Doc/deprecations/pending-removal-in-3.20.rst b/Doc/deprecations/pending-removal-in-3.20.rst index 8372432a34daa5..d59f6881adf00b 100644 --- a/Doc/deprecations/pending-removal-in-3.20.rst +++ b/Doc/deprecations/pending-removal-in-3.20.rst @@ -1,6 +1,13 @@ Pending removal in Python 3.20 ------------------------------ +* Calling the ``Struct.__new__()`` without required argument now is + deprecated and will be removed in Python 3.20. Calling + :meth:`~object.__init__` method on initialized :class:`~struct.Struct` + objects is deprecated and will be removed in Python 3.20. + + (Contributed by Sergey B Kirpichev and Serhiy Storchaka in :gh:`143715`.) + * The ``__version__``, ``version`` and ``VERSION`` attributes have been deprecated in these standard library modules and will be removed in Python 3.20. Use :py:data:`sys.version_info` instead. diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 163d50d7e20e20..0dc0024b7848ab 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -1527,6 +1527,15 @@ New deprecations (Contributed by Bénédikt Tran in :gh:`134978`.) +* :mod:`struct`: + + * Calling the ``Struct.__new__()`` without required argument now is + deprecated and will be removed in Python 3.20. Calling + :meth:`~object.__init__` method on initialized :class:`~struct.Struct` + objects is deprecated and will be removed in Python 3.20. + + (Contributed by Sergey B Kirpichev and Serhiy Storchaka in :gh:`143715`.) + * ``__version__`` * The ``__version__``, ``version`` and ``VERSION`` attributes have been diff --git a/Lib/test/test_struct.py b/Lib/test/test_struct.py index aa793a2c223de9..611659a6a0e3e6 100644 --- a/Lib/test/test_struct.py +++ b/Lib/test/test_struct.py @@ -585,11 +585,16 @@ def test_Struct_reinitialization(self): # Struct instance. This test can be used to detect the leak # when running with regrtest -L. s = struct.Struct('i') - s.__init__('ii') + with self.assertWarns(DeprecationWarning): + s.__init__('ii') + self.assertEqual(s.format, 'ii') + packed = b'\x01\x00\x00\x00\x02\x00\x00\x00' + self.assertEqual(s.pack(1, 2), packed) + self.assertEqual(s.unpack(packed), (1, 2)) def check_sizeof(self, format_str, number_of_codes): # The size of 'PyStructObject' - totalsize = support.calcobjsize('2n3P') + totalsize = support.calcobjsize('2n3P?0P') # The size taken up by the 'formatcode' dynamic array totalsize += struct.calcsize('P3n0P') * (number_of_codes + 1) support.check_sizeof(self, struct.Struct(format_str), totalsize) @@ -787,14 +792,150 @@ def test_error_propagation(fmt_str): test_error_propagation('N') test_error_propagation('n') - def test_struct_subclass_instantiation(self): + def test_custom_struct_init(self): # Regression test for https://github.com/python/cpython/issues/112358 class MyStruct(struct.Struct): - def __init__(self): + def __init__(self, *args, **kwargs): super().__init__('>h') + my_struct = MyStruct('>h') + self.assertEqual(my_struct.pack(12345), b'\x30\x39') + my_struct = MyStruct(format='>h') + self.assertEqual(my_struct.pack(12345), b'\x30\x39') + + warnmsg = r"Different format arguments for __new__\(\) and __init__\(\) methods of Struct" + with self.assertWarnsRegex(FutureWarning, warnmsg): + my_struct = MyStruct('h') + self.assertEqual(my_struct.pack(12345), b'\x30\x39') + + warnmsg = r"Struct\(\) takes at most 1 argument \(2 given\)" + with self.assertWarnsRegex(DeprecationWarning, warnmsg): + my_struct = MyStruct('>h', 42) + self.assertEqual(my_struct.pack(12345), b'\x30\x39') + with self.assertWarnsRegex(DeprecationWarning, warnmsg): + my_struct = MyStruct('>h', arg=42) + self.assertEqual(my_struct.pack(12345), b'\x30\x39') + with self.assertWarnsRegex(DeprecationWarning, warnmsg): + my_struct = MyStruct('>h', format=42) + self.assertEqual(my_struct.pack(12345), b'\x30\x39') + with self.assertWarnsRegex(DeprecationWarning, warnmsg): + my_struct = MyStruct(format='>h', arg=42) + self.assertEqual(my_struct.pack(12345), b'\x30\x39') + + warnmsg = r"Invalid 'format' argument for Struct\.__new__\(\): " + with self.assertWarnsRegex(DeprecationWarning, warnmsg + '.*must be'): + my_struct = MyStruct(42) + self.assertEqual(my_struct.pack(12345), b'\x30\x39') + with self.assertWarnsRegex(DeprecationWarning, warnmsg + '.*must be'): + my_struct = MyStruct(format=42) + self.assertEqual(my_struct.pack(12345), b'\x30\x39') + with self.assertWarnsRegex(DeprecationWarning, warnmsg + 'bad char'): + my_struct = MyStruct('$') + self.assertEqual(my_struct.pack(12345), b'\x30\x39') + with self.assertWarnsRegex(DeprecationWarning, warnmsg + 'bad char'): + my_struct = MyStruct(format='$') + self.assertEqual(my_struct.pack(12345), b'\x30\x39') + with self.assertWarnsRegex(DeprecationWarning, warnmsg + ".*can't encode"): + my_struct = MyStruct('\u20ac') + self.assertEqual(my_struct.pack(12345), b'\x30\x39') + with self.assertWarnsRegex(DeprecationWarning, warnmsg + ".*can't encode"): + my_struct = MyStruct(format='\u20ac') + self.assertEqual(my_struct.pack(12345), b'\x30\x39') + + def test_custom_struct_new(self): + # New way, no warnings: + class MyStruct(struct.Struct): + def __new__(cls, *args, **kwargs): + return super().__new__(cls, '>h') + + my_struct = MyStruct('>h') + self.assertEqual(my_struct.pack(12345), b'\x30\x39') + my_struct = MyStruct('h',), ('>h',)) + self.assertEqual(my_struct.pack(12345), b'\x30\x39') + with self.assertRaises(TypeError): + MyStruct((), ()) + with self.assertRaises(TypeError): + MyStruct(('>h',), ()) + with self.assertRaises(TypeError): + MyStruct((), ('>h',)) + with self.assertRaises(TypeError): + MyStruct((42,), ('>h',)) + with self.assertRaises(TypeError): + with self.assertWarns(FutureWarning): + MyStruct(('>h',), (42,)) + with self.assertRaises(struct.error): + MyStruct(('$',), ('>h',)) + with self.assertRaises(struct.error): + with self.assertWarns(FutureWarning): + MyStruct(('>h',), ('$',)) + with self.assertRaises(UnicodeEncodeError): + MyStruct(('\u20ac',), ('>h',)) + with self.assertRaises(UnicodeEncodeError): + with self.assertWarns(FutureWarning): + MyStruct(('>h',), ('\u20ac',)) + with self.assertWarns(FutureWarning): + my_struct = MyStruct(('>h',), ('h') + self.assertEqual(my_struct.pack(12345), b'\x30\x39') + my_struct = MyStruct(format='>h') + self.assertEqual(my_struct.pack(12345), b'\x30\x39') + with self.assertRaises(TypeError): + MyStruct() + with self.assertRaises(TypeError): + MyStruct(42) + with self.assertRaises(struct.error): + MyStruct('$') + with self.assertRaises(UnicodeEncodeError): + MyStruct('\u20ac') + with self.assertRaises(TypeError): + MyStruct('>h', 42) + with self.assertRaises(TypeError): + MyStruct('>h', arg=42) + with self.assertRaises(TypeError): + MyStruct(arg=42) + with self.assertRaises(TypeError): + MyStruct('>h', format='>h') def test_repr(self): s = struct.Struct('=i2H') diff --git a/Misc/NEWS.d/next/Library/2026-01-10-16-23-21.gh-issue-143715.HZrfSA.rst b/Misc/NEWS.d/next/Library/2026-01-10-16-23-21.gh-issue-143715.HZrfSA.rst new file mode 100644 index 00000000000000..90aae6bee835f0 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-01-10-16-23-21.gh-issue-143715.HZrfSA.rst @@ -0,0 +1,3 @@ +Calling the ``Struct.__new__()`` without required argument now is deprecated. +Calling :meth:`~object.__init__` method on initialized :class:`~struct.Struct` +objects is deprecated. diff --git a/Modules/_struct.c b/Modules/_struct.c index ae8a8ffb3c005a..281c6d0bcb1612 100644 --- a/Modules/_struct.c +++ b/Modules/_struct.c @@ -70,6 +70,7 @@ typedef struct { formatcode *s_codes; PyObject *s_format; PyObject *weakreflist; /* List of weak references */ + bool init_called; } PyStructObject; #define PyStructObject_CAST(op) ((PyStructObject *)(op)) @@ -1765,24 +1766,153 @@ prepare_s(PyStructObject *self) return -1; } +/* This should be moved to Struct_impl() when Struct___init__() and + * s_new() will be removed (see gh-143715 and gh-94532). */ +static int +set_format(PyStructObject *self, PyObject *format) +{ + if (PyUnicode_Check(format)) { + format = PyUnicode_AsASCIIString(format); + if (format == NULL) + return -1; + } + else { + Py_INCREF(format); + } + if (!PyBytes_Check(format)) { + PyErr_Format(PyExc_TypeError, + "Struct() argument 1 must be a str or bytes object, " + "not %T", format); + Py_DECREF(format); + return -1; + } + Py_SETREF(self->s_format, format); + if (prepare_s(self)) { + return -1; + } + return 0; +} + +/*[clinic input] +@classmethod +Struct.__new__ + + format: object + +Create a compiled struct object. + +Return a new Struct object which writes and reads binary data according +to the format string. See help(struct) for more on format strings. +[clinic start generated code]*/ + +static PyObject * +Struct_impl(PyTypeObject *type, PyObject *format) +/*[clinic end generated code: output=49468b044e334308 input=8381a9796f20f24e]*/ +{ + PyStructObject *self = (PyStructObject *)type->tp_alloc(type, 0); + if (self == NULL) { + return NULL; + } + self->s_format = Py_NewRef(Py_None); + self->s_codes = NULL; + self->s_size = -1; + self->s_len = -1; + self->init_called = false; + if (set_format(self, format) < 0) { + Py_DECREF(self); + return NULL; + } + return (PyObject *)self; +} + + +static int +s_init(PyObject *self, PyObject *args, PyObject *kwargs); + static PyObject * s_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { - PyObject *self; + if (type->tp_new != s_new) { + /* Struct.__new__() was called explicitly in a subclass' __new__(). */ + return Struct(type, args, kwds); + } + + PyObject *format = NULL; + if (PyTuple_GET_SIZE(args) == 1 && (kwds == NULL || PyDict_GET_SIZE(kwds) == 0)) { + format = Py_NewRef(PyTuple_GET_ITEM(args, 0)); + } + else if (PyTuple_GET_SIZE(args) == 0 && kwds != NULL && PyDict_GET_SIZE(kwds) == 1) { + if (PyDict_GetItemStringRef(kwds, "format", &format) < 0) { + return NULL; + } + } + if (format == NULL && type->tp_init != s_init) { + Py_ssize_t nargs = PyTuple_GET_SIZE(args) + (kwds ? PyDict_GET_SIZE(kwds) : 0); + if (nargs > 1) { + if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, + "Struct() takes at most 1 argument (%zd given)", nargs)) + { + Py_XDECREF(format); + return NULL; + } + } + else { + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "Struct() missing required argument 'format' (pos 1)", 1)) + { + Py_XDECREF(format); + return NULL; + } + } + } - assert(type != NULL); - allocfunc alloc_func = PyType_GetSlot(type, Py_tp_alloc); - assert(alloc_func != NULL); + PyStructObject *self = (PyStructObject *)type->tp_alloc(type, 0); + if (self == NULL) { + return NULL; + } + self->s_format = Py_NewRef(Py_None); + self->s_codes = NULL; + self->s_size = -1; + self->s_len = -1; + self->init_called = false; + if (format && set_format(self, format) < 0) { + if (type->tp_init == s_init) { + /* No custom __init__() method, so __new__() should do + * all the work. */ + Py_DECREF(format); + Py_DECREF(self); + return NULL; + } + PyObject *exc = PyErr_GetRaisedException(); + Py_SETREF(self->s_format, Py_NewRef(Py_None)); + if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, + "Invalid 'format' argument for Struct.__new__(): %S", exc)) + { + Py_DECREF(exc); + Py_DECREF(format); + Py_DECREF(self); + return NULL; + } + Py_DECREF(exc); + } + Py_XDECREF(format); + return (PyObject *)self; +} - self = alloc_func(type, 0); - if (self != NULL) { - PyStructObject *s = (PyStructObject*)self; - s->s_format = Py_NewRef(Py_None); - s->s_codes = NULL; - s->s_size = -1; - s->s_len = -1; +static bool +same_format(PyStructObject *s, PyObject *format) +{ + Py_ssize_t size = PyBytes_GET_SIZE(s->s_format); + const void *data = PyBytes_AS_STRING(s->s_format); + if (PyUnicode_Check(format) && PyUnicode_IS_ASCII(format)) { + return PyUnicode_GET_LENGTH(format) == size + && memcmp(PyUnicode_1BYTE_DATA(format), data, size) == 0; } - return self; + if (PyBytes_Check(format)) { + return PyBytes_GET_SIZE(format) == size + && memcmp(PyBytes_AS_STRING(format), data, size) == 0; + } + return false; } /*[clinic input] @@ -1800,30 +1930,49 @@ static int Struct___init___impl(PyStructObject *self, PyObject *format) /*[clinic end generated code: output=b8e80862444e92d0 input=1af78a5f57d82cec]*/ { - int ret = 0; - - if (PyUnicode_Check(format)) { - format = PyUnicode_AsASCIIString(format); - if (format == NULL) + if (self->s_format == Py_None) { + if (set_format(self, format) < 0) { return -1; + } } - else { - Py_INCREF(format); + else if (!same_format(self, format)) { + if (!self->init_called) { + if (PyErr_WarnEx(PyExc_FutureWarning, + "Different format arguments for __new__() " + "and __init__() methods of Struct", 1)) + { + return -1; + } + } + if (set_format(self, format) < 0) { + return -1; + } } + self->init_called = true; + return 0; +} - if (!PyBytes_Check(format)) { - Py_DECREF(format); - PyErr_Format(PyExc_TypeError, - "Struct() argument 1 must be a str or bytes object, " - "not %.200s", - _PyType_Name(Py_TYPE(format))); - return -1; +static int +s_init(PyObject *self, PyObject *args, PyObject *kwargs) +{ + if (((PyStructObject *)self)->init_called) { + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "Explicit call of __init__() on " + "initialized Struct is deprecated", 1)) + { + return -1; + } } - - Py_SETREF(self->s_format, format); - - ret = prepare_s(self); - return ret; + else if (Py_TYPE(self)->tp_init == s_init + && ((PyStructObject *)self)->s_format != Py_None) + { + /* Struct.__init__() was called implicitly. + * __new__() already did all the work. */ + ((PyStructObject *)self)->init_called = true; + return 0; + } + /* Struct.__init__() was called explicitly. */ + return Struct___init__(self, args, kwargs); } static int @@ -2437,10 +2586,8 @@ static PyType_Slot PyStructType_slots[] = { {Py_tp_methods, s_methods}, {Py_tp_members, s_members}, {Py_tp_getset, s_getsetlist}, - {Py_tp_init, Struct___init__}, - {Py_tp_alloc, PyType_GenericAlloc}, + {Py_tp_init, s_init}, {Py_tp_new, s_new}, - {Py_tp_free, PyObject_GC_Del}, {0, 0}, }; diff --git a/Modules/clinic/_struct.c.h b/Modules/clinic/_struct.c.h index 9c9d29748fcf28..e75698e3ed00cc 100644 --- a/Modules/clinic/_struct.c.h +++ b/Modules/clinic/_struct.c.h @@ -9,6 +9,66 @@ preserve #include "pycore_abstract.h" // _PyNumber_Index() #include "pycore_modsupport.h" // _PyArg_UnpackKeywords() +PyDoc_STRVAR(Struct__doc__, +"Struct(format)\n" +"--\n" +"\n" +"Create a compiled struct object.\n" +"\n" +"Return a new Struct object which writes and reads binary data according\n" +"to the format string. See help(struct) for more on format strings."); + +static PyObject * +Struct_impl(PyTypeObject *type, PyObject *format); + +static PyObject * +Struct(PyTypeObject *type, PyObject *args, PyObject *kwargs) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 1 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(format), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"format", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "Struct", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[1]; + PyObject * const *fastargs; + Py_ssize_t nargs = PyTuple_GET_SIZE(args); + PyObject *format; + + fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!fastargs) { + goto exit; + } + format = fastargs[0]; + return_value = Struct_impl(type, format); + +exit: + return return_value; +} + PyDoc_STRVAR(Struct___init____doc__, "Struct(format)\n" "--\n" @@ -664,4 +724,4 @@ iter_unpack(PyObject *module, PyObject *const *args, Py_ssize_t nargs) return return_value; } -/*[clinic end generated code: output=09ee4ac45b7e709b input=a9049054013a1b77]*/ +/*[clinic end generated code: output=0f417d43a2a387c8 input=a9049054013a1b77]*/