diff --git a/Doc/library/struct.rst b/Doc/library/struct.rst index fa0fb19d852f86..f504f931f0fa20 100644 --- a/Doc/library/struct.rst +++ b/Doc/library/struct.rst @@ -227,32 +227,32 @@ platform-dependent. +--------+--------------------------+--------------------+----------------+------------+ | ``c`` | :c:expr:`char` | bytes of length 1 | 1 | | +--------+--------------------------+--------------------+----------------+------------+ -| ``b`` | :c:expr:`signed char` | integer | 1 | \(1), \(2) | +| ``b`` | :c:expr:`signed char` | int | 1 | \(2) | +--------+--------------------------+--------------------+----------------+------------+ -| ``B`` | :c:expr:`unsigned char` | integer | 1 | \(2) | +| ``B`` | :c:expr:`unsigned char` | int | 1 | \(2) | +--------+--------------------------+--------------------+----------------+------------+ | ``?`` | :c:expr:`_Bool` | bool | 1 | \(1) | +--------+--------------------------+--------------------+----------------+------------+ -| ``h`` | :c:expr:`short` | integer | 2 | \(2) | +| ``h`` | :c:expr:`short` | int | 2 | \(2) | +--------+--------------------------+--------------------+----------------+------------+ -| ``H`` | :c:expr:`unsigned short` | integer | 2 | \(2) | +| ``H`` | :c:expr:`unsigned short` | int | 2 | \(2) | +--------+--------------------------+--------------------+----------------+------------+ -| ``i`` | :c:expr:`int` | integer | 4 | \(2) | +| ``i`` | :c:expr:`int` | int | 4 | \(2) | +--------+--------------------------+--------------------+----------------+------------+ -| ``I`` | :c:expr:`unsigned int` | integer | 4 | \(2) | +| ``I`` | :c:expr:`unsigned int` | int | 4 | \(2) | +--------+--------------------------+--------------------+----------------+------------+ -| ``l`` | :c:expr:`long` | integer | 4 | \(2) | +| ``l`` | :c:expr:`long` | int | 4 | \(2) | +--------+--------------------------+--------------------+----------------+------------+ -| ``L`` | :c:expr:`unsigned long` | integer | 4 | \(2) | +| ``L`` | :c:expr:`unsigned long` | int | 4 | \(2) | +--------+--------------------------+--------------------+----------------+------------+ -| ``q`` | :c:expr:`long long` | integer | 8 | \(2) | +| ``q`` | :c:expr:`long long` | int | 8 | \(2) | +--------+--------------------------+--------------------+----------------+------------+ -| ``Q`` | :c:expr:`unsigned long | integer | 8 | \(2) | +| ``Q`` | :c:expr:`unsigned long | int | 8 | \(2) | | | long` | | | | +--------+--------------------------+--------------------+----------------+------------+ -| ``n`` | :c:type:`ssize_t` | integer | | \(3) | +| ``n`` | :c:type:`ssize_t` | int | | \(2), \(3) | +--------+--------------------------+--------------------+----------------+------------+ -| ``N`` | :c:type:`size_t` | integer | | \(3) | +| ``N`` | :c:type:`size_t` | int | | \(2), \(3) | +--------+--------------------------+--------------------+----------------+------------+ | ``e`` | :c:expr:`_Float16` | float | 2 | \(4), \(6) | +--------+--------------------------+--------------------+----------------+------------+ @@ -268,7 +268,7 @@ platform-dependent. +--------+--------------------------+--------------------+----------------+------------+ | ``p`` | :c:expr:`char[]` | bytes | | \(8) | +--------+--------------------------+--------------------+----------------+------------+ -| ``P`` | :c:expr:`void \*` | integer | | \(5) | +| ``P`` | :c:expr:`void \*` | int | | \(2), \(5) | +--------+--------------------------+--------------------+----------------+------------+ .. versionchanged:: 3.3 @@ -342,27 +342,31 @@ Notes: The ``'p'`` format character encodes a "Pascal string", meaning a short variable-length string stored in a *fixed number of bytes*, given by the count. The first byte stored is the length of the string, or 255, whichever is - smaller. The bytes of the string follow. If the string passed in to + smaller. The bytes of the string follow. If the byte string passed in to :func:`pack` is too long (longer than the count minus 1), only the leading - ``count-1`` bytes of the string are stored. If the string is shorter than + ``count-1`` bytes of the string are stored. If the byte string is shorter than ``count-1``, it is padded with null bytes so that exactly count bytes in all are used. Note that for :func:`unpack`, the ``'p'`` format character consumes - ``count`` bytes, but that the string returned can never contain more than 255 + ``count`` bytes, but that the :class:`!bytes` object returned can never contain more than 255 bytes. + When packing, arguments of types :class:`bytes` and :class:`bytearray` + are accepted. (9) For the ``'s'`` format character, the count is interpreted as the length of the - bytes, not a repeat count like for the other format characters; for example, + byte string, not a repeat count like for the other format characters; for example, ``'10s'`` means a single 10-byte string mapping to or from a single Python byte string, while ``'10c'`` means 10 separate one byte character elements (e.g., ``cccccccccc``) mapping to or from ten different Python byte objects. (See :ref:`struct-examples` for a concrete demonstration of the difference.) - If a count is not given, it defaults to 1. For packing, the string is + If a count is not given, it defaults to 1. For packing, the byte string is truncated or padded with null bytes as appropriate to make it fit. For - unpacking, the resulting bytes object always has exactly the specified number - of bytes. As a special case, ``'0s'`` means a single, empty string (while + unpacking, the resulting :class:`!bytes` object always has exactly the specified number + of bytes. As a special case, ``'0s'`` means a single, empty byte string (while ``'0c'`` means 0 characters). + When packing, arguments of types :class:`bytes` and :class:`bytearray` + are accepted. (10) For the ``'F'`` and ``'D'`` format characters, the packed representation uses diff --git a/Lib/shutil.py b/Lib/shutil.py index 44ccdbb503d4fb..45cbe4c855b462 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -1317,27 +1317,9 @@ def _unpack_zipfile(filename, extract_dir): if not zipfile.is_zipfile(filename): raise ReadError("%s is not a zip file" % filename) - zip = zipfile.ZipFile(filename) - try: - for info in zip.infolist(): - name = info.filename - - # don't extract absolute paths or ones with .. in them - if name.startswith('/') or '..' in name: - continue - - targetpath = os.path.join(extract_dir, *name.split('/')) - if not targetpath: - continue - - _ensure_directory(targetpath) - if not name.endswith('/'): - # file - with zip.open(name, 'r') as source, \ - open(targetpath, 'wb') as target: - copyfileobj(source, target) - finally: - zip.close() + with zipfile.ZipFile(filename) as zip: + zip._ignore_invalid_names = True + zip.extractall(extract_dir) def _unpack_tarfile(filename, extract_dir, *, filter=None): """Unpack tar/tar.gz/tar.bz2/tar.xz/tar.zst `filename` to `extract_dir` diff --git a/Lib/test/test_shutil.py b/Lib/test/test_shutil.py index a4bd113bc7f1fc..13a3487382dfcf 100644 --- a/Lib/test/test_shutil.py +++ b/Lib/test/test_shutil.py @@ -2136,8 +2136,6 @@ def test_make_zipfile_rootdir_nodir(self): def check_unpack_archive(self, format, **kwargs): self.check_unpack_archive_with_converter( format, lambda path: path, **kwargs) - self.check_unpack_archive_with_converter( - format, FakePath, **kwargs) self.check_unpack_archive_with_converter(format, FakePath, **kwargs) def check_unpack_archive_with_converter(self, format, converter, **kwargs): @@ -2194,6 +2192,71 @@ def test_unpack_archive_zip(self): with self.assertRaises(TypeError): self.check_unpack_archive('zip', filter='data') + def test_unpack_archive_zip_badpaths(self): + srcdir = self.mkdtemp() + zipname = os.path.join(srcdir, 'test.zip') + abspath = os.path.join(srcdir, 'abspath') + with zipfile.ZipFile(zipname, 'w') as zf: + zf.writestr(abspath, 'badfile') + zf.writestr(os.sep + abspath, 'badfile') + zf.writestr('/abspath', 'badfile') + zf.writestr('C:/abspath', 'badfile') + zf.writestr('D:\\abspath', 'badfile') + zf.writestr('E:abspath', 'badfile') + zf.writestr('F:/G:/abspath', 'badfile') + zf.writestr('//server/share/abspath', 'badfile') + zf.writestr('\\\\server2\\share\\abspath', 'badfile') + zf.writestr('../relpath', 'badfile') + zf.writestr(os.pardir + os.sep + 'relpath2', 'badfile') + zf.writestr('good/file', 'goodfile') + zf.writestr('good..file', 'goodfile') + + dstdir = os.path.join(self.mkdtemp(), 'dst') + unpack_archive(zipname, dstdir) + self.assertTrue(os.path.isfile(os.path.join(dstdir, 'good', 'file'))) + self.assertTrue(os.path.isfile(os.path.join(dstdir, 'good..file'))) + self.assertFalse(os.path.exists(abspath)) + self.assertFalse(os.path.exists(os.path.join(dstdir, 'abspath'))) + self.assertFalse(os.path.exists(os.path.join(dstdir, 'G_'))) + self.assertFalse(os.path.exists(os.path.join(dstdir, 'server'))) + if os.name != 'nt': + self.assertTrue(os.path.isfile(os.path.join(dstdir, 'C:', 'abspath'))) + self.assertTrue(os.path.isfile(os.path.join(dstdir, 'D:\\abspath'))) + self.assertTrue(os.path.isfile(os.path.join(dstdir, 'E:abspath'))) + self.assertTrue(os.path.isfile(os.path.join(dstdir, 'F:', 'G:', 'abspath'))) + self.assertTrue(os.path.isfile(os.path.join(dstdir, '\\\\server2\\share\\abspath'))) + if os.pardir == '..': + self.assertFalse(os.path.exists(os.path.join(dstdir, '..', 'relpath'))) + self.assertFalse(os.path.exists(os.path.join(dstdir, 'relpath'))) + else: + self.assertTrue(os.path.isfile(os.path.join(dstdir, '..', 'relpath'))) + self.assertFalse(os.path.exists(os.path.join(dstdir, os.pardir, 'relpath2'))) + self.assertFalse(os.path.exists(os.path.join(dstdir, 'relpath2'))) + + dstdir2 = os.path.join(self.mkdtemp(), 'dst') + os.mkdir(dstdir2) + with os_helper.change_cwd(dstdir2): + unpack_archive(zipname, '') + self.assertTrue(os.path.isfile(os.path.join('good', 'file'))) + self.assertTrue(os.path.isfile('good..file')) + self.assertFalse(os.path.exists(abspath)) + self.assertFalse(os.path.exists('abspath')) + self.assertFalse(os.path.exists('C_')) + self.assertFalse(os.path.exists('server')) + if os.name != 'nt': + self.assertTrue(os.path.isfile(os.path.join('C:', 'abspath'))) + self.assertTrue(os.path.isfile('D:\\abspath')) + self.assertTrue(os.path.isfile('E:abspath')) + self.assertTrue(os.path.isfile(os.path.join('F:', 'G:', 'abspath'))) + self.assertTrue(os.path.isfile('\\\\server2\\share\\abspath')) + if os.pardir == '..': + self.assertFalse(os.path.exists(os.path.join('..', 'relpath'))) + self.assertFalse(os.path.exists('relpath')) + else: + self.assertTrue(os.path.isfile(os.path.join('..', 'relpath'))) + self.assertFalse(os.path.exists(os.path.join(os.pardir, 'relpath2'))) + self.assertFalse(os.path.exists('relpath2')) + def test_unpack_registry(self): formats = get_unpack_formats() diff --git a/Lib/zipfile/__init__.py b/Lib/zipfile/__init__.py index 51e0ce9fa36d7e..1e0cc5f6234f28 100644 --- a/Lib/zipfile/__init__.py +++ b/Lib/zipfile/__init__.py @@ -1410,6 +1410,7 @@ class ZipFile: fp = None # Set here since __del__ checks it _windows_illegal_name_trans_table = None + _ignore_invalid_names = False def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True, compresslevel=None, *, strict_timestamps=True, metadata_encoding=None): @@ -1890,21 +1891,31 @@ def _extract_member(self, member, targetpath, pwd): # build the destination pathname, replacing # forward slashes to platform specific separators. - arcname = member.filename.replace('/', os.path.sep) - - if os.path.altsep: + arcname = member.filename + if os.path.sep != '/': + arcname = arcname.replace('/', os.path.sep) + if os.path.altsep and os.path.altsep != '/': arcname = arcname.replace(os.path.altsep, os.path.sep) # interpret absolute pathname as relative, remove drive letter or # UNC path, redundant separators, "." and ".." components. - arcname = os.path.splitdrive(arcname)[1] + drive, root, arcname = os.path.splitroot(arcname) + if self._ignore_invalid_names and (drive or root): + return None + if self._ignore_invalid_names and os.path.pardir in arcname.split(os.path.sep): + return None invalid_path_parts = ('', os.path.curdir, os.path.pardir) arcname = os.path.sep.join(x for x in arcname.split(os.path.sep) if x not in invalid_path_parts) if os.path.sep == '\\': # filter illegal characters on Windows - arcname = self._sanitize_windows_name(arcname, os.path.sep) + arcname2 = self._sanitize_windows_name(arcname, os.path.sep) + if self._ignore_invalid_names and arcname2 != arcname: + return None + arcname = arcname2 if not arcname and not member.is_dir(): + if self._ignore_invalid_names: + return None raise ValueError("Empty filename.") targetpath = os.path.join(targetpath, arcname) diff --git a/Misc/NEWS.d/next/Security/2026-03-29-12-51-33.gh-issue-146581.4vZfB0.rst b/Misc/NEWS.d/next/Security/2026-03-29-12-51-33.gh-issue-146581.4vZfB0.rst new file mode 100644 index 00000000000000..98e65549d79016 --- /dev/null +++ b/Misc/NEWS.d/next/Security/2026-03-29-12-51-33.gh-issue-146581.4vZfB0.rst @@ -0,0 +1,5 @@ +Fix vulnerability in :func:`shutil.unpack_archive` for ZIP files on Windows +which allowed to write files outside of the destination tree if the patch in +the archive contains a Windows drive prefix. Now such invalid paths will be +skipped. Files containing ".." in the name (like "foo..bar") are no longer +skipped. diff --git a/Misc/NEWS.d/next/Security/2026-04-24-23-15-42.gh-issue-148252.8BLmzd.rst b/Misc/NEWS.d/next/Security/2026-04-24-23-15-42.gh-issue-148252.8BLmzd.rst new file mode 100644 index 00000000000000..531ea2348ffdef --- /dev/null +++ b/Misc/NEWS.d/next/Security/2026-04-24-23-15-42.gh-issue-148252.8BLmzd.rst @@ -0,0 +1,3 @@ +Fixed string table and sample record bounds checks in :mod:`!_remote_debugging` +when decoding certain ``.pyb`` inputs on 32-bit builds. Patch by Maurycy +Pawłowski-Wieroński. diff --git a/Misc/NEWS.d/next/Security/2026-04-26-17-49-58.gh-issue-149017.EiVFPo.rst b/Misc/NEWS.d/next/Security/2026-04-26-17-49-58.gh-issue-149017.EiVFPo.rst new file mode 100644 index 00000000000000..6aa7efb68a1981 --- /dev/null +++ b/Misc/NEWS.d/next/Security/2026-04-26-17-49-58.gh-issue-149017.EiVFPo.rst @@ -0,0 +1 @@ +Update bundled `libexpat `_ to version 2.8.0. diff --git a/Misc/sbom.spdx.json b/Misc/sbom.spdx.json index ed9c08016808bb..aaeffd58e799ed 100644 --- a/Misc/sbom.spdx.json +++ b/Misc/sbom.spdx.json @@ -48,11 +48,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "9dfd09a3be37618cbcea380c2374b2b8f0288f57" + "checksumValue": "5343adc95840915b022b1d4524d0acb66b369ba2" }, { "algorithm": "SHA256", - "checksumValue": "26805a0d1a7a6a5cd8ead9cf7f4da29f63f0547a9ad41e80dba4ed9fe1943140" + "checksumValue": "1ec3bad08b6864c2c479e1fd941038c2dcd24c6d9a16400f4da54912d95aa321" } ], "fileName": "Modules/expat/expat.h" @@ -62,11 +62,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "da0328279276800cc747ea7da23886a3f402ccb3" + "checksumValue": "d8f9211d52ff0384e229e4d4d56adae5db2d7f91" }, { "algorithm": "SHA256", - "checksumValue": "15a80e414e9e7c43edba64b1608a77c724387070138693f9e9bcca49c78a2df7" + "checksumValue": "b77f8192baf90aaa41f7023bc68fd1f22ab2552f98758271a1e090544537def5" } ], "fileName": "Modules/expat/expat_external.h" @@ -90,11 +90,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "6a4a232233ba1034c3f2b459159d502e9b2d413b" + "checksumValue": "2555e70b29c1efc0af40879daafd12f8b36aca2c" }, { "algorithm": "SHA256", - "checksumValue": "c803935722f0dbdeeede7f040028fb119135e96dfad949479f8a5304b885bdd6" + "checksumValue": "4feb1df53898a48ae0ae04b5d0352c90395c8e693e5c2675f8ced41903d6fa94" } ], "fileName": "Modules/expat/internal.h" @@ -174,11 +174,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "0c74fbd48dd515c58eeb65b7e71b29da94be4694" + "checksumValue": "cb0af01558ec7b6474d2bd0c9386380c82618e8f" }, { "algorithm": "SHA256", - "checksumValue": "861e7a50ce81f9f16b42d32a9caa4f817d962b274b2929b579511c6f76d348d4" + "checksumValue": "6745a6b8cdd7344d4bd8f27f605363ed746e57ff02d4ebce3eb1806579cd030f" } ], "fileName": "Modules/expat/xmlparse.c" @@ -188,11 +188,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "7cff4d7210f046144f5fa635113f9c26f30fe3d3" + "checksumValue": "c8769fcb93f00272a6e6ca560be633649c817ff7" }, { "algorithm": "SHA256", - "checksumValue": "eaa6c327f9db4a5cec768d0c01927fea212d3ef4d4f970ebc0a98b9f3602784c" + "checksumValue": "5b81f0eb0e144b611dbd1bc9e6037075a16bff94f823d57a81eb2a3e4999e91a" } ], "fileName": "Modules/expat/xmlrole.c" @@ -216,11 +216,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "48b7aa6503302d4157c61a8763629f3236c23502" + "checksumValue": "63e4766a09e63760c6518670509198f8d638f4ad" }, { "algorithm": "SHA256", - "checksumValue": "75da65603e99837fd3116f1453372efd556f9f97d8de73364594dd78b3c8ec54" + "checksumValue": "0ad3f915f2748dc91bf4e4b4a50cf40bf2c95769d0eca7e3b293a230d82bb779" } ], "fileName": "Modules/expat/xmltok.c" @@ -272,11 +272,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "705842f8a09b09cc021d82a71ab03344bfd07b0a" + "checksumValue": "41b8c8fc275882c76d4210b7d40a18e506b07147" }, { "algorithm": "SHA256", - "checksumValue": "f95a2b4b7efda40f5faf366537cb20a57dddbad9655859d2e304f5e75f6907cc" + "checksumValue": "b2188c7e5fa5b33e355cf6cf342dfb8f6e23859f2a6b1ddf79841d7f84f7b196" } ], "fileName": "Modules/expat/xmltok_ns.c" @@ -1730,14 +1730,14 @@ "checksums": [ { "algorithm": "SHA256", - "checksumValue": "9931f9860d18e6cf72d183eb8f309bfb96196c00e1d40caa978e95bc9aa978b6" + "checksumValue": "c7cec5f60ea3a42e7780781c6745255c19aa3dbfeeae58646b7132f88dc24780" } ], - "downloadLocation": "https://github.com/libexpat/libexpat/releases/download/R_2_7_5/expat-2.7.5.tar.gz", + "downloadLocation": "https://github.com/libexpat/libexpat/releases/download/R_2_8_0/expat-2.8.0.tar.gz", "externalRefs": [ { "referenceCategory": "SECURITY", - "referenceLocator": "cpe:2.3:a:libexpat_project:libexpat:2.7.5:*:*:*:*:*:*:*", + "referenceLocator": "cpe:2.3:a:libexpat_project:libexpat:2.8.0:*:*:*:*:*:*:*", "referenceType": "cpe23Type" } ], @@ -1745,7 +1745,7 @@ "name": "expat", "originator": "Organization: Expat development team", "primaryPackagePurpose": "SOURCE", - "versionInfo": "2.7.5" + "versionInfo": "2.8.0" }, { "SPDXID": "SPDXRef-PACKAGE-hacl-star", diff --git a/Modules/_remote_debugging/binary_io.h b/Modules/_remote_debugging/binary_io.h index d90546078bf68c..18f989f672e103 100644 --- a/Modules/_remote_debugging/binary_io.h +++ b/Modules/_remote_debugging/binary_io.h @@ -61,6 +61,7 @@ extern "C" { #define HDR_SIZE_COMPRESSION 4 #define FILE_HEADER_SIZE (HDR_OFF_COMPRESSION + HDR_SIZE_COMPRESSION) #define FILE_HEADER_PLACEHOLDER_SIZE 64 +#define SAMPLE_HEADER_FIXED_SIZE (sizeof(uint64_t) + sizeof(uint32_t) + 1) static_assert(FILE_HEADER_SIZE <= FILE_HEADER_PLACEHOLDER_SIZE, "FILE_HEADER_SIZE exceeds FILE_HEADER_PLACEHOLDER_SIZE"); diff --git a/Modules/_remote_debugging/binary_io_reader.c b/Modules/_remote_debugging/binary_io_reader.c index aca93e9cb1a30e..6c32ef70ac3f65 100644 --- a/Modules/_remote_debugging/binary_io_reader.c +++ b/Modules/_remote_debugging/binary_io_reader.c @@ -258,7 +258,7 @@ reader_parse_string_table(BinaryReader *reader, const uint8_t *data, size_t file PyErr_SetString(PyExc_ValueError, "Malformed varint in string table"); return -1; } - if (offset + str_len > file_size) { + if (offset > file_size || str_len > file_size - offset) { PyErr_SetString(PyExc_ValueError, "String table overflow"); return -1; } @@ -976,8 +976,8 @@ binary_reader_replay(BinaryReader *reader, PyObject *collector, PyObject *progre } while (offset < reader->sample_data_size) { - /* Read thread_id (8 bytes) + interpreter_id (4 bytes) */ - if (offset + 13 > reader->sample_data_size) { + /* Read thread_id (8 bytes) + interpreter_id (4 bytes) + encoding byte */ + if (reader->sample_data_size - offset < SAMPLE_HEADER_FIXED_SIZE) { break; /* End of data */ } diff --git a/Modules/_remote_debugging/binary_io_writer.c b/Modules/_remote_debugging/binary_io_writer.c index c129c93efe23c5..0ac6c88d0373a7 100644 --- a/Modules/_remote_debugging/binary_io_writer.c +++ b/Modules/_remote_debugging/binary_io_writer.c @@ -23,7 +23,6 @@ * ============================================================================ */ /* Sample header sizes */ -#define SAMPLE_HEADER_FIXED_SIZE 13 /* thread_id(8) + interpreter_id(4) + encoding(1) */ #define SAMPLE_HEADER_MAX_SIZE 26 /* fixed + max_varint(10) + status(1) + margin */ #define MAX_VARINT_SIZE 10 /* Maximum bytes for a varint64 */ #define MAX_VARINT_SIZE_U32 5 /* Maximum bytes for a varint32 */ @@ -653,10 +652,13 @@ write_sample_with_encoding(BinaryWriter *writer, ThreadEntry *entry, memcpy(header_buf, &entry->thread_id, 8); memcpy(header_buf + 8, &entry->interpreter_id, 4); header_buf[12] = (uint8_t)encoding_type; - size_t varint_len = encode_varint_u64(header_buf + 13, timestamp_delta); - header_buf[13 + varint_len] = status; + size_t varint_len = encode_varint_u64( + header_buf + SAMPLE_HEADER_FIXED_SIZE, + timestamp_delta); + header_buf[SAMPLE_HEADER_FIXED_SIZE + varint_len] = status; - if (writer_write_bytes(writer, header_buf, 14 + varint_len) < 0) { + if (writer_write_bytes(writer, header_buf, + SAMPLE_HEADER_FIXED_SIZE + varint_len + 1) < 0) { return -1; } diff --git a/Modules/_struct.c b/Modules/_struct.c index c235e27a415543..47119cb3961e3c 100644 --- a/Modules/_struct.c +++ b/Modules/_struct.c @@ -1,7 +1,7 @@ /* struct module -- pack values into and (out of) bytes objects */ /* New version supporting byte order, alignment and size options, - character strings, and unsigned numbers */ + byte strings, and unsigned numbers */ #ifndef Py_BUILD_CORE_BUILTIN # define Py_BUILD_CORE_MODULE 1 @@ -2297,7 +2297,7 @@ Struct_iter_unpack_impl(PyStructObject *self, PyObject *buffer) * * Takes a struct object, a tuple of arguments, and offset in that tuple of * argument for where to start processing the arguments for packing, and a - * character buffer for writing the packed string. The caller must insure + * character buffer for writing the packed data. The caller must ensure * that the buffer may contain the required length for packing the arguments. * 0 is returned on success, 1 is returned if there is an error. * @@ -2774,8 +2774,8 @@ static struct PyMethodDef module_functions[] = { PyDoc_STRVAR(module_doc, "Functions to convert between Python values and C structs.\n\ -Python bytes objects are used to hold the data representing the C struct\n\ -and also as format strings (explained below) to describe the layout of data\n\ +Python bytes objects are used to hold the data representing the C struct.\n\ +The format string (explained below) describes the layout of data\n\ in the C struct.\n\ \n\ The optional first format char indicates byte order, size and alignment:\n\ @@ -2785,19 +2785,18 @@ The optional first format char indicates byte order, size and alignment:\n\ >: big-endian, std. size & alignment\n\ !: same as >\n\ \n\ -The remaining chars indicate types of args and must match exactly;\n\ +The remaining characters indicate types of args and must match exactly;\n\ these can be preceded by a decimal repeat count:\n\ - x: pad byte (no data); c:char; b:signed byte; B:unsigned byte;\n\ - ?:_Bool; h:short; H:unsigned short; i:int; I:unsigned int;\n\ - l:long; L:unsigned long; f:float; d:double; e:half-float.\n\ - F:float complex; D:double complex.\n\ + x: pad byte (no data); c: char; b: signed byte; B: unsigned byte;\n\ + ?: _Bool; h: short; H: unsigned short; i: int; I: unsigned int;\n\ + l: long; L: unsigned long; q: long long; Q: unsigned long long;\n\ + f: float; d: double; e: half-float;\n\ + F: float complex; D: double complex.\n\ Special cases (preceding decimal count indicates length):\n\ - s:string (array of char); p: pascal string (with count byte).\n\ + s: byte string (array of char); p: Pascal string (with count byte).\n\ Special cases (only available in native format):\n\ - n:ssize_t; N:size_t;\n\ - P:an integer type that is wide enough to hold a pointer.\n\ -Special case (not in native mode unless 'long long' in platform C):\n\ - q:long long; Q:unsigned long long\n\ + n: ssize_t; N: size_t;\n\ + P: an integer type that is wide enough to hold a pointer.\n\ Whitespace between formats is ignored.\n\ \n\ The variable struct.error is an exception raised on errors.\n"); diff --git a/Modules/expat/expat.h b/Modules/expat/expat.h index 18dbaebde293bc..79c609f19aa4cf 100644 --- a/Modules/expat/expat.h +++ b/Modules/expat/expat.h @@ -45,6 +45,7 @@ #ifndef Expat_INCLUDED # define Expat_INCLUDED 1 +# include // for uint8_t # include # include "expat_external.h" @@ -917,10 +918,21 @@ XML_SetParamEntityParsing(XML_Parser parser, function behavior. This must be called before parsing is started. Returns 1 if successful, 0 when called after parsing has started. Note: If parser == NULL, the function will do nothing and return 0. + DEPRECATED since Expat 2.8.0. */ XMLPARSEAPI(int) XML_SetHashSalt(XML_Parser parser, unsigned long hash_salt); +/* Sets the hash salt to use for internal hash calculations. + Helps in preventing DoS attacks based on predicting hash function behavior. + This must be called before parsing is started. + Returns XML_TRUE if successful, XML_FALSE when called after parsing has + started or when parser is NULL. + Added in Expat 2.8.0. +*/ +XMLPARSEAPI(XML_Bool) +XML_SetHashSalt16Bytes(XML_Parser parser, const uint8_t entropy[16]); + /* If XML_Parse or XML_ParseBuffer have returned XML_STATUS_ERROR, then XML_GetErrorCode returns information about the error. */ @@ -1081,8 +1093,8 @@ XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled); See https://semver.org */ # define XML_MAJOR_VERSION 2 -# define XML_MINOR_VERSION 7 -# define XML_MICRO_VERSION 5 +# define XML_MINOR_VERSION 8 +# define XML_MICRO_VERSION 0 # ifdef __cplusplus } diff --git a/Modules/expat/expat_config.h b/Modules/expat/expat_config.h index 09d3161dbc0fb2..70df73c8e00a5f 100644 --- a/Modules/expat/expat_config.h +++ b/Modules/expat/expat_config.h @@ -22,5 +22,10 @@ // bpo-30947: Python uses best available entropy sources to // call XML_SetHashSalt(), expat entropy sources are not needed #define XML_POOR_ENTROPY 1 +#undef HAVE_ARC4RANDOM +#undef HAVE_ARC4RANDOM_BUF +#undef HAVE_GETENTROPY +#undef HAVE_GETRANDOM +#undef HAVE_SYSCALL_GETRANDOM #endif /* EXPAT_CONFIG_H */ diff --git a/Modules/expat/expat_external.h b/Modules/expat/expat_external.h index cf4d445e68b00c..cc945c424e471f 100644 --- a/Modules/expat/expat_external.h +++ b/Modules/expat/expat_external.h @@ -12,9 +12,10 @@ Copyright (c) 2001-2002 Greg Stein Copyright (c) 2002-2006 Karl Waclawek Copyright (c) 2016 Cristian Rodríguez - Copyright (c) 2016-2026 Sebastian Pipping + Copyright (c) 2016-2025 Sebastian Pipping Copyright (c) 2017 Rhodri James Copyright (c) 2018 Yury Gribov + Copyright (c) 2026 Matthew Fernandez Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -48,7 +49,7 @@ /* Expat tries very hard to make the API boundary very specifically defined. There are two macros defined to control this boundary; each of these can be defined before including this header to - achieve some different behavior, but doing so it not recommended or + achieve some different behavior, but doing so is not recommended or tested frequently. XMLCALL - The calling convention to use for all calls across the diff --git a/Modules/expat/internal.h b/Modules/expat/internal.h index 61266ebb7723d1..420d4217a569b1 100644 --- a/Modules/expat/internal.h +++ b/Modules/expat/internal.h @@ -28,7 +28,7 @@ Copyright (c) 2002-2003 Fred L. Drake, Jr. Copyright (c) 2002-2006 Karl Waclawek Copyright (c) 2003 Greg Stein - Copyright (c) 2016-2025 Sebastian Pipping + Copyright (c) 2016-2026 Sebastian Pipping Copyright (c) 2018 Yury Gribov Copyright (c) 2019 David Loffredo Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow @@ -113,6 +113,7 @@ #if defined(_WIN32) \ && (! defined(__USE_MINGW_ANSI_STDIO) \ || (1 - __USE_MINGW_ANSI_STDIO - 1 == 0)) +# define EXPAT_FMT_LLX(midpart) "%" midpart "I64x" # define EXPAT_FMT_ULL(midpart) "%" midpart "I64u" # if defined(_WIN64) // Note: modifiers "td" and "zu" do not work for MinGW # define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "I64d" @@ -122,6 +123,7 @@ # define EXPAT_FMT_SIZE_T(midpart) "%" midpart "u" # endif #else +# define EXPAT_FMT_LLX(midpart) "%" midpart "llx" # define EXPAT_FMT_ULL(midpart) "%" midpart "llu" # if ! defined(ULONG_MAX) # error Compiler did not define ULONG_MAX for us diff --git a/Modules/expat/refresh.sh b/Modules/expat/refresh.sh index 779929fc6ed33c..774e0b89d94c0e 100755 --- a/Modules/expat/refresh.sh +++ b/Modules/expat/refresh.sh @@ -12,9 +12,9 @@ fi # Update this when updating to a new version after verifying that the changes # the update brings in are good. These values are used for verifying the SBOM, too. -expected_libexpat_tag="R_2_7_5" -expected_libexpat_version="2.7.5" -expected_libexpat_sha256="9931f9860d18e6cf72d183eb8f309bfb96196c00e1d40caa978e95bc9aa978b6" +expected_libexpat_tag="R_2_8_0" +expected_libexpat_version="2.8.0" +expected_libexpat_sha256="c7cec5f60ea3a42e7780781c6745255c19aa3dbfeeae58646b7132f88dc24780" expat_dir="$(realpath "$(dirname -- "${BASH_SOURCE[0]}")")" cd ${expat_dir} @@ -64,6 +64,18 @@ This may be due to source changes and will require updating this script" >&2 exit 1 fi +# Step 4: Skip the Windows rand_s entropy path in xmlparse.c when +# XML_POOR_ENTROPY is set. +sed -z -i 's|#if defined(_WIN32)\n# include "random_rand_s\.h"\n#endif /\* defined(_WIN32) \*/|#if defined(_WIN32) \&\& ! defined(XML_POOR_ENTROPY)\n# include "random_rand_s.h"\n#endif /* defined(_WIN32) \&\& ! defined(XML_POOR_ENTROPY) */|' xmlparse.c +sed -z -i 's|# ifdef _WIN32\n if (writeRandomBytes_rand_s|# if defined(_WIN32) \&\& ! defined(XML_POOR_ENTROPY)\n if (writeRandomBytes_rand_s|' xmlparse.c + +if ! grep -q '#if defined(_WIN32) && ! defined(XML_POOR_ENTROPY)' xmlparse.c; then + echo " +Error: rand_s gate not patched in xmlparse.c; +This may be due to source changes and will require updating this script" >&2 + exit 1 +fi + echo ' Updated! next steps: - Verify all is okay: diff --git a/Modules/expat/xmlparse.c b/Modules/expat/xmlparse.c index 0248b6651ffbff..e6842f3f0bf750 100644 --- a/Modules/expat/xmlparse.c +++ b/Modules/expat/xmlparse.c @@ -1,4 +1,4 @@ -/* 93c1caa66e2b0310459482516af05505b57c5cb7b96df777105308fc585c85d1 (2.7.5+) +/* a5d18f6a50f536615ac1c70304f87d94f99cc85a86b502188952440610ccf0f8 (2.8.0+) __ __ _ ___\ \/ /_ __ __ _| |_ / _ \\ /| '_ \ / _` | __| @@ -41,10 +41,12 @@ Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow Copyright (c) 2024-2025 Berkay Eren Ürün Copyright (c) 2024 Hanno Böck - Copyright (c) 2025 Matthew Fernandez + Copyright (c) 2025-2026 Matthew Fernandez Copyright (c) 2025 Atrem Borovik Copyright (c) 2025 Alfonso Gregory Copyright (c) 2026 Rosen Penev + Copyright (c) 2026 Francesco Bertolaccini + Copyright (c) 2026 Christian Ng Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -84,28 +86,16 @@ # error XML_CONTEXT_BYTES must be defined, non-empty and >=0 (0 to disable, >=1 to enable; 1024 is a common default) #endif -#if defined(HAVE_SYSCALL_GETRANDOM) -# if ! defined(_GNU_SOURCE) -# define _GNU_SOURCE 1 /* syscall prototype */ -# endif -#endif - -#ifdef _WIN32 -/* force stdlib to define rand_s() */ -# if ! defined(_CRT_RAND_S) -# define _CRT_RAND_S -# endif -#endif - #include #include #include /* memset(), memcpy() */ #include #include /* INT_MAX, UINT_MAX */ #include /* fprintf */ -#include /* getenv, rand_s */ +#include /* getenv */ #include /* SIZE_MAX, uintptr_t */ #include /* isnan */ +#include #ifdef _WIN32 # define getpid GetCurrentProcessId @@ -125,26 +115,34 @@ #include "expat.h" #include "siphash.h" +#if defined(HAVE_ARC4RANDOM) +# include "random_arc4random.h" +#endif /* defined(HAVE_ARC4RANDOM) */ + +#if defined(HAVE_ARC4RANDOM_BUF) +# include "random_arc4random_buf.h" +#endif // defined(HAVE_ARC4RANDOM_BUF) + +#if defined(XML_DEV_URANDOM) +# include "random_dev_urandom.h" +#endif /* defined(XML_DEV_URANDOM) */ + +#if defined(HAVE_GETENTROPY) +# include "random_getentropy.h" +#endif // defined(HAVE_GETENTROPY) + #if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) -# if defined(HAVE_GETRANDOM) -# include /* getrandom */ -# else -# include /* syscall */ -# include /* SYS_getrandom */ -# endif -# if ! defined(GRND_NONBLOCK) -# define GRND_NONBLOCK 0x0001 -# endif /* defined(GRND_NONBLOCK) */ -#endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */ +# include "random_getrandom.h" +#endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */ -#if defined(_WIN32) && ! defined(LOAD_LIBRARY_SEARCH_SYSTEM32) -# define LOAD_LIBRARY_SEARCH_SYSTEM32 0x00000800 -#endif +#if defined(_WIN32) && ! defined(XML_POOR_ENTROPY) +# include "random_rand_s.h" +#endif /* defined(_WIN32) && ! defined(XML_POOR_ENTROPY) */ #if ! defined(HAVE_GETRANDOM) && ! defined(HAVE_SYSCALL_GETRANDOM) \ && ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) \ - && ! defined(XML_DEV_URANDOM) && ! defined(_WIN32) \ - && ! defined(XML_POOR_ENTROPY) + && ! defined(HAVE_GETENTROPY) && ! defined(XML_DEV_URANDOM) \ + && ! defined(_WIN32) && ! defined(XML_POOR_ENTROPY) # error You do not have support for any sources of high quality entropy \ enabled. For end user security, that is probably not what you want. \ \ @@ -153,10 +151,11 @@ * Linux >=3.17 + glibc (including <2.25) (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \ * BSD / macOS >=10.7 / glibc >=2.36 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \ * BSD / macOS (including <10.7) / glibc >=2.36 (arc4random): HAVE_ARC4RANDOM, \ + * BSD / macOS >=10.12 / glibc >=2.25 (getentropy): HAVE_GETENTROPY, \ * Linux (including <3.17) / BSD / macOS (including <10.7) / Solaris >=8 (/dev/urandom): XML_DEV_URANDOM, \ * Windows >=Vista (rand_s): _WIN32. \ \ - If insist on not using any of these, bypass this error by defining \ + If you insist on not using any of these, bypass this error by defining \ XML_POOR_ENTROPY; you have been warned. \ \ If you have reasons to patch this detection code away or need changes \ @@ -604,7 +603,7 @@ static ELEMENT_TYPE *getElementType(XML_Parser parser, const ENCODING *enc, static XML_Char *copyString(const XML_Char *s, XML_Parser parser); -static unsigned long generate_hash_secret_salt(XML_Parser parser); +static struct sipkey generate_hash_secret_salt(void); static XML_Bool startParsing(XML_Parser parser); static XML_Parser parserCreate(const XML_Char *encodingName, @@ -777,7 +776,8 @@ struct XML_ParserStruct { XML_Bool m_useForeignDTD; enum XML_ParamEntityParsing m_paramEntityParsing; #endif - unsigned long m_hash_secret_salt; + struct sipkey m_hash_secret_salt_128; + XML_Bool m_hash_secret_salt_set; #if XML_GE == 1 ACCOUNTING m_accounting; MALLOC_TRACKER m_alloc_tracker; @@ -1036,135 +1036,6 @@ static const XML_Char implicitContext[] ASCII_s, ASCII_p, ASCII_a, ASCII_c, ASCII_e, '\0'}; -/* To avoid warnings about unused functions: */ -#if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) - -# if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) - -/* Obtain entropy on Linux 3.17+ */ -static int -writeRandomBytes_getrandom_nonblock(void *target, size_t count) { - int success = 0; /* full count bytes written? */ - size_t bytesWrittenTotal = 0; - const unsigned int getrandomFlags = GRND_NONBLOCK; - - do { - void *const currentTarget = (void *)((char *)target + bytesWrittenTotal); - const size_t bytesToWrite = count - bytesWrittenTotal; - - assert(bytesToWrite <= INT_MAX); - - const int bytesWrittenMore = -# if defined(HAVE_GETRANDOM) - (int)getrandom(currentTarget, bytesToWrite, getrandomFlags); -# else - (int)syscall(SYS_getrandom, currentTarget, bytesToWrite, - getrandomFlags); -# endif - - if (bytesWrittenMore > 0) { - bytesWrittenTotal += bytesWrittenMore; - if (bytesWrittenTotal >= count) - success = 1; - } - } while (! success && (errno == EINTR)); - - return success; -} - -# endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */ - -# if ! defined(_WIN32) && defined(XML_DEV_URANDOM) - -/* Extract entropy from /dev/urandom */ -static int -writeRandomBytes_dev_urandom(void *target, size_t count) { - int success = 0; /* full count bytes written? */ - size_t bytesWrittenTotal = 0; - - const int fd = open("/dev/urandom", O_RDONLY); - if (fd < 0) { - return 0; - } - - do { - void *const currentTarget = (void *)((char *)target + bytesWrittenTotal); - const size_t bytesToWrite = count - bytesWrittenTotal; - - const ssize_t bytesWrittenMore = read(fd, currentTarget, bytesToWrite); - - if (bytesWrittenMore > 0) { - bytesWrittenTotal += bytesWrittenMore; - if (bytesWrittenTotal >= count) - success = 1; - } - } while (! success && (errno == EINTR)); - - close(fd); - return success; -} - -# endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */ - -#endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */ - -#if defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF) - -static void -writeRandomBytes_arc4random(void *target, size_t count) { - size_t bytesWrittenTotal = 0; - - while (bytesWrittenTotal < count) { - const uint32_t random32 = arc4random(); - size_t i = 0; - - for (; (i < sizeof(random32)) && (bytesWrittenTotal < count); - i++, bytesWrittenTotal++) { - const uint8_t random8 = (uint8_t)(random32 >> (i * 8)); - ((uint8_t *)target)[bytesWrittenTotal] = random8; - } - } -} - -#endif /* defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF) */ - -#ifdef _WIN32 - -/* Provide declaration of rand_s() for MinGW-32 (not 64, which has it), - as it didn't declare it in its header prior to version 5.3.0 of its - runtime package (mingwrt, containing stdlib.h). The upstream fix - was introduced at https://osdn.net/projects/mingw/ticket/39658 . */ -# if defined(__MINGW32__) && defined(__MINGW32_VERSION) \ - && __MINGW32_VERSION < 5003000L && ! defined(__MINGW64_VERSION_MAJOR) -__declspec(dllimport) int rand_s(unsigned int *); -# endif - -/* Obtain entropy on Windows using the rand_s() function which - * generates cryptographically secure random numbers. Internally it - * uses RtlGenRandom API which is present in Windows XP and later. - */ -static int -writeRandomBytes_rand_s(void *target, size_t count) { - size_t bytesWrittenTotal = 0; - - while (bytesWrittenTotal < count) { - unsigned int random32 = 0; - size_t i = 0; - - if (rand_s(&random32)) - return 0; /* failure */ - - for (; (i < sizeof(random32)) && (bytesWrittenTotal < count); - i++, bytesWrittenTotal++) { - const uint8_t random8 = (uint8_t)(random32 >> (i * 8)); - ((uint8_t *)target)[bytesWrittenTotal] = random8; - } - } - return 1; /* success */ -} - -#endif /* _WIN32 */ - #if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) static unsigned long @@ -1192,69 +1063,70 @@ gather_time_entropy(void) { #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */ -static unsigned long -ENTROPY_DEBUG(const char *label, unsigned long entropy) { +static struct sipkey +ENTROPY_DEBUG(const char *label, struct sipkey entropy_128) { if (getDebugLevel("EXPAT_ENTROPY_DEBUG", 0) >= 1u) { - fprintf(stderr, "expat: Entropy: %s --> 0x%0*lx (%lu bytes)\n", label, - (int)sizeof(entropy) * 2, entropy, (unsigned long)sizeof(entropy)); + fprintf(stderr, + "expat: Entropy: %s --> [0x" EXPAT_FMT_LLX( + "016") ", 0x" EXPAT_FMT_LLX("016") "] (16 bytes)\n", + label, (unsigned long long)entropy_128.k[0], + (unsigned long long)entropy_128.k[1]); } - return entropy; + return entropy_128; } -static unsigned long -generate_hash_secret_salt(XML_Parser parser) { - unsigned long entropy; - (void)parser; +static struct sipkey +generate_hash_secret_salt(void) { + struct sipkey entropy; /* "Failproof" high quality providers: */ #if defined(HAVE_ARC4RANDOM_BUF) - arc4random_buf(&entropy, sizeof(entropy)); + writeRandomBytes_arc4random_buf(&entropy, sizeof(entropy)); return ENTROPY_DEBUG("arc4random_buf", entropy); #elif defined(HAVE_ARC4RANDOM) - writeRandomBytes_arc4random((void *)&entropy, sizeof(entropy)); + writeRandomBytes_arc4random(&entropy, sizeof(entropy)); return ENTROPY_DEBUG("arc4random", entropy); #else /* Try high quality providers first .. */ -# ifdef _WIN32 - if (writeRandomBytes_rand_s((void *)&entropy, sizeof(entropy))) { +# if defined(_WIN32) && ! defined(XML_POOR_ENTROPY) + if (writeRandomBytes_rand_s(&entropy, sizeof(entropy))) { return ENTROPY_DEBUG("rand_s", entropy); } +# elif defined(HAVE_GETENTROPY) + if (writeRandomBytes_getentropy(&entropy, sizeof(entropy))) { + return ENTROPY_DEBUG("getentropy", entropy); + } + errno = 0; # elif defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) - if (writeRandomBytes_getrandom_nonblock((void *)&entropy, sizeof(entropy))) { + if (writeRandomBytes_getrandom_nonblock(&entropy, sizeof(entropy))) { return ENTROPY_DEBUG("getrandom", entropy); } # endif # if ! defined(_WIN32) && defined(XML_DEV_URANDOM) - if (writeRandomBytes_dev_urandom((void *)&entropy, sizeof(entropy))) { + if (writeRandomBytes_dev_urandom(&entropy, sizeof(entropy))) { return ENTROPY_DEBUG("/dev/urandom", entropy); } # endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */ /* .. and self-made low quality for backup: */ - entropy = gather_time_entropy(); + entropy.k[0] = 0; + entropy.k[1] = gather_time_entropy(); # if ! defined(__wasi__) /* Process ID is 0 bits entropy if attacker has local access */ - entropy ^= getpid(); + entropy.k[1] ^= getpid(); # endif /* Factors are 2^31-1 and 2^61-1 (Mersenne primes M31 and M61) */ if (sizeof(unsigned long) == 4) { - return ENTROPY_DEBUG("fallback(4)", entropy * 2147483647); + entropy.k[1] *= 2147483647; + return ENTROPY_DEBUG("fallback(4)", entropy); } else { - return ENTROPY_DEBUG("fallback(8)", - entropy * (unsigned long)2305843009213693951ULL); + entropy.k[1] *= 2305843009213693951ULL; + return ENTROPY_DEBUG("fallback(8)", entropy); } #endif } -static unsigned long -get_hash_secret_salt(XML_Parser parser) { - const XML_Parser rootParser = getRootParserOf(parser, NULL); - assert(! rootParser->m_parentParser); - - return rootParser->m_hash_secret_salt; -} - static enum XML_Error callProcessor(XML_Parser parser, const char *start, const char *end, const char **endPtr) { @@ -1323,8 +1195,10 @@ callProcessor(XML_Parser parser, const char *start, const char *end, static XML_Bool /* only valid for root parser */ startParsing(XML_Parser parser) { /* hash functions must be initialized before setContext() is called */ - if (parser->m_hash_secret_salt == 0) - parser->m_hash_secret_salt = generate_hash_secret_salt(parser); + if (parser->m_hash_secret_salt_set != XML_TRUE) { + parser->m_hash_secret_salt_128 = generate_hash_secret_salt(); + parser->m_hash_secret_salt_set = XML_TRUE; + } if (parser->m_ns) { /* implicit context only set for root parser, since child parsers (i.e. external entity parsers) will inherit it @@ -1612,7 +1486,9 @@ parserInit(XML_Parser parser, const XML_Char *encodingName) { parser->m_useForeignDTD = XML_FALSE; parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER; #endif - parser->m_hash_secret_salt = 0; + parser->m_hash_secret_salt_128.k[0] = 0; + parser->m_hash_secret_salt_128.k[1] = 0; + parser->m_hash_secret_salt_set = XML_FALSE; #if XML_GE == 1 memset(&parser->m_accounting, 0, sizeof(ACCOUNTING)); @@ -1779,7 +1655,8 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context, from hash tables associated with either parser without us having to worry which hash secrets each table has. */ - unsigned long oldhash_secret_salt; + struct sipkey oldhash_secret_salt_128; + XML_Bool oldhash_secret_salt_set; XML_Bool oldReparseDeferralEnabled; /* Validate the oldParser parameter before we pull everything out of it */ @@ -1825,7 +1702,8 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context, from hash tables associated with either parser without us having to worry which hash secrets each table has. */ - oldhash_secret_salt = parser->m_hash_secret_salt; + oldhash_secret_salt_128 = parser->m_hash_secret_salt_128; + oldhash_secret_salt_set = parser->m_hash_secret_salt_set; oldReparseDeferralEnabled = parser->m_reparseDeferralEnabled; #ifdef XML_DTD @@ -1880,7 +1758,8 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context, parser->m_externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg; parser->m_defaultExpandInternalEntities = oldDefaultExpandInternalEntities; parser->m_ns_triplets = oldns_triplets; - parser->m_hash_secret_salt = oldhash_secret_salt; + parser->m_hash_secret_salt_128 = oldhash_secret_salt_128; + parser->m_hash_secret_salt_set = oldhash_secret_salt_set; parser->m_reparseDeferralEnabled = oldReparseDeferralEnabled; parser->m_parentParser = oldParser; #ifdef XML_DTD @@ -2324,6 +2203,7 @@ XML_SetParamEntityParsing(XML_Parser parser, #endif } +// DEPRECATED since Expat 2.8.0. int XMLCALL XML_SetHashSalt(XML_Parser parser, unsigned long hash_salt) { if (parser == NULL) @@ -2335,10 +2215,46 @@ XML_SetHashSalt(XML_Parser parser, unsigned long hash_salt) { /* block after XML_Parse()/XML_ParseBuffer() has been called */ if (parserBusy(rootParser)) return 0; - rootParser->m_hash_secret_salt = hash_salt; + + rootParser->m_hash_secret_salt_128.k[0] = 0; + rootParser->m_hash_secret_salt_128.k[1] = hash_salt; + + if (hash_salt != 0) { // to remain backwards compatible + rootParser->m_hash_secret_salt_set = XML_TRUE; + + if (sizeof(unsigned long) == 4) + ENTROPY_DEBUG("explicit(4)", rootParser->m_hash_secret_salt_128); + else + ENTROPY_DEBUG("explicit(8)", rootParser->m_hash_secret_salt_128); + } + return 1; } +XML_Bool XMLCALL +XML_SetHashSalt16Bytes(XML_Parser parser, const uint8_t entropy[16]) { + if (parser == NULL) + return XML_FALSE; + + if (entropy == NULL) + return XML_FALSE; + + const XML_Parser rootParser = getRootParserOf(parser, NULL); + assert(! rootParser->m_parentParser); + + /* block after XML_Parse()/XML_ParseBuffer() has been called */ + if (parserBusy(rootParser)) + return XML_FALSE; + + sip_tokey(&(rootParser->m_hash_secret_salt_128), entropy); + + rootParser->m_hash_secret_salt_set = XML_TRUE; + + ENTROPY_DEBUG("explicit(16)", rootParser->m_hash_secret_salt_128); + + return XML_TRUE; +} + enum XML_Status XMLCALL XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) { if ((parser == NULL) || (len < 0) || ((s == NULL) && (len != 0))) { @@ -7842,8 +7758,10 @@ keylen(KEY s) { static void copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key) { - key->k[0] = 0; - key->k[1] = get_hash_secret_salt(parser); + const XML_Parser rootParser = getRootParserOf(parser, NULL); + assert(! rootParser->m_parentParser); + + *key = rootParser->m_hash_secret_salt_128; } static unsigned long FASTCALL diff --git a/Modules/expat/xmlrole.c b/Modules/expat/xmlrole.c index b1dfb456e5df87..d56bee82dd2d13 100644 --- a/Modules/expat/xmlrole.c +++ b/Modules/expat/xmlrole.c @@ -12,7 +12,7 @@ Copyright (c) 2002-2006 Karl Waclawek Copyright (c) 2002-2003 Fred L. Drake, Jr. Copyright (c) 2005-2009 Steven Solie - Copyright (c) 2016-2026 Sebastian Pipping + Copyright (c) 2016-2023 Sebastian Pipping Copyright (c) 2017 Rhodri James Copyright (c) 2019 David Loffredo Copyright (c) 2021 Donghee Na diff --git a/Modules/expat/xmltok.c b/Modules/expat/xmltok.c index f6e5f742c928c8..32cd5f147e9322 100644 --- a/Modules/expat/xmltok.c +++ b/Modules/expat/xmltok.c @@ -12,7 +12,7 @@ Copyright (c) 2002 Greg Stein Copyright (c) 2002-2016 Karl Waclawek Copyright (c) 2005-2009 Steven Solie - Copyright (c) 2016-2026 Sebastian Pipping + Copyright (c) 2016-2024 Sebastian Pipping Copyright (c) 2016 Pascal Cuoq Copyright (c) 2016 Don Lewis Copyright (c) 2017 Rhodri James diff --git a/Modules/expat/xmltok_ns.c b/Modules/expat/xmltok_ns.c index 1cd60de1e4fe51..810ca2c6d0485e 100644 --- a/Modules/expat/xmltok_ns.c +++ b/Modules/expat/xmltok_ns.c @@ -11,7 +11,7 @@ Copyright (c) 2002 Greg Stein Copyright (c) 2002 Fred L. Drake, Jr. Copyright (c) 2002-2006 Karl Waclawek - Copyright (c) 2017-2026 Sebastian Pipping + Copyright (c) 2017-2021 Sebastian Pipping Copyright (c) 2025 Alfonso Gregory Licensed under the MIT license: