From f9de629ff2377f101a2d43de5a8de869e82e9848 Mon Sep 17 00:00:00 2001
From: netliomax25-code
Date: Mon, 8 Jun 2026 15:25:39 +0530
Subject: [PATCH] reject surrogate code points in \U unicode escapes

---
 tests/test_parser.py | 17 +++++++++++++++++
 tomlkit/parser.py    | 14 +++++++++++---
 2 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/tests/test_parser.py b/tests/test_parser.py
index 5e7d9f1..5c03d6e 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -2,6 +2,7 @@
 
 from tomlkit.exceptions import EmptyTableNameError
 from tomlkit.exceptions import InternalParserError
+from tomlkit.exceptions import InvalidUnicodeValueError
 from tomlkit.exceptions import UnexpectedCharError
 from tomlkit.items import StringType
 from tomlkit.parser import Parser
@@ -63,3 +64,19 @@ def test_parse_multiline_literal_string_with_crlf() -> None:
     content = "a = '''foo\r\nbar'''"
     parser = Parser(content)
     assert parser.parse() == {"a": "foo\r\nbar"}
+
+
+@pytest.mark.parametrize(
+    "content",
+    [
+        r'a = "\uD800"',
+        r'a = "\uDFFF"',
+        r'a = "\U0000D800"',
+        r'a = "\U0000DFFF"',
+        r'a = "\U0000DC00"',
+    ],
+)
+def test_parser_rejects_surrogate_unicode_escapes(content: str) -> None:
+    parser = Parser(content)
+    with pytest.raises(InvalidUnicodeValueError):
+        parser.parse()
diff --git a/tomlkit/parser.py b/tomlkit/parser.py
index 00079e6..341e3a9 100644
--- a/tomlkit/parser.py
+++ b/tomlkit/parser.py
@@ -1165,11 +1165,19 @@ def _peek_unicode(self, is_long: bool) -> tuple[str | None, str | None]:
             else:
                 extracted = self.extract()
 
-                if extracted[0].lower() == "d" and extracted[1].strip("01234567"):
-                    return None, None
+                try:
+                    codepoint = int(extracted, 16)
+                except ValueError:
+                    return None, extracted
+
+                # Unicode scalar values exclude the surrogate range
+                # (U+D800 to U+DFFF). The 8-digit \U form reaches this range
+                # with leading zeros, so it must be checked on the value itself.
+                if 0xD800 <= codepoint <= 0xDFFF:
+                    return None, extracted
 
                 try:
-                    value = chr(int(extracted, 16))
+                    value = chr(codepoint)
                 except (ValueError, OverflowError):
                     value = None
 