From a3394d3fe23a306e61bf1f35a65adcd068d99a1d Mon Sep 17 00:00:00 2001 From: Adam Korczynski Date: Fri, 10 Apr 2026 18:35:28 +0100 Subject: [PATCH] Add fuzzer for locale module MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fuzzes the CPython _locale C module (Modules/_localemodule.c) through its two Unicode-consuming entry points. Dispatches per input to either locale.strxfrm() — the locale-aware string-to-transform-key conversion that exercises the underlying wcsxfrm() wrapper and its wchar_t encode/decode boundaries — or locale.strcoll(), which compares two fuzzed Unicode strings via wcscoll() to drive the locale-aware collation path. Inputs are Unicode text drawn from the fuzzed byte stream, so multibyte, surrogate-range, and astral-plane code points all reach the C conversion layer. --- Makefile | 5 ++++- fuzz_targets.txt | 1 + locale.py | 37 +++++++++++++++++++++++++++++++++++++ 3 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 locale.py diff --git a/Makefile b/Makefile index 7bbbca4..1697ff9 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -all : fuzzer-html fuzzer-email fuzzer-httpclient fuzzer-json fuzzer-difflib fuzzer-csv fuzzer-decode fuzzer-ast fuzzer-tarfile fuzzer-tarfile-hypothesis fuzzer-zipfile fuzzer-zipfile-hypothesis fuzzer-re fuzzer-configparser fuzzer-tomllib fuzzer-plistlib fuzzer-xml fuzzer-zoneinfo fuzzer-binascii +all : fuzzer-html fuzzer-email fuzzer-httpclient fuzzer-json fuzzer-difflib fuzzer-csv fuzzer-decode fuzzer-ast fuzzer-tarfile fuzzer-tarfile-hypothesis fuzzer-zipfile fuzzer-zipfile-hypothesis fuzzer-re fuzzer-configparser fuzzer-tomllib fuzzer-plistlib fuzzer-xml fuzzer-zoneinfo fuzzer-binascii fuzzer-locale PYTHON_CONFIG_PATH=$(CPYTHON_INSTALL_PATH)/bin/python3-config CXXFLAGS += $(shell $(PYTHON_CONFIG_PATH) --cflags) @@ -43,3 +43,6 @@ fuzzer-zoneinfo: fuzzer-binascii: clang++ $(CXXFLAGS) $(LIB_FUZZING_ENGINE) -std=c++17 fuzzer.cpp 
-DPYTHON_HARNESS_PATH="\"binascii.py\"" -ldl $(LDFLAGS) -o fuzzer-binascii
+
+fuzzer-locale:
+	clang++ $(CXXFLAGS) $(LIB_FUZZING_ENGINE) -std=c++17 fuzzer.cpp -DPYTHON_HARNESS_PATH="\"locale.py\"" -ldl $(LDFLAGS) -o fuzzer-locale
diff --git a/fuzz_targets.txt b/fuzz_targets.txt
index b016889..cd98e78 100644
--- a/fuzz_targets.txt
+++ b/fuzz_targets.txt
@@ -8,6 +8,7 @@ email email.py
 html html.py
 httpclient httpclient.py
 json json.py
+locale locale.py
 plistlib plist.py
 re re.py
 tarfile tarfile.py
diff --git a/locale.py b/locale.py
new file mode 100644
index 0000000..3e6e90e
--- /dev/null
+++ b/locale.py
@@ -0,0 +1,37 @@
+from fuzzeddataprovider import FuzzedDataProvider
+import locale
+
+OP_STRXFRM = 0
+OP_STRCOLL = 1
+
+
+# Length for the next string, or 0 once the provider reports no bytes left.
+def _next_length(data):
+    if data.remaining_bytes() > 0:
+        return data.ConsumeIntInRange(1, min(data.remaining_bytes(), 10000))
+    return 0
+
+
+# Fuzzes the _locale C module (Modules/_localemodule.c): the first value
+# drawn from the input selects locale.strxfrm() on one fuzzed string or
+# locale.strcoll() on two. Inputs outside 1..0x10000 bytes are skipped.
+# NOTE(review): this file is named like the stdlib module it imports;
+# confirm the harness loader resolves `locale` to the stdlib module.
+def FuzzerRunOne(FuzzerInput):
+    if not 1 <= len(FuzzerInput) <= 0x10000:
+        return
+    data = FuzzedDataProvider(FuzzerInput)
+    op = data.ConsumeIntInRange(OP_STRXFRM, OP_STRCOLL)
+    first_len = _next_length(data)
+    if not first_len:
+        return
+    first = data.ConsumeUnicode(first_len)
+    try:
+        if op == OP_STRXFRM:
+            locale.strxfrm(first)
+        elif op == OP_STRCOLL:
+            second_len = _next_length(data)
+            second = data.ConsumeUnicode(second_len) if second_len > 0 else ""
+            locale.strcoll(first, second)
+    except Exception:
+        pass