diff --git a/module-fuzzers/fuzz_json_decode.cpp b/module-fuzzers/fuzz_json_decode.cpp
new file mode 100644
index 0000000..a92c16d
--- /dev/null
+++ b/module-fuzzers/fuzz_json_decode.cpp
@@ -0,0 +1,79 @@
+// fuzz_json_decode.cpp — Fuzzer for CPython's _json C extension module (decoding).
+//
+// This fuzzer exercises the following CPython C extension module via
+// its Python API, called through the Python C API from C++:
+//
+//   _json — json.loads(str), JSONDecoder().decode(str),
+//           JSONDecoder().raw_decode(str)
+//
+// Two selectors (string encoding, target function) are consumed from the
+// fuzz input first; the remaining bytes become the input string. Each target
+// makes one call into the _json C acceleration module's scanning, string
+// unescaping, number parsing, and recursive container building.
+//
+// All module functions are imported once during init and cached as static
+// PyObject* pointers. PyRef (RAII) prevents reference leaks.
+// Max input size: 64 KB.
+
+#include "fuzz_helpers.h"
+
+static PyObject *json_loads, *json_JSONDecoder;
+
+static int initialized = 0;
+
+static void init_json_decode(void) {
+    if (!initialized) {
+        json_loads = import_attr("json", "loads");
+        json_JSONDecoder = import_attr("json", "JSONDecoder");
+        assert(!PyErr_Occurred());
+        initialized = 1;
+    }
+}
+
+// op_json_decode: picks one of three decoding entry points — json.loads(str),
+// JSONDecoder().decode(str), or JSONDecoder().raw_decode(str) — and feeds it
+// the fuzz-derived string, driving the _json C accelerator's decode paths.
+static void op_json_decode(FuzzedDataProvider &fdp) {
+    enum { LOADS, DECODE, RAW_DECODE, NUM_TARGETS };
+
+    // Consume the selectors in a fixed order: encoding, then target.
+    const int str_enc = fdp.ConsumeIntegralInRange<int>(0, 3);
+    const int target_fn = fdp.ConsumeIntegralInRange<int>(0, NUM_TARGETS - 1);
+    if (fdp.remaining_bytes() == 0) return;
+
+    // Everything left in the input becomes the text to decode.
+    std::string text = fdp.ConsumeRemainingBytesAsString();
+    PyRef pystr(fuzz_bytes_to_str(text, str_enc));
+    CHECK(pystr);
+
+    if (target_fn == LOADS) {
+        // json.loads(str)
+        PyRef result(PyObject_CallFunction(json_loads, "O", (PyObject *)pystr));
+    } else if (target_fn == DECODE || target_fn == RAW_DECODE) {
+        // JSONDecoder().decode(str) or JSONDecoder().raw_decode(str): both
+        // build a fresh decoder and differ only in the method invoked.
+        const char *method = (target_fn == DECODE) ? "decode" : "raw_decode";
+        PyRef decoder(PyObject_CallFunction(json_JSONDecoder, NULL));
+        CHECK(decoder);
+        PyRef result(PyObject_CallMethod(decoder, method, "O", (PyObject *)pystr));
+    }
+
+    // Discard any Python exception the call left behind.
+    if (PyErr_Occurred()) PyErr_Clear();
+}
+
+// Fuzzer entry point: initializes the cached json callables on first use,
+// then runs one decode operation on inputs between 1 byte and 64 KB; other
+// sizes are ignored. Always returns 0.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+    assert(Py_IsInitialized());
+    init_json_decode();
+    // Accept only non-empty inputs of at most 64 KB.
+    if (size >= 1 && size <= 0x10000) {
+        // Clear any pending Python exception before running the target.
+        if (PyErr_Occurred()) PyErr_Clear();
+        FuzzedDataProvider provider(data, size);
+        op_json_decode(provider);
+    }
+    return 0;
+}
diff --git a/module-fuzzers/fuzz_json_encode.cpp b/module-fuzzers/fuzz_json_encode.cpp
new file mode 100644
index 0000000..4ff7688
--- /dev/null
+++ b/module-fuzzers/fuzz_json_encode.cpp
@@ -0,0 +1,198 @@
+// fuzz_json_encode.cpp — Fuzzer for CPython's _json C extension module (encoding).
+//
+// This fuzzer exercises the following CPython C extension module via
+// its Python API, called through the Python C API from C++:
+//
+//   _json — json.dumps(str), json.dumps({k:v,...} with
+//           fuzz-typed keys/values),
+//           json.dumps([str,...]
with up to 10 fuzz strings),
+//           JSONEncoder(ensure_ascii=True/False).encode(),
+//           JSONEncoder(sort_keys, indent, ensure_ascii).encode()
+//
+// All module functions are imported once during init and cached as static
+// PyObject* pointers. PyRef (RAII) prevents reference leaks.
+// Max input size: 64 KB.
+
+#include "fuzz_helpers.h"
+
+static PyObject *json_dumps, *json_JSONEncoder;
+
+static int initialized = 0;
+
+// Import json.dumps and json.JSONEncoder once and cache them in the statics.
+static void init_json(void) {
+    if (initialized) return;
+
+    json_dumps = import_attr("json", "dumps");
+    json_JSONEncoder = import_attr("json", "JSONEncoder");
+    assert(!PyErr_Occurred());
+    initialized = 1;
+}
+
+// Build a fuzz-chosen JSON-serializable Python object and return a new
+// reference (NULL if the underlying constructor failed).
+//   key_only — restrict the choice to the scalar types usable as dict keys
+//              (str, int, float, bool, None).
+//   depth    — current nesting level; at depth >= 3 only scalars are
+//              produced, which bounds the recursion.
+// Explicit template arguments are required: ConsumeIntegral and
+// ConsumeFloatingPoint take no arguments to deduce a type from.
+static PyObject *make_json_value(FuzzedDataProvider &fdp, int str_enc,
+                                 bool key_only, int depth = 0) {
+    enum { T_STR, T_INT, T_FLOAT, T_BOOL, T_NONE, T_LIST, T_DICT, NUM_TYPES };
+    const int max_type = (key_only || depth >= 3) ? T_NONE : (NUM_TYPES - 1);
+    switch (fdp.ConsumeIntegralInRange<int>(0, max_type)) {
+    case T_STR: {
+        // Unsigned source type keeps the requested length in [1, 10000].
+        size_t slen = (fdp.ConsumeIntegral<uint16_t>() % 10000) + 1;
+        std::string s = fdp.ConsumeBytesAsString(slen);
+        return fuzz_bytes_to_str(s, str_enc);
+    }
+    case T_INT:
+        return PyLong_FromLong(fdp.ConsumeIntegral<long>());
+    case T_FLOAT:
+        return PyFloat_FromDouble(fdp.ConsumeFloatingPoint<double>());
+    case T_BOOL:
+        return PyBool_FromLong(fdp.ConsumeBool());
+    case T_LIST: {
+        int count = fdp.ConsumeIntegralInRange<int>(0, 3);
+        PyObject *lst = PyList_New(0);
+        if (!lst) return NULL;
+        for (int i = 0; i < count; i++) {
+            PyObject *item = make_json_value(fdp, str_enc, false, depth + 1);
+            if (!item) { PyErr_Clear(); continue; }
+            // A failed append must not leave an exception pending.
+            if (PyList_Append(lst, item) < 0) PyErr_Clear();
+            Py_DECREF(item);
+        }
+        return lst;
+    }
+    case T_DICT: {
+        int count = fdp.ConsumeIntegralInRange<int>(0, 3);
+        PyObject *d = PyDict_New();
+        if (!d) return NULL;
+        for (int i = 0; i < count; i++) {
+            PyObject *k = make_json_value(fdp, str_enc, true, depth + 1);
+            if (!k) { PyErr_Clear(); continue; }
+            PyObject *v = make_json_value(fdp, str_enc, false, depth + 1);
+            if (v) {
+                if (PyDict_SetItem(d, k, v) < 0) PyErr_Clear();
+                Py_DECREF(v);
+            } else {
+                PyErr_Clear();
+            }
+            Py_DECREF(k);
+        }
+        return d;
+    }
+    case T_NONE:
+    default:
+        Py_RETURN_NONE;
+    }
+}
+
+// Instantiate json.JSONEncoder(ensure_ascii=...); when pretty is true also
+// pass sort_keys=True and indent=2. Returns a new reference, or NULL when
+// any step failed (the Python error is left for the caller to clear).
+static PyObject *make_encoder(bool ensure_ascii, bool pretty) {
+    PyRef kwargs(PyDict_New());
+    if (!kwargs) return NULL;
+    PyRef no_args(PyTuple_New(0));
+    if (!no_args) return NULL;
+    PyObject *flag = ensure_ascii ? Py_True : Py_False;
+    if (PyDict_SetItemString(kwargs, "ensure_ascii", flag) < 0) return NULL;
+    if (pretty) {
+        PyRef indent(PyLong_FromLong(2));
+        if (!indent) return NULL;
+        if (PyDict_SetItemString(kwargs, "indent", indent) < 0) return NULL;
+        if (PyDict_SetItemString(kwargs, "sort_keys", Py_True) < 0) return NULL;
+    }
+    return PyObject_Call(json_JSONEncoder, no_args, kwargs);
+}
+
+// op_json_encode: the fuzzer selects the target: json.dumps(str),
+// json.dumps({k:v,...}) from up to 5 fuzz-typed pairs, json.dumps([...]) of
+// up to 10 short fuzz strings, or JSONEncoder with options. Duplicate keys
+// and failed conversions can leave containers smaller than requested.
+// Exercises the _json C acceleration module's encoding paths.
+static void op_json_encode(FuzzedDataProvider &fdp) {
+    int str_enc = fdp.ConsumeIntegralInRange<int>(0, 3);
+    enum { DUMPS_STR, DUMPS_DICT, DUMPS_LIST, ENCODE_NO_ASCII, ENCODE_ASCII, ENCODE_OPTS, NUM_TARGETS };
+    int target_fn = fdp.ConsumeIntegralInRange<int>(0, NUM_TARGETS - 1);
+    if (fdp.remaining_bytes() == 0) return;
+    // Mixed int/size_t bounds cannot be deduced, so name the type explicitly.
+    size_t data_len = fdp.ConsumeIntegralInRange<size_t>(
+        1, std::min(fdp.remaining_bytes(), (size_t)10000));
+    std::string data = fdp.ConsumeBytesAsString(data_len);
+    PyRef pystr(fuzz_bytes_to_str(data, str_enc));
+    CHECK(pystr);
+
+    switch (target_fn) {
+    case DUMPS_STR: {
+        // json.dumps(str)
+        PyRef r(PyObject_CallFunction(json_dumps, "O", (PyObject *)pystr));
+        break;
+    }
+    case DUMPS_DICT: {
+        // json.dumps({k: v, ...}) — up to 5 entries, fuzz-chosen types.
+        int count = fdp.ConsumeIntegralInRange<int>(1, 5);
+        PyRef d(PyDict_New());
+        CHECK(d);
+        for (int i = 0; i < count; i++) {
+            PyRef k(make_json_value(fdp, str_enc, true));
+            if (!k) { PyErr_Clear(); continue; }
+            PyRef v(make_json_value(fdp, str_enc, false));
+            if (!v) { PyErr_Clear(); continue; }
+            if (PyDict_SetItem(d, k, v) < 0) PyErr_Clear();
+        }
+        PyRef r(PyObject_CallFunction(json_dumps, "O", (PyObject *)d));
+        break;
+    }
+    case DUMPS_LIST: {
+        // json.dumps([str, ...]) — up to 10 strings, each from at most 10 bytes.
+        int count = fdp.ConsumeIntegralInRange<int>(1, 10);
+        PyRef lst(PyList_New(0));
+        CHECK(lst);
+        for (int i = 0; i < count; i++) {
+            size_t slen = (fdp.ConsumeIntegral<uint8_t>() % 10) + 1;
+            std::string s = fdp.ConsumeBytesAsString(slen);
+            PyRef item(fuzz_bytes_to_str(s, str_enc));
+            if (!item) { PyErr_Clear(); continue; }
+            if (PyList_Append(lst, item) < 0) PyErr_Clear();
+        }
+        PyRef r(PyObject_CallFunction(json_dumps, "O", (PyObject *)lst));
+        break;
+    }
+    case ENCODE_NO_ASCII:
+    case ENCODE_ASCII: {
+        // JSONEncoder(ensure_ascii=False/True).encode(str)
+        PyRef enc(make_encoder(target_fn == ENCODE_ASCII, false));
+        CHECK(enc);
+        PyRef r(PyObject_CallMethod(enc, "encode", "O", (PyObject *)pystr));
+        break;
+    }
+    case ENCODE_OPTS: {
+        // JSONEncoder(sort_keys=True, indent=2, ensure_ascii=False).encode({s:s})
+        PyRef enc(make_encoder(false, true));
+        CHECK(enc);
+        PyRef d(PyDict_New());
+        CHECK(d);
+        if (PyDict_SetItem(d, pystr, pystr) < 0) PyErr_Clear();
+        PyRef r(PyObject_CallMethod(enc, "encode", "O", (PyObject *)d));
+        break;
+    }
+    }
+    if (PyErr_Occurred()) PyErr_Clear();
+}
+
+// Fuzzer entry point: ignores empty or >64 KB inputs; always returns 0.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+    assert(Py_IsInitialized());
+    init_json();
+    if (size < 1 || size > 0x10000) return 0;
+    if (PyErr_Occurred()) PyErr_Clear();
+
+    FuzzedDataProvider fdp(data, size);
+    op_json_encode(fdp);
+    return 0;
+}