Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 79 additions & 0 deletions module-fuzzers/fuzz_json_decode.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
// fuzz_json_decode.cpp — Fuzzer for CPython's _json C extension module (decoding).
//
// This fuzzer exercises the following CPython C extension module via
// its Python API, called through the Python C API from C++:
//
// _json — json.loads(str), JSONDecoder().decode(str),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We already fuzz json.loads() in json.py?

// JSONDecoder().raw_decode(str)
//
// Two bytes of fuzz input select the string encoding and the target
// function (FuzzedDataProvider draws integral values from the end of the
// buffer). Remaining bytes become the input string. Each target makes a
// single decode call. Exercises the _json C acceleration module's scanning,
// string unescaping, number parsing, and recursive container building.
//
// All module functions are imported once during init and cached as static
// PyObject* pointers. PyRef (RAII) prevents reference leaks.
// Max input size: 64 KB.

#include "fuzz_helpers.h"

// Callables looked up from the json module by init_json_decode().
static PyObject *json_loads;
static PyObject *json_JSONDecoder;

// Non-zero once init_json_decode() has filled in the pointers above.
static int initialized = 0;

// One-time setup: looks up json.loads and json.JSONDecoder via import_attr()
// and stores them in the file-scope pointers. Later calls do nothing.
// The assert checks that no Python exception is pending after the lookups
// (effective only in builds where assert() is enabled).
static void init_json_decode(void) {
    if (!initialized) {
        json_loads = import_attr("json", "loads");
        json_JSONDecoder = import_attr("json", "JSONDecoder");
        assert(!PyErr_Occurred());
        initialized = 1;
    }
}

// op_json_decode: runs one fuzz-selected decoding entry point on the input.
//
// Two small integers are consumed from the provider: a string-encoding
// selector (0..3, forwarded to fuzz_bytes_to_str) and a target selector.
// All remaining bytes become the text that is handed to one of:
//   - json.loads(str)
//   - JSONDecoder().decode(str)
//   - JSONDecoder().raw_decode(str)
// Any Python exception raised by the call is cleared before returning.
static void op_json_decode(FuzzedDataProvider &fdp) {
    enum { LOADS, DECODE, RAW_DECODE, NUM_TARGETS };

    const int encoding_choice = fdp.ConsumeIntegralInRange<int>(0, 3);
    const int which = fdp.ConsumeIntegralInRange<int>(0, NUM_TARGETS - 1);
    if (fdp.remaining_bytes() == 0) return;

    std::string payload = fdp.ConsumeRemainingBytesAsString();
    PyRef text(fuzz_bytes_to_str(payload, encoding_choice));
    CHECK(text);

    if (which == LOADS) {
        // json.loads(str)
        PyRef result = PyObject_CallFunction(json_loads, "O", (PyObject *)text);
    } else if (which == DECODE || which == RAW_DECODE) {
        // JSONDecoder().decode(str) or JSONDecoder().raw_decode(str):
        // both build a default decoder and differ only in the method called.
        const char *method = (which == DECODE) ? "decode" : "raw_decode";
        PyRef decoder = PyObject_CallFunction(json_JSONDecoder, NULL);
        CHECK(decoder);
        PyRef result = PyObject_CallMethod(decoder, method, "O", (PyObject *)text);
    }

    // Decoding errors are expected for arbitrary input; discard them.
    if (PyErr_Occurred()) PyErr_Clear();
}

extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
assert(Py_IsInitialized());
init_json_decode();
if (size < 1 || size > 0x10000) return 0;
if (PyErr_Occurred()) PyErr_Clear();

FuzzedDataProvider fdp(data, size);
op_json_decode(fdp);

return 0;
}
198 changes: 198 additions & 0 deletions module-fuzzers/fuzz_json_encode.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@
// fuzz_json_encode.cpp — Fuzzer for CPython's _json C extension module (encoding).
//
// This fuzzer exercises the following CPython C extension module via
// its Python API, called through the Python C API from C++:
//
// _json — json.dumps(str), json.dumps({k:v,...} with
// fuzz-typed keys/values),
// json.dumps([str,...] with 1-10 fuzz-derived strings),
// JSONEncoder(ensure_ascii=True/False).encode(),
// JSONEncoder(sort_keys, indent, ensure_ascii).encode()
//
// All module functions are imported once during init and cached as static
// PyObject* pointers. PyRef (RAII) prevents reference leaks.
// Max input size: 64 KB.

#include "fuzz_helpers.h"

// Callables looked up from the json module by init_json().
static PyObject *json_dumps;
static PyObject *json_JSONEncoder;

// Non-zero once init_json() has filled in the pointers above.
static int initialized = 0;

// One-time setup: looks up json.dumps and json.JSONEncoder via import_attr()
// and stores them in the file-scope pointers. Later calls do nothing.
// The assert checks that no Python exception is pending after the lookups
// (effective only in builds where assert() is enabled).
static void init_json(void) {
    if (!initialized) {
        json_dumps = import_attr("json", "dumps");
        json_JSONEncoder = import_attr("json", "JSONEncoder");
        assert(!PyErr_Occurred());
        initialized = 1;
    }
}

// Build a fuzz-chosen JSON-serializable Python object.
//
// Parameters:
//   fdp      - fuzz data source; bytes are consumed to pick the type and value.
//   str_enc  - encoding selector forwarded to fuzz_bytes_to_str() for strings.
//   key_only - when true, only scalar types (str, int, float, bool, None) are
//              produced, so the result is usable as a dict key.
//   depth    - current nesting level; at depth >= 3 only scalars are produced,
//              which bounds the recursion for nested list/dict values.
//
// Returns a new reference, or NULL if the object could not be created
// (callers in this file clear any pending exception in that case).
// Failures while filling a container are cleared and the element is skipped,
// so no exception is left pending when a container is returned.
static PyObject *make_json_value(FuzzedDataProvider &fdp, int str_enc,
                                 bool key_only, int depth = 0) {
    enum { T_STR, T_INT, T_FLOAT, T_BOOL, T_NONE, T_LIST, T_DICT, NUM_TYPES };
    const bool scalars_only = key_only || depth >= 3;
    const int max_type = scalars_only ? T_NONE : (NUM_TYPES - 1);
    int t = fdp.ConsumeIntegralInRange<int>(0, max_type);
    switch (t) {
    case T_STR: {
        // String length is 1..10000 bytes before decoding.
        size_t slen = (fdp.ConsumeIntegral<uint16_t>() % 10000) + 1;
        std::string s = fdp.ConsumeBytesAsString(slen);
        return fuzz_bytes_to_str(s, str_enc);
    }
    case T_INT:
        return PyLong_FromLong(fdp.ConsumeIntegral<int32_t>());
    case T_FLOAT:
        return PyFloat_FromDouble(fdp.ConsumeFloatingPoint<double>());
    case T_BOOL:
        return PyBool_FromLong(fdp.ConsumeBool());
    case T_LIST: {
        int count = fdp.ConsumeIntegralInRange<int>(0, 3);
        PyObject *lst = PyList_New(0);
        if (!lst) return NULL;
        for (int i = 0; i < count; i++) {
            PyObject *item = make_json_value(fdp, str_enc, false, depth + 1);
            if (!item) { PyErr_Clear(); continue; }
            // PyList_Append returns -1 with an exception set on failure;
            // drop the element rather than continue with an error pending.
            if (PyList_Append(lst, item) < 0) PyErr_Clear();
            Py_DECREF(item);
        }
        return lst;
    }
    case T_DICT: {
        int count = fdp.ConsumeIntegralInRange<int>(0, 3);
        PyObject *d = PyDict_New();
        if (!d) return NULL;
        for (int i = 0; i < count; i++) {
            PyObject *k = make_json_value(fdp, str_enc, true, depth + 1);
            if (!k) { PyErr_Clear(); continue; }
            PyObject *v = make_json_value(fdp, str_enc, false, depth + 1);
            if (!v) { Py_DECREF(k); PyErr_Clear(); continue; }
            // PyDict_SetItem returns -1 with an exception set on failure;
            // skip the entry rather than continue with an error pending.
            if (PyDict_SetItem(d, k, v) < 0) PyErr_Clear();
            Py_DECREF(k);
            Py_DECREF(v);
        }
        return d;
    }
    case T_NONE:
    default:
        Py_RETURN_NONE;
    }
}

// op_json_encode: the fuzzer selects the target: json.dumps(str),
// json.dumps({k:v,...} with 1-5 fuzz-typed entries),
// json.dumps([1-10 unique strs]), or JSONEncoder with options.
// Exercises the _json C acceleration module's encoding paths.
static void op_json_encode(FuzzedDataProvider &fdp) {
int str_enc = fdp.ConsumeIntegralInRange<int>(0, 3);
enum { DUMPS_STR, DUMPS_DICT, DUMPS_LIST, ENCODE_NO_ASCII, ENCODE_ASCII, ENCODE_OPTS, NUM_TARGETS };
int target_fn = fdp.ConsumeIntegralInRange<int>(0, NUM_TARGETS - 1);
if (fdp.remaining_bytes() == 0) return;
size_t data_len = fdp.ConsumeIntegralInRange<size_t>(
1, std::min(fdp.remaining_bytes(), (size_t)10000));
std::string data = fdp.ConsumeBytesAsString(data_len);
PyRef pystr(fuzz_bytes_to_str(data, str_enc));
CHECK(pystr);

switch (target_fn) {
case DUMPS_STR: {
// json.dumps(str)
PyRef r = PyObject_CallFunction(json_dumps, "O", (PyObject *)pystr);
break;
}
case DUMPS_DICT: {
// json.dumps({k: v, ...}) — 1 to 5 entries, fuzz-chosen types.
int count = fdp.ConsumeIntegralInRange<int>(1, 5);
PyRef d = PyDict_New();
CHECK(d);
for (int i = 0; i < count; i++) {
PyRef k(make_json_value(fdp, str_enc, true));
if (!k) { PyErr_Clear(); continue; }
PyRef v(make_json_value(fdp, str_enc, false));
if (!v) { PyErr_Clear(); continue; }
PyDict_SetItem(d, k, v);
}
PyRef r = PyObject_CallFunction(json_dumps, "O", (PyObject *)d);
break;
}
case DUMPS_LIST: {
// json.dumps([str, str, ...]) — 1 to 10 unique fuzz strings.
int count = fdp.ConsumeIntegralInRange<int>(1, 10);
PyRef lst = PyList_New(0);
CHECK(lst);
for (int i = 0; i < count; i++) {
size_t slen = (fdp.ConsumeIntegral<uint8_t>() % 10) + 1;
std::string s = fdp.ConsumeBytesAsString(slen);
PyRef item(fuzz_bytes_to_str(s, str_enc));
if (!item) { PyErr_Clear(); continue; }
PyList_Append(lst, item);
}
PyRef r = PyObject_CallFunction(json_dumps, "O", (PyObject *)lst);
break;
}
case ENCODE_NO_ASCII: {
// JSONEncoder(ensure_ascii=False).encode(str)
PyRef kwargs = PyDict_New();
CHECK(kwargs);
PyDict_SetItemString(kwargs, "ensure_ascii", Py_False);
PyRef empty = PyTuple_New(0);
CHECK(empty);
PyRef enc = PyObject_Call(json_JSONEncoder, empty, kwargs);
CHECK(enc);
PyRef r = PyObject_CallMethod(enc, "encode", "O", (PyObject *)pystr);
break;
}
case ENCODE_ASCII: {
// JSONEncoder(ensure_ascii=True).encode(str)
PyRef kwargs = PyDict_New();
CHECK(kwargs);
PyDict_SetItemString(kwargs, "ensure_ascii", Py_True);
PyRef empty = PyTuple_New(0);
CHECK(empty);
PyRef enc = PyObject_Call(json_JSONEncoder, empty, kwargs);
CHECK(enc);
PyRef r = PyObject_CallMethod(enc, "encode", "O", (PyObject *)pystr);
break;
}
case ENCODE_OPTS: {
// JSONEncoder(sort_keys=True, indent=2, ensure_ascii=False).encode({s:s})
PyRef kwargs = PyDict_New();
CHECK(kwargs);
PyDict_SetItemString(kwargs, "sort_keys", Py_True);
PyRef indent = PyLong_FromLong(2);
CHECK(indent);
PyDict_SetItemString(kwargs, "indent", indent);
PyDict_SetItemString(kwargs, "ensure_ascii", Py_False);
PyRef empty = PyTuple_New(0);
CHECK(empty);
PyRef enc = PyObject_Call(json_JSONEncoder, empty, kwargs);
CHECK(enc);
PyRef d = PyDict_New();
CHECK(d);
PyDict_SetItem(d, pystr, pystr);
PyRef r = PyObject_CallMethod(enc, "encode", "O", (PyObject *)d);
break;
}
}
if (PyErr_Occurred()) PyErr_Clear();
}

extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
assert(Py_IsInitialized());
init_json();
if (size < 1 || size > 0x10000) return 0;
if (PyErr_Occurred()) PyErr_Clear();

FuzzedDataProvider fdp(data, size);
op_json_encode(fdp);

return 0;
}