Skip to content

Commit 4f4965f

Browse files
eendebakpt and claude committed
gh-150942: Speed up json.loads array and object decoding
Append parsed values to the result list with _PyList_AppendTakeRef and insert key/value pairs with _PyDict_SetItem_Take2, which take ownership of the references instead of incref-ing on insert and then decref-ing the local. This removes a reference-count round-trip per element (and, on the free-threaded build, a per-append lock).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent 61ec3c2 commit 4f4965f

2 files changed

Lines changed: 21 additions & 19 deletions

File tree

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Speed up :func:`json.loads` decoding of arrays and objects by storing
2+
parsed values into the result list/dict without an extra reference-count
3+
round-trip (using the internal reference-stealing append/insert helpers).

Modules/_json.c

Lines changed: 18 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
#include "Python.h"
1212
#include "pycore_ceval.h" // _Py_EnterRecursiveCall()
1313
#include "pycore_critical_section.h" // Py_BEGIN_CRITICAL_SECTION_SEQUENCE_FAST()
14+
#include "pycore_dict.h" // _PyDict_SetItem_Take2()
15+
#include "pycore_list.h" // _PyList_AppendTakeRef()
1416
#include "pycore_global_strings.h" // _Py_ID()
1517
#include "pycore_pyerrors.h" // _PyErr_FormatNote
1618
#include "pycore_runtime.h" // _PyRuntime
@@ -752,7 +754,6 @@ _parse_object_unicode(PyScannerObject *s, PyObject *memo, PyObject *pystr, Py_ss
752754
const void *str;
753755
int kind;
754756
Py_ssize_t end_idx;
755-
PyObject *val = NULL;
756757
PyObject *rval = NULL;
757758
PyObject *key = NULL;
758759
int has_pairs_hook = (s->object_pairs_hook != Py_None);
@@ -802,13 +803,16 @@ _parse_object_unicode(PyScannerObject *s, PyObject *memo, PyObject *pystr, Py_ss
802803
while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
803804

804805
/* read any JSON term */
805-
val = scan_once_unicode(s, memo, pystr, idx, &next_idx);
806+
PyObject *val = scan_once_unicode(s, memo, pystr, idx, &next_idx);
806807
if (val == NULL)
807808
goto bail;
808809

810+
/* The steal below takes our references to both key and val
811+
(releasing them on failure). Only key is reset for the bail
812+
path; val is never live there, so it needs no cleanup. */
809813
if (has_pairs_hook) {
810814
PyObject *item = _PyTuple_FromPairSteal(key, val);
811-
key = val = NULL;
815+
key = NULL;
812816
if (item == NULL)
813817
goto bail;
814818
if (PyList_Append(rval, item) == -1) {
@@ -818,10 +822,10 @@ _parse_object_unicode(PyScannerObject *s, PyObject *memo, PyObject *pystr, Py_ss
818822
Py_DECREF(item);
819823
}
820824
else {
821-
if (PyDict_SetItem(rval, key, val) < 0)
825+
int err = _PyDict_SetItem_Take2((PyDictObject *)rval, key, val);
826+
key = NULL;
827+
if (err < 0)
822828
goto bail;
823-
Py_CLEAR(key);
824-
Py_CLEAR(val);
825829
}
826830
idx = next_idx;
827831

@@ -851,21 +855,20 @@ _parse_object_unicode(PyScannerObject *s, PyObject *memo, PyObject *pystr, Py_ss
851855
*next_idx_ptr = idx + 1;
852856

853857
if (has_pairs_hook) {
854-
val = PyObject_CallOneArg(s->object_pairs_hook, rval);
858+
PyObject *res = PyObject_CallOneArg(s->object_pairs_hook, rval);
855859
Py_DECREF(rval);
856-
return val;
860+
return res;
857861
}
858862

859863
/* if object_hook is not None: rval = object_hook(rval) */
860864
if (s->object_hook != Py_None) {
861-
val = PyObject_CallOneArg(s->object_hook, rval);
865+
PyObject *res = PyObject_CallOneArg(s->object_hook, rval);
862866
Py_DECREF(rval);
863-
return val;
867+
return res;
864868
}
865869
return rval;
866870
bail:
867871
Py_XDECREF(key);
868-
Py_XDECREF(val);
869872
Py_XDECREF(rval);
870873
return NULL;
871874
}
@@ -882,7 +885,6 @@ _parse_array_unicode(PyScannerObject *s, PyObject *memo, PyObject *pystr, Py_ssi
882885
const void *str;
883886
int kind;
884887
Py_ssize_t end_idx;
885-
PyObject *val = NULL;
886888
PyObject *rval;
887889
Py_ssize_t next_idx;
888890
Py_ssize_t comma_idx;
@@ -903,14 +905,12 @@ _parse_array_unicode(PyScannerObject *s, PyObject *memo, PyObject *pystr, Py_ssi
903905
while (1) {
904906

905907
/* read any JSON term */
906-
val = scan_once_unicode(s, memo, pystr, idx, &next_idx);
908+
PyObject *val = scan_once_unicode(s, memo, pystr, idx, &next_idx);
907909
if (val == NULL)
908910
goto bail;
909911

910-
if (PyList_Append(rval, val) == -1)
912+
if (_PyList_AppendTakeRef((PyListObject *)rval, val) < 0)
911913
goto bail;
912-
913-
Py_CLEAR(val);
914914
idx = next_idx;
915915

916916
/* skip whitespace between term and , */
@@ -944,13 +944,12 @@ _parse_array_unicode(PyScannerObject *s, PyObject *memo, PyObject *pystr, Py_ssi
944944
*next_idx_ptr = idx + 1;
945945
/* if array_hook is not None: return array_hook(rval) */
946946
if (!Py_IsNone(s->array_hook)) {
947-
val = PyObject_CallOneArg(s->array_hook, rval);
947+
PyObject *res = PyObject_CallOneArg(s->array_hook, rval);
948948
Py_DECREF(rval);
949-
return val;
949+
return res;
950950
}
951951
return rval;
952952
bail:
953-
Py_XDECREF(val);
954953
Py_DECREF(rval);
955954
return NULL;
956955
}

0 commit comments

Comments
 (0)