diff --git a/.agents/skills/third-party-package-patches/SKILL.md b/.agents/skills/third-party-package-patches/SKILL.md new file mode 100644 index 0000000000..e79e4c3ae5 --- /dev/null +++ b/.agents/skills/third-party-package-patches/SKILL.md @@ -0,0 +1,97 @@ +--- +name: third-party-package-patches +description: Create or update GraalPy third-party package compatibility patches under graalpython/lib-graalpython/patches, including PyPI source preparation, rebasing existing patches, metadata.toml updates, license checks, version-range validation, and verify_patches.py validation. +--- + +# Third-Party Package Patches + +Use this skill when creating or updating compatibility patches for packages installed by pip on GraalPy. + +## Key Files +- Source preparation: `scripts/get_pypi_source.py` +- Patch metadata: `graalpython/lib-graalpython/patches/metadata.toml` +- Patch directory: `graalpython/lib-graalpython/patches/` +- Metadata verifier: `mx.graalpython/verify_patches.py` + +## Workflow +1. Identify the package name and target version. Use the normalized package key used by PyPI/pip: lowercase with runs of `-`, `_`, and `.` normalized to `-`. + +2. Prepare source with the repo helper: +```bash +python scripts/get_pypi_source.py package==version +``` +The script prints `Prepared source at: ...`. Use that directory as the working tree. It is already a temporary git repository with an initial commit, and it has already been processed by `graalpython/lib-graalpython/modules/autopatch_capi.py`. + +3. Inspect `graalpython/lib-graalpython/patches/metadata.toml` for existing `[[package.rules]]` entries. +- If a matching patch exists for the requested version, apply it first. +- If only a nearby version has a patch, try applying that patch and rebase it carefully onto the prepared source. +- Honor `subdir` when present: pip applies sdist patches from that subdirectory. Apply and generate the patch from the same directory layout that the metadata rule will use. 
+- Honor `dist-type` when choosing whether the patch is for `sdist`, `wheel`, or both. + +4. Apply existing patches using the same semantics as pip where practical: +```bash +patch -f -d /tmp/package-version-... -p1 -i /path/to/graalpython/lib-graalpython/patches/existing.patch +``` +If the rule has `subdir = 'src'`, use `-d /tmp/package-version-.../src`. Resolve rejects by editing source files in the temporary repository, then remove any `.rej`/`.orig` files after checking them. Search for conflict markers and rejects before staging. + +5. Make the GraalPy compatibility changes in the prepared source. Keep the patch minimal and package-focused. + +6. Stage the desired changes in the temporary source repository: +```bash +git add -A +git diff --cached +``` +Review the staged diff. Do not include generated caches, build outputs, rejected patch files, or unrelated churn. + +7. Create or refresh the patch file only from the staged git diff: +```bash +git diff --cached > /path/to/graalpython/lib-graalpython/patches/package-version.patch +``` +Guardrail: do not hand-edit patch files. If the patch output is wrong, fix the temporary source tree, adjust staging, and regenerate with `git diff --cached`. + +8. Update `metadata.toml` if needed. +- Add a new `[[package.rules]]` entry when no suitable one exists. +- Keep existing precedent for rule ordering, patch names, comments, `install-priority`, `dist-type`, and `subdir`. +- Every rule with `patch = ...` needs `license = ...`. +- When adding a new patch entry, confirm the license from PyPI metadata, preferably the JSON API for the exact release or current package metadata. Use an SPDX identifier accepted by `mx.graalpython/verify_patches.py`. +- If upstream publishes no suitable PyPI source artifact, add `[[package.add-sources]]` with the exact version and release tarball URL, then rerun `scripts/get_pypi_source.py`. + +9. Choose the version range deliberately. 
+- Prefer one patch over many when the same patch applies cleanly and the underlying package layout/API is stable. +- Test every version covered by a widened range, including lower and upper boundary releases. +- Small, robust patches may use an open-ended range when they are unlikely to break in future versions. +- If newer versions no longer need a patch, add a no-patch rule with a note rather than leaving users pointed at stale patched versions. + +10. Validate patch application for the covered versions. +- For each version in the rule range that you claim to support, prepare a fresh source tree with `scripts/get_pypi_source.py`. +- Apply the generated patch with `patch -f -p1` from the root or `subdir` exactly as the metadata rule requires. +- Check for nonzero exit status, `.rej` files, `.orig` files, unexpected unstaged files, and conflict markers. + +11. Run the repository verifier before finishing: +```bash +python mx.graalpython/verify_patches.py graalpython/lib-graalpython/patches +``` + +12. If you were asked to build or test the patched package, you need to rebuild GraalPy with `mx python-jvm` to pick up the changes. Create a venv with `mx python -m venv venv_name` and use it for building and testing. + +## Metadata Reference +Rule keys accepted by the verifier are: +- `version` +- `patch` +- `license` +- `subdir` +- `dist-type` +- `install-priority` +- `note` + +Allowed `dist-type` values are `wheel` and `sdist`. + +Allowed license identifiers are maintained in `mx.graalpython/verify_patches.py`. If PyPI metadata is ambiguous, inspect the source distribution license files and report the ambiguity instead of guessing. 
+ +## Reporting +When done, report: +- package and version(s) tested +- patch file created or updated +- metadata rule added or changed +- PyPI license value used +- `verify_patches.py` result diff --git a/graalpython/lib-graalpython/patches/librt-0.10.0.patch b/graalpython/lib-graalpython/patches/librt-0.10.0.patch new file mode 100644 index 0000000000..e3e7e555ee --- /dev/null +++ b/graalpython/lib-graalpython/patches/librt-0.10.0.patch @@ -0,0 +1,196 @@ +diff --git a/CPy.h b/CPy.h +index 89ef4d0..a763914 100644 +--- a/CPy.h ++++ b/CPy.h +@@ -333,6 +333,7 @@ static inline bool CPyTagged_IsLe(CPyTagged left, CPyTagged right) { + } + + static inline int64_t CPyLong_AsInt64(PyObject *o) { ++#ifndef GRAALPY_VERSION_NUM + if (likely(PyLong_Check(o))) { + PyLongObject *lobj = (PyLongObject *)o; + #if CPY_3_12_FEATURES +@@ -349,11 +350,13 @@ static inline int64_t CPyLong_AsInt64(PyObject *o) { + } + #endif + } ++#endif + // Slow path + return CPyLong_AsInt64_(o); + } + + static inline int32_t CPyLong_AsInt32(PyObject *o) { ++#ifndef GRAALPY_VERSION_NUM + if (likely(PyLong_Check(o))) { + #if CPY_3_12_FEATURES + PyLongObject *lobj = (PyLongObject *)o; +@@ -375,11 +378,13 @@ static inline int32_t CPyLong_AsInt32(PyObject *o) { + } + #endif + } ++#endif + // Slow path + return CPyLong_AsInt32_(o); + } + + static inline int16_t CPyLong_AsInt16(PyObject *o) { ++#ifndef GRAALPY_VERSION_NUM + if (likely(PyLong_Check(o))) { + #if CPY_3_12_FEATURES + PyLongObject *lobj = (PyLongObject *)o; +@@ -405,11 +410,13 @@ static inline int16_t CPyLong_AsInt16(PyObject *o) { + } + #endif + } ++#endif + // Slow path + return CPyLong_AsInt16_(o); + } + + static inline uint8_t CPyLong_AsUInt8(PyObject *o) { ++#ifndef GRAALPY_VERSION_NUM + if (likely(PyLong_Check(o))) { + #if CPY_3_12_FEATURES + PyLongObject *lobj = (PyLongObject *)o; +@@ -435,6 +442,7 @@ static inline uint8_t CPyLong_AsUInt8(PyObject *o) { + } + #endif + } ++#endif + // Slow path + return CPyLong_AsUInt8_(o); + } +diff --git 
a/mypyc_util.h b/mypyc_util.h +index 6309ed5..2851067 100644 +--- a/mypyc_util.h ++++ b/mypyc_util.h +@@ -82,21 +82,17 @@ + + static inline void CPy_INCREF_NO_IMM(PyObject *op) + { +- Py_REFCNT(op)++; ++ Py_INCREF(op); + } + + static inline void CPy_DECREF_NO_IMM(PyObject *op) + { +- if ((Py_SET_REFCNT(op, Py_REFCNT(op) - 1), Py_REFCNT(op)) == 0) { +- _Py_Dealloc(op); +- } ++ Py_DECREF(op); + } + + static inline void CPy_XDECREF_NO_IMM(PyObject *op) + { +- if (op != NULL && (Py_SET_REFCNT(op, Py_REFCNT(op) - 1), Py_REFCNT(op)) == 0) { +- _Py_Dealloc(op); +- } ++ Py_XDECREF(op); + } + + #define CPy_INCREF_NO_IMM(op) CPy_INCREF_NO_IMM((PyObject *)(op)) +diff --git a/pythonsupport.c b/pythonsupport.c +index 0a99f0a..1a442bb 100644 +--- a/pythonsupport.c ++++ b/pythonsupport.c +@@ -108,7 +108,46 @@ init_subclass(PyTypeObject *type, PyObject *kwds) + return 0; + } + +-#if CPY_3_12_FEATURES ++#ifdef GRAALPY_VERSION_NUM ++ ++Py_ssize_t ++CPyLong_AsSsize_tAndOverflow_(PyObject *vv, int *overflow) ++{ ++ Py_ssize_t res; ++ ++ *overflow = 0; ++ ++ res = PyLong_AsSsize_t(vv); ++ if (res == -1 && PyErr_Occurred()) { ++ PyObject *zero; ++ int is_negative; ++ ++ if (!PyErr_ExceptionMatches(PyExc_OverflowError)) { ++ return -1; ++ } ++ PyErr_Clear(); ++ ++ zero = PyLong_FromLong(0); ++ if (zero == NULL) { ++ return -1; ++ } ++ is_negative = PyObject_RichCompareBool(vv, zero, Py_LT); ++ Py_DECREF(zero); ++ if (is_negative < 0) { ++ return -1; ++ } ++ *overflow = is_negative ? -1 : 1; ++ return -1; ++ } ++ ++ if ((size_t)res > CPY_TAGGED_MAX && (res >= 0 || res < CPY_TAGGED_MIN)) { ++ *overflow = res < 0 ? 
-1 : 1; ++ return -1; ++ } ++ return res; ++} ++ ++#elif CPY_3_12_FEATURES + + // Slow path of CPyLong_AsSsize_tAndOverflow (non-inlined) + Py_ssize_t +diff --git a/pythonsupport.h b/pythonsupport.h +index 6f38a9b..0e89511 100644 +--- a/pythonsupport.h ++++ b/pythonsupport.h +@@ -40,7 +40,15 @@ int init_subclass(PyTypeObject *type, PyObject *kwds); + Py_ssize_t + CPyLong_AsSsize_tAndOverflow_(PyObject *vv, int *overflow); + +-#if CPY_3_12_FEATURES ++#ifdef GRAALPY_VERSION_NUM ++ ++static inline Py_ssize_t ++CPyLong_AsSsize_tAndOverflow(PyObject *vv, int *overflow) ++{ ++ return CPyLong_AsSsize_tAndOverflow_(vv, overflow); ++} ++ ++#elif CPY_3_12_FEATURES + + static inline Py_ssize_t + CPyLong_AsSsize_tAndOverflow(PyObject *vv, int *overflow) +@@ -117,6 +125,7 @@ CPyLong_AsSsize_tAndOverflow(PyObject *vv, int *overflow) + #endif + + // Adapted from listobject.c in Python 3.7.0 ++#if 0 // GraalPy change + static int + list_resize(PyListObject *self, Py_ssize_t newsize) + { +@@ -162,6 +171,7 @@ list_resize(PyListObject *self, Py_ssize_t newsize) + self->allocated = new_allocated; + return 0; + } ++#endif + + // Changed to use PyList_SetSlice instead of the internal list_ass_slice + static PyObject * +@@ -182,6 +192,7 @@ list_pop_impl(PyListObject *self, Py_ssize_t index) + return NULL; + } + v = PySequence_Fast_ITEMS((PyObject*)self)[index]; ++#if 0 // GraalPy change + if (index == Py_SIZE(self) - 1) { + status = list_resize(self, Py_SIZE(self) - 1); + if (status >= 0) +@@ -189,6 +200,7 @@ list_pop_impl(PyListObject *self, Py_ssize_t index) + else + return NULL; + } ++#endif + Py_INCREF(v); + status = PyList_SetSlice((PyObject *)self, index, index+1, (PyObject *)NULL); + if (status < 0) { diff --git a/graalpython/lib-graalpython/patches/metadata.toml b/graalpython/lib-graalpython/patches/metadata.toml index 14d47f6d52..ac35b422e1 100644 --- a/graalpython/lib-graalpython/patches/metadata.toml +++ b/graalpython/lib-graalpython/patches/metadata.toml @@ -390,7 +390,13 @@ 
patch = 'numba-0.59.1.patch' license = 'BSD-2-Clause' [[numpy.rules]] -version = '>= 2.0.0rc1, < 2.3' +version = '>= 2.4, < 2.4.5' +patch = 'numpy-2.4.4.patch' +license = 'BSD-3-Clause' +dist-type = 'sdist' + +[[numpy.rules]] +version = '>= 2.0.0rc1, < 2.4' patch = 'numpy-2.0.0.patch' license = 'BSD-3-Clause' dist-type = 'sdist' @@ -428,6 +434,48 @@ dist-type = 'sdist' version = '1.22.1' url = 'https://github.com/microsoft/onnxruntime/archive/refs/tags/v1.22.1.tar.gz' +[[onnxruntime.rules]] +version = '>= 1.23, < 1.26' +patch = 'onnxruntime-1.25.1.patch' +license = 'MIT' +dist-type = 'sdist' + +[[onnxruntime.add-sources]] +version = '1.23.0' +url = 'https://github.com/microsoft/onnxruntime/archive/refs/tags/v1.23.0.tar.gz' + +[[onnxruntime.add-sources]] +version = '1.23.1' +url = 'https://github.com/microsoft/onnxruntime/archive/refs/tags/v1.23.1.tar.gz' + +[[onnxruntime.add-sources]] +version = '1.23.2' +url = 'https://github.com/microsoft/onnxruntime/archive/refs/tags/v1.23.2.tar.gz' + +[[onnxruntime.add-sources]] +version = '1.24.1' +url = 'https://github.com/microsoft/onnxruntime/archive/refs/tags/v1.24.1.tar.gz' + +[[onnxruntime.add-sources]] +version = '1.24.2' +url = 'https://github.com/microsoft/onnxruntime/archive/refs/tags/v1.24.2.tar.gz' + +[[onnxruntime.add-sources]] +version = '1.24.3' +url = 'https://github.com/microsoft/onnxruntime/archive/refs/tags/v1.24.3.tar.gz' + +[[onnxruntime.add-sources]] +version = '1.24.4' +url = 'https://github.com/microsoft/onnxruntime/archive/refs/tags/v1.24.4.tar.gz' + +[[onnxruntime.add-sources]] +version = '1.25.0' +url = 'https://github.com/microsoft/onnxruntime/archive/refs/tags/v1.25.0.tar.gz' + +[[onnxruntime.add-sources]] +version = '1.25.1' +url = 'https://github.com/microsoft/onnxruntime/archive/refs/tags/v1.25.1.tar.gz' + [[optree.rules]] patch = 'optree.patch' license = 'Apache-2.0' @@ -447,6 +495,11 @@ version = '== 3.10.18' patch = 'orjson-3.10.18.patch' license = 'Apache-2.0 OR MIT' +[[orjson.rules]] 
+version = '== 3.11.9' +patch = 'orjson-3.11.9.patch' +license = 'Apache-2.0 OR MIT' + [[ormsgpack.rules]] version = '>= 1.8.0, <= 1.9.1' patch = 'ormsgpack-1.8.0-1.9.1.patch' @@ -468,6 +521,18 @@ install-priority = 0 patch = 'packaging.patch' license = 'Apache-2.0 OR BSD-2-Clause' +[[pandas.rules]] +version = '>= 3.0, < 3.0.3' +patch = 'pandas-3.0.2.patch' +license = 'BSD-3-Clause' +dist-type = 'sdist' + +[[pandas.rules]] +version = '>= 2.3, < 2.4' +patch = 'pandas-2.3.3.patch' +license = 'BSD-3-Clause' +dist-type = 'sdist' + [[pandas.rules]] version = '== 2.2.2' patch = 'pandas-2.2.2.patch' @@ -558,6 +623,16 @@ patch = 'py4j.patch' license = 'BSD-3-Clause' subdir = 'src' +[[pyarrow.rules]] +version = '== 24.0.0' +patch = 'pyarrow-24.0.0.patch' +license = 'Apache-2.0' + +[[pyarrow.rules]] +version = '>= 21.0.0, < 24.0.0' +patch = 'pyarrow-22.0.0.patch' +license = 'Apache-2.0' + [[pyarrow.rules]] version = '== 15.0.0' patch = 'pyarrow-15.0.0.patch' @@ -1084,6 +1159,11 @@ version = '== 0.9.0' patch = 'librt-0.9.0.patch' license = 'MIT' +[[librt.rules]] +version = '== 0.10.0' +patch = 'librt-0.10.0.patch' +license = 'MIT' + [[opencv-python.rules]] version = '== 4.12.*' patch = 'opencv-python-4.12.patch' diff --git a/graalpython/lib-graalpython/patches/numpy-2.4.4.patch b/graalpython/lib-graalpython/patches/numpy-2.4.4.patch new file mode 100644 index 0000000000..8296b9f717 --- /dev/null +++ b/graalpython/lib-graalpython/patches/numpy-2.4.4.patch @@ -0,0 +1,133 @@ +diff --git a/numpy/_core/include/numpy/ndarrayobject.h b/numpy/_core/include/numpy/ndarrayobject.h +index 6bfc40f..046ff74 100644 +--- a/numpy/_core/include/numpy/ndarrayobject.h ++++ b/numpy/_core/include/numpy/ndarrayobject.h +@@ -220,7 +220,7 @@ NPY_TITLE_KEY_check(PyObject *key, PyObject *value) + if (key == title) { + return 1; + } +-#ifdef PYPY_VERSION ++#if defined(PYPY_VERSION) || defined(GRAALVM_PYTHON) + /* + * On PyPy, dictionary keys do not always preserve object identity. 
+ * Fall back to comparison by value. +diff --git a/numpy/_core/src/multiarray/compiled_base.c b/numpy/_core/src/multiarray/compiled_base.c +index 6e37968..010bd03 100644 +--- a/numpy/_core/src/multiarray/compiled_base.c ++++ b/numpy/_core/src/multiarray/compiled_base.c +@@ -1462,6 +1462,7 @@ fail: + NPY_NO_EXPORT PyObject * + arr_add_docstring(PyObject *NPY_UNUSED(dummy), PyObject *const *args, Py_ssize_t len_args) + { ++#if 0 // GraalPy change + PyObject *obj; + PyObject *str; + const char *docstr; +@@ -1566,6 +1567,7 @@ arr_add_docstring(PyObject *NPY_UNUSED(dummy), PyObject *const *args, Py_ssize_t + } + + #undef _ADDDOC ++#endif // GraalPy change + + Py_RETURN_NONE; + } +diff --git a/numpy/_core/src/multiarray/shape.c b/numpy/_core/src/multiarray/shape.c +index 4742a8f..2e5793a 100644 +--- a/numpy/_core/src/multiarray/shape.c ++++ b/numpy/_core/src/multiarray/shape.c +@@ -88,11 +88,16 @@ PyArray_Resize_int(PyArrayObject *self, PyArray_Dims *newshape, int refcheck) + return -1; + } + if (refcheck) { +-#ifdef PYPY_VERSION ++#if defined(PYPY_VERSION) + PyErr_SetString(PyExc_ValueError, + "cannot resize an array with refcheck=True on PyPy.\n" + "Use the np.resize function or refcheck=False"); + return -1; ++#elif defined(GRAALVM_PYTHON) ++ PyErr_SetString(PyExc_ValueError, ++ "cannot resize an array with refcheck=True on GraalPy.\n" ++ "Use the np.resize function or refcheck=False"); ++ return -1; + #else + #if PY_VERSION_HEX >= 0x030E00B0 + // Python 3.14 changed reference counting semantics for function- +diff --git a/numpy/_core/src/multiarray/stringdtype/dtype.c b/numpy/_core/src/multiarray/stringdtype/dtype.c +index f64b21c..9fd315f 100644 +--- a/numpy/_core/src/multiarray/stringdtype/dtype.c ++++ b/numpy/_core/src/multiarray/stringdtype/dtype.c +@@ -1026,7 +1026,7 @@ init_string_dtype(void) + }; + + /* Loaded dynamically, so needs to be set here: */ +- Py_TYPE(((PyObject *)&PyArray_StringDType)) = &PyArrayDTypeMeta_Type; ++ Py_SET_TYPE(((PyObject 
*)&PyArray_StringDType), &PyArrayDTypeMeta_Type); + ((PyTypeObject *)&PyArray_StringDType)->tp_base = &PyArrayDescr_Type; + if (PyType_Ready((PyTypeObject *)&PyArray_StringDType) < 0) { + return -1; +diff --git a/numpy/_core/src/multiarray/temp_elide.c b/numpy/_core/src/multiarray/temp_elide.c +index ea6cac0..f175a10 100644 +--- a/numpy/_core/src/multiarray/temp_elide.c ++++ b/numpy/_core/src/multiarray/temp_elide.c +@@ -59,7 +59,7 @@ + * supported too by using the appropriate Windows APIs. + */ + +-#if defined HAVE_BACKTRACE && defined HAVE_DLFCN_H && ! defined PYPY_VERSION ++#if defined HAVE_BACKTRACE && defined HAVE_DLFCN_H && ! defined PYPY_VERSION && !defined(GRAALVM_PYTHON) + + #include + +diff --git a/numpy/_core/src/npymath/ieee754.c.src b/numpy/_core/src/npymath/ieee754.c.src +index 8fccc9a..93ee333 100644 +--- a/numpy/_core/src/npymath/ieee754.c.src ++++ b/numpy/_core/src/npymath/ieee754.c.src +@@ -8,6 +8,9 @@ + #include "npy_math_private.h" + #include "numpy/utils.h" + ++// GraalPy change ++#include ++ + /* + The below code is provided for compilers which do not yet provide C11 compatibility (gcc 4.5 and older) + */ +@@ -362,6 +365,11 @@ int npy_get_floatstatus_barrier(char* param) + * By using a volatile, the compiler cannot reorder this call + */ + if (param != NULL) { ++ // GraalPy change: the pointer needs to be dereferenced to establish ++ // a data dependency to to ensure the compiler won't reorder the call ++ if (points_to_py_handle_space(param)) { ++ param = (char*)pointer_to_stub(param); ++ } + volatile char NPY_UNUSED(c) = *(char*)param; + } + +diff --git a/numpy/_core/src/npymath/ieee754.cpp b/numpy/_core/src/npymath/ieee754.cpp +index 1c59bf3..0d8cb63 100644 +--- a/numpy/_core/src/npymath/ieee754.cpp ++++ b/numpy/_core/src/npymath/ieee754.cpp +@@ -9,6 +9,9 @@ + #include "npy_math_common.h" + #include "npy_math_private.h" + ++// GraalPy change ++#include ++ + /* + The below code is provided for compilers which do not yet provide C11 + 
compatibility (gcc 4.5 and older) +@@ -428,6 +431,11 @@ npy_get_floatstatus_barrier(char *param) + * By using a volatile, the compiler cannot reorder this call + */ + if (param != NULL) { ++ // GraalPy change: the pointer needs to be dereferenced to establish ++ // a data dependency to to ensure the compiler won't reorder the call ++ if (points_to_py_handle_space(param)) { ++ param = (char*)pointer_to_stub(param); ++ } + volatile char NPY_UNUSED(c) = *(char *)param; + } + diff --git a/graalpython/lib-graalpython/patches/onnxruntime-1.25.1.patch b/graalpython/lib-graalpython/patches/onnxruntime-1.25.1.patch new file mode 100644 index 0000000000..7e080fc90d --- /dev/null +++ b/graalpython/lib-graalpython/patches/onnxruntime-1.25.1.patch @@ -0,0 +1,105 @@ +diff --git a/onnxruntime_build_backend.py b/onnxruntime_build_backend.py +new file mode 100644 +index 0000000..8c07fa4 +--- /dev/null ++++ b/onnxruntime_build_backend.py +@@ -0,0 +1,60 @@ ++import os ++import re ++import sys ++import tarfile ++import subprocess ++import tempfile ++import shutil ++from pathlib import Path ++ ++VERSION = Path("VERSION_NUMBER").read_text().strip() ++ ++def build_sdist(sdist_directory, config_settings=None): ++ nv = f'onnxruntime-{VERSION}' ++ srcdir = Path(__file__).parent ++ archive_path = Path(sdist_directory) / f'{nv}.tar.gz' ++ ++ def tarfilter(info): ++ if re.match(r'\./(?:.git|venv|[^-/]+-venv|dist)', info.name): ++ return None ++ info.name = f'./{nv}/{info.name}' ++ return info ++ ++ with tarfile.open(archive_path, 'w:gz') as tar: ++ tar.add('.', filter=tarfilter) ++ return archive_path.name ++ ++ ++def build_wheel(wheel_directory, config_settings=None, metadata_directory=None): ++ wheel_directory = Path(wheel_directory).absolute() ++ build_type = 'Release' ++ build_dir = Path(f'build/{build_type}') ++ parallel = os.environ.get('CMAKE_BUILD_PARALLEL_LEVEL', os.cpu_count()) ++ build_cmd = [ ++ sys.executable, ++ 'tools/ci_build/build.py', ++ '--build_dir', 'build', ++ 
'--skip_submodule_sync', ++ '--skip_tests', ++ '--config', build_type, ++ '--enable_pybind', ++ '--parallel', str(parallel), ++ ] ++ if sys.implementation.name == 'graalpy': ++ # The cmake build downloads a bunch of sources that need to be patched ++ subprocess.check_call(build_cmd) ++ marker = build_dir / 'graalpy-patched-marker' ++ if not marker.exists(): ++ subprocess.check_call([sys.executable, '-m', 'autopatch_capi', '.']) ++ pybind11_dir = build_dir / '_deps/pybind11_project-src' ++ patches_dir = Path(__graalpython__.core_home) / 'patches' ++ with open(patches_dir / 'pybind11-2.11.patch') as f: ++ subprocess.check_call(['patch', '-p2', '-f'], stdin=f, cwd=pybind11_dir) ++ with open(marker, 'w') as f: ++ pass ++ subprocess.check_call([*build_cmd, '--build_wheel']) ++ wheels = list((build_dir / 'dist').glob('*.whl')) ++ assert len(wheels) == 1, f"Expected 1 wheel, found {len(wheels)}" ++ wheel = wheels[0] ++ shutil.copyfile(wheel, wheel_directory / wheel.name) ++ return str(wheel.name) +diff --git a/pyproject.toml b/pyproject.toml +index eed772f..1da5261 100644 +--- a/pyproject.toml ++++ b/pyproject.toml +@@ -1,3 +1,8 @@ ++[build-system] ++requires = ["setuptools >= 40.6.0", "wheel", "packaging", "numpy>=1.24.2"] ++build-backend = "onnxruntime_build_backend" ++backend-path = ["."] ++ + [tool.pydocstyle] + convention = "google" + +diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py +index 4b23101..64c3e38 100644 +--- a/tools/ci_build/build.py ++++ b/tools/ci_build/build.py +@@ -400,6 +400,12 @@ def generate_build_tree( + "-Ddml_EXTERNAL_PROJECT=ON", + ] + ++ if not args.test: ++ cmake_args += [ ++ "-Donnxruntime_BUILD_UNIT_TESTS=OFF", ++ ] ++ ++ + if args.use_gdk: + cmake_args += [ + "-DCMAKE_TOOLCHAIN_FILE=" + os.path.join(source_dir, "cmake", "gdk_toolchain.cmake"), +@@ -2297,7 +2303,7 @@ def main(): + os.environ["ANDROID_NDK_HOME"] = args.android_ndk_path + + if not is_windows() and not is_macOS(): +- if not args.allow_running_as_root: ++ if False 
and not args.allow_running_as_root: + is_root_user = os.geteuid() == 0 + if is_root_user: + raise BuildError( diff --git a/graalpython/lib-graalpython/patches/orjson-3.11.9.patch b/graalpython/lib-graalpython/patches/orjson-3.11.9.patch new file mode 100644 index 0000000000..5dd42646e5 --- /dev/null +++ b/graalpython/lib-graalpython/patches/orjson-3.11.9.patch @@ -0,0 +1,473 @@ +diff --git a/build.rs b/build.rs +index 09b723e..5347b2c 100644 +--- a/build.rs ++++ b/build.rs +@@ -24,6 +24,7 @@ fn main() { + println!("cargo:rustc-cfg=feature=\"inline_str\""); + } + } ++ pyo3_build_config::PythonImplementation::GraalPy => {} + _ => not_supported(&python_config.implementation.to_string()), + } + +diff --git a/src/deserialize/input.rs b/src/deserialize/input.rs +index faa6d30..18f1ddb 100644 +--- a/src/deserialize/input.rs ++++ b/src/deserialize/input.rs +@@ -3,8 +3,10 @@ + + use crate::deserialize::DeserializeError; + #[cfg(all(CPython, not(Py_GIL_DISABLED)))] +-use crate::ffi::{PyByteArrayRef, PyMemoryViewRef}; +-use crate::ffi::{PyBytesRef, PyStrRef}; ++use crate::ffi::PyMemoryViewRef; ++#[cfg(not(CPython))] ++use crate::ffi::{Py_buffer, PyBUF_C_CONTIGUOUS}; ++use crate::ffi::{PyByteArrayRef, PyBytesRef, PyStrRef}; + use crate::util::INVALID_STR; + use std::borrow::Cow; + +@@ -15,11 +17,23 @@ const INPUT_TYPE_MESSAGE: &str = "Input must be bytes, bytearray, memoryview, or + const INPUT_TYPE_MESSAGE: &str = "Input must be bytes or str"; + + #[cfg(not(CPython))] +-const INPUT_TYPE_MESSAGE: &str = "Input must be bytes, bytearray, or str"; ++const INPUT_TYPE_MESSAGE: &str = "Input must be bytes, bytearray, memoryview, or str"; + +-#[cfg_attr(not(Py_GIL_DISABLED), repr(transparent))] + pub struct Utf8Buffer { + buffer: &'static str, ++ #[cfg(not(CPython))] ++ view: Option, ++} ++ ++#[cfg(not(CPython))] ++impl Drop for Utf8Buffer { ++ fn drop(&mut self) { ++ if let Some(mut view) = self.view { ++ unsafe { ++ crate::ffi::PyBuffer_Release(&raw mut view); ++ } ++ } ++ } + } + 
+ impl Utf8Buffer { +@@ -40,7 +54,7 @@ impl Utf8Buffer { + } + } + +- #[cfg(any(not(CPython), Py_GIL_DISABLED))] ++ #[cfg(all(CPython, Py_GIL_DISABLED))] + fn buffer_from_ptr( + ptr: *mut crate::ffi::PyObject, + ) -> Result, DeserializeError<'static>> { +@@ -53,6 +67,64 @@ impl Utf8Buffer { + } + } + ++ #[cfg(not(CPython))] ++ pub fn from_pyobject( ++ ptr: *mut crate::ffi::PyObject, ++ ) -> Result> { ++ debug_assert!(!ptr.is_null()); ++ let as_str = if let Ok(ob) = PyBytesRef::from_ptr(ptr) { ++ ob.as_str() ++ } else if let Ok(ob) = PyStrRef::from_ptr(ptr) { ++ ob.as_str() ++ } else if let Ok(ob) = PyByteArrayRef::from_ptr(ptr) { ++ ob.as_str() ++ } else { ++ let mut view = Py_buffer::new(); ++ unsafe { ++ if crate::ffi::PyObject_GetBuffer(ptr, &raw mut view, PyBUF_C_CONTIGUOUS) != 0 { ++ crate::ffi::PyErr_Clear(); ++ return Err(DeserializeError::invalid(Cow::Borrowed(INPUT_TYPE_MESSAGE))); ++ } ++ let buffer = core::slice::from_raw_parts( ++ view.buf.cast::().cast_const(), ++ crate::util::isize_to_usize(view.len), ++ ); ++ if core::str::from_utf8(buffer).is_err() { ++ crate::ffi::PyBuffer_Release(&raw mut view); ++ return Err(DeserializeError::invalid(Cow::Borrowed(INVALID_STR))); ++ } ++ let as_str = core::str::from_utf8_unchecked(buffer); ++ if as_str.is_empty() { ++ crate::ffi::PyBuffer_Release(&raw mut view); ++ return Err(DeserializeError::invalid(Cow::Borrowed( ++ "Input is a zero-length, empty document", ++ ))); ++ } ++ return Ok(Self { ++ buffer: as_str, ++ view: Some(view), ++ }); ++ } ++ }; ++ match as_str { ++ Some(buffer) => { ++ if buffer.is_empty() { ++ cold_path!(); ++ Err(DeserializeError::invalid(Cow::Borrowed( ++ "Input is a zero-length, empty document", ++ ))) ++ } else { ++ Ok(Self { buffer, view: None }) ++ } ++ } ++ None => { ++ cold_path!(); ++ Err(DeserializeError::invalid(Cow::Borrowed(INVALID_STR))) ++ } ++ } ++ } ++ ++ #[cfg(CPython)] + pub fn from_pyobject( + ptr: *mut crate::ffi::PyObject, + ) -> Result> { +diff --git a/src/ffi/mod.rs 
b/src/ffi/mod.rs +index b33af1f..6f4505a 100644 +--- a/src/ffi/mod.rs ++++ b/src/ffi/mod.rs +@@ -8,7 +8,6 @@ pub(crate) mod compat; + mod fragment; + mod numpy; + mod pyboolref; +-#[cfg(all(CPython, not(Py_GIL_DISABLED)))] + mod pybytearrayref; + mod pybytesref; + mod pydateref; +@@ -41,6 +40,7 @@ pub(crate) use { + bytes::{PyBytes_AS_STRING, PyBytes_GET_SIZE, PyBytesObject}, + fragment::{Fragment, orjson_fragmenttype_new}, + pyboolref::PyBoolRef, ++ pybytearrayref::{PyByteArrayRef, PyByteArrayRefError}, + pybytesref::{PyBytesRef, PyBytesRefError}, + pydateref::PyDateRef, + pydatetimeref::PyDateTimeRef, +@@ -59,7 +59,6 @@ pub(crate) use { + #[allow(unused_imports)] + #[cfg(all(CPython, not(Py_GIL_DISABLED)))] + pub(crate) use { +- pybytearrayref::{PyByteArrayRef, PyByteArrayRefError}, + pymemoryview::{PyMemoryViewRef, PyMemoryViewRefError}, + }; + +@@ -77,18 +76,20 @@ pub(crate) use pyo3_ffi::{ + PyDateTime_DATE_GET_TZINFO, PyDateTime_DELTA_GET_DAYS, PyDateTime_DELTA_GET_SECONDS, + PyDateTime_DateTime, PyDateTime_GET_DAY, PyDateTime_GET_MONTH, PyDateTime_GET_YEAR, + PyDateTime_IMPORT, PyDateTime_TIME_GET_HOUR, PyDateTime_TIME_GET_MICROSECOND, +- PyDateTime_TIME_GET_MINUTE, PyDateTime_TIME_GET_SECOND, PyDateTime_Time, PyDict_Contains, +- PyDict_Next, PyDict_SetItem, PyDict_Type, PyDictObject, PyErr_Clear, PyErr_NewException, +- PyErr_Occurred, PyErr_SetObject, PyExc_TypeError, PyException_SetCause, PyFloat_AS_DOUBLE, +- PyFloat_FromDouble, PyFloat_Type, PyImport_ImportModule, PyList_GET_ITEM, PyList_New, +- PyList_SET_ITEM, PyList_Type, PyListObject, PyLong_AsLong, PyLong_AsLongLong, ++ PyDateTime_TIME_GET_MINUTE, PyDateTime_TIME_GET_SECOND, PyDateTime_TIME_GET_TZINFO, ++ PyDateTime_Time, PyDict_Contains, PyDict_Next, PyDict_SetItem, PyDict_Type, PyDictObject, ++ PyErr_Clear, PyErr_NewException, PyErr_Occurred, PyErr_SetObject, PyExc_TypeError, ++ PyException_SetCause, PyFloat_AS_DOUBLE, PyFloat_FromDouble, PyFloat_Type, ++ PyImport_ImportModule, PyList_GET_ITEM, 
PyList_New, PyList_SET_ITEM, PyList_Type, ++ PyListObject, PyLong_AsLong, PyLong_AsLongLong, + PyLong_AsUnsignedLongLong, PyLong_FromLongLong, PyLong_FromUnsignedLongLong, PyLong_Type, + PyLongObject, PyMapping_GetItemString, PyMem_Free, PyMem_Malloc, PyMem_Realloc, + PyMemoryView_Type, PyMethodDef, PyMethodDefPointer, PyModule_AddIntConstant, + PyModule_AddObject, PyModuleDef, PyModuleDef_HEAD_INIT, PyModuleDef_Init, PyModuleDef_Slot, + PyObject, PyObject_CallFunctionObjArgs, PyObject_CallMethodObjArgs, PyObject_GenericGetDict, +- PyObject_GetAttr, PyObject_HasAttr, PyObject_Hash, PyObject_Vectorcall, PyTuple_New, +- PyTuple_Type, PyTupleObject, PyType_FromSpec, PyType_Slot, PyType_Spec, PyTypeObject, ++ PyBUF_C_CONTIGUOUS, PyBuffer_Release, PyObject_GetAttr, PyObject_GetBuffer, ++ PyObject_HasAttr, PyObject_Hash, PyObject_Vectorcall, PyTuple_New, PyTuple_Type, ++ PyTupleObject, PyType_FromSpec, PyType_Slot, PyType_Spec, PyTypeObject, + PyUnicode_AsUTF8AndSize, PyUnicode_FromStringAndSize, PyUnicode_InternFromString, + PyUnicode_New, PyUnicode_Type, PyVarObject, PyVectorcall_NARGS, + }; +@@ -99,10 +100,44 @@ pub(crate) use pyo3_ffi::PyErr_Restore; + #[cfg(CPython)] + pub(crate) use pyo3_ffi::{PyObject_CallMethodNoArgs, PyObject_CallMethodOneArg}; + ++#[cfg(not(CPython))] ++#[inline(always)] ++#[allow(non_snake_case)] ++pub(crate) unsafe fn PyObject_CallMethodNoArgs( ++ obj: *mut pyo3_ffi::PyObject, ++ name: *mut pyo3_ffi::PyObject, ++) -> *mut pyo3_ffi::PyObject { ++ unsafe { ++ pyo3_ffi::PyObject_CallMethodObjArgs( ++ obj, ++ name, ++ core::ptr::null_mut::(), ++ ) ++ } ++} ++ ++#[cfg(not(CPython))] ++#[inline(always)] ++#[allow(non_snake_case)] ++pub(crate) unsafe fn PyObject_CallMethodOneArg( ++ obj: *mut pyo3_ffi::PyObject, ++ name: *mut pyo3_ffi::PyObject, ++ arg: *mut pyo3_ffi::PyObject, ++) -> *mut pyo3_ffi::PyObject { ++ unsafe { ++ pyo3_ffi::PyObject_CallMethodObjArgs( ++ obj, ++ name, ++ arg, ++ core::ptr::null_mut::(), ++ ) ++ } ++} ++ + 
#[cfg(all(CPython, not(Py_GIL_DISABLED)))] + pub(crate) use buffer::PyMemoryView_GET_BUFFER; + +-#[cfg(not(feature = "inline_str"))] ++#[cfg(all(CPython, not(feature = "inline_str")))] + pub(crate) use pyo3_ffi::{PyUnicode_DATA, PyUnicode_KIND}; + + #[cfg(Py_3_12)] +diff --git a/src/ffi/pydatetimeref.rs b/src/ffi/pydatetimeref.rs +index 26c0e07..dd0c395 100644 +--- a/src/ffi/pydatetimeref.rs ++++ b/src/ffi/pydatetimeref.rs +@@ -2,8 +2,10 @@ + // Copyright ijl (2025-2026), Ben Sully (2021) + + use crate::typeref::{ +- CONVERT_METHOD_STR, DST_STR, NORMALIZE_METHOD_STR, UTCOFFSET_METHOD_STR, ZONEINFO_TYPE, ++ CONVERT_METHOD_STR, DST_STR, NORMALIZE_METHOD_STR, UTCOFFSET_METHOD_STR, + }; ++#[cfg(CPython)] ++use crate::typeref::ZONEINFO_TYPE; + + use crate::ffi::{PyObject_CallMethodNoArgs, PyObject_CallMethodOneArg, PyObject_HasAttr}; + +@@ -157,7 +159,11 @@ impl PyDateTimeRef { + #[cfg(not(CPython))] + #[inline] + pub fn offset(&self) -> Option { +- unimplemented!() ++ if !self.has_tz() { ++ Some(Offset::default()) ++ } else { ++ self.slow_offset(self.tzinfo()) ++ } + } + + #[cfg(CPython)] +@@ -185,7 +191,6 @@ impl PyDateTimeRef { + } + } + +- #[cfg(CPython)] + #[cold] + #[inline(never)] + fn slow_offset(&self, tzinfo: *mut crate::ffi::PyObject) -> Option { +diff --git a/src/ffi/pystrref/object.rs b/src/ffi/pystrref/object.rs +index 69f8d4d..e28449b 100644 +--- a/src/ffi/pystrref/object.rs ++++ b/src/ffi/pystrref/object.rs +@@ -3,9 +3,10 @@ + + #[allow(unused)] + use crate::ffi::{ +- Py_HashBuffer, Py_SIZE, Py_ssize_t, PyASCIIObject, PyCompactUnicodeObject, PyObject, +- PyUnicode_AsUTF8AndSize, ++ Py_SIZE, Py_ssize_t, PyASCIIObject, PyCompactUnicodeObject, PyObject, PyUnicode_AsUTF8AndSize, + }; ++#[cfg(CPython)] ++use crate::ffi::Py_HashBuffer; + #[cfg(all(CPython, not(feature = "inline_str")))] + use crate::ffi::{PyUnicode_DATA, PyUnicode_KIND}; + use crate::typeref::{EMPTY_UNICODE, STR_TYPE}; +@@ -124,6 +125,12 @@ impl PyStrRef { + obj + } + ++ #[cfg(not(CPython))] 
++ #[inline(always)] ++ pub fn from_str_with_hash(buf: &str) -> Self { ++ PyStrRef::from_str(buf) ++ } ++ + #[cfg(CPython)] + #[inline(always)] + pub fn from_str(buf: &str) -> Self { +@@ -166,7 +173,7 @@ impl PyStrRef { + unsafe { crate::ffi::PyUnstable_Unicode_GET_CACHED_HASH(self.as_ptr()) } + } + +- #[cfg(feature = "inline_str")] ++ #[cfg(all(CPython, feature = "inline_str"))] + fn set_hash(&mut self) { + unsafe { + let ptr = self.as_ptr().cast::(); +@@ -187,7 +194,7 @@ impl PyStrRef { + } + } + +- #[cfg(not(feature = "inline_str"))] ++ #[cfg(all(CPython, not(feature = "inline_str")))] + fn set_hash(&mut self) { + unsafe { + let data_ptr = PyUnicode_DATA(self.as_ptr()); +diff --git a/src/ffi/pytimeref.rs b/src/ffi/pytimeref.rs +index 57faa72..04c1ece 100644 +--- a/src/ffi/pytimeref.rs ++++ b/src/ffi/pytimeref.rs +@@ -3,8 +3,10 @@ + + use crate::ffi::{ + PyDateTime_TIME_GET_HOUR, PyDateTime_TIME_GET_MICROSECOND, PyDateTime_TIME_GET_MINUTE, +- PyDateTime_TIME_GET_SECOND, PyDateTime_Time, PyObject, ++ PyDateTime_TIME_GET_SECOND, PyDateTime_TIME_GET_TZINFO, PyObject, + }; ++#[cfg(CPython)] ++use crate::ffi::PyDateTime_Time; + + #[derive(Clone)] + #[repr(transparent)] +@@ -54,7 +56,7 @@ impl PyTimeRef { + #[cfg(not(CPython))] + #[inline] + pub fn has_tz(&self) -> bool { +- unimplemented!() ++ unsafe { PyDateTime_TIME_GET_TZINFO(self.ptr.as_ptr()) != crate::typeref::NONE } + } + + #[inline] +diff --git a/src/serialize/per_type/list.rs b/src/serialize/per_type/list.rs +index 8003ee9..15fcb30 100644 +--- a/src/serialize/per_type/list.rs ++++ b/src/serialize/per_type/list.rs +@@ -40,12 +40,21 @@ impl Serialize for ZeroListSerializer { + } + + pub(crate) struct ListTupleSerializer { ++ ptr: *mut crate::ffi::PyObject, ++ kind: ListTupleKind, ++ #[cfg(CPython)] + data_ptr: *const *mut crate::ffi::PyObject, + state: SerializerState, + default: Option>, + len: usize, + } + ++#[derive(Copy, Clone)] ++enum ListTupleKind { ++ List, ++ Tuple, ++} ++ + impl ListTupleSerializer { + 
pub fn from_list( + ob: PyListRef, +@@ -53,6 +62,9 @@ impl ListTupleSerializer { + default: Option>, + ) -> Self { + Self { ++ ptr: ob.as_ptr(), ++ kind: ListTupleKind::List, ++ #[cfg(CPython)] + data_ptr: ob.data_ptr(), + len: ob.len(), + state: state.copy_for_recursive_call(), +@@ -72,15 +84,39 @@ impl ListTupleSerializer { + Py_TPFLAGS_TUPLE_SUBCLASS + ) + ); ++ #[cfg(CPython)] + let data_ptr = unsafe { (*ptr.cast::()).ob_item.as_ptr() }; + let len = isize_to_usize(ffi!(Py_SIZE(ptr))); + Self { ++ ptr: ptr, ++ kind: ListTupleKind::Tuple, ++ #[cfg(CPython)] + data_ptr: data_ptr, + len: len, + state: state.copy_for_recursive_call(), + default: default, + } + } ++ ++ #[inline(always)] ++ unsafe fn get_item(&self, idx: usize) -> *mut crate::ffi::PyObject { ++ #[cfg(CPython)] ++ { ++ unsafe { *((self.data_ptr).add(idx)) } ++ } ++ ++ #[cfg(not(CPython))] ++ { ++ match self.kind { ++ ListTupleKind::List => unsafe { ++ crate::ffi::PyList_GET_ITEM(self.ptr, crate::util::usize_to_isize(idx)) ++ }, ++ ListTupleKind::Tuple => unsafe { ++ crate::ffi::PyTuple_GET_ITEM(self.ptr, crate::util::usize_to_isize(idx)) ++ }, ++ } ++ } ++ } + } + + impl Serialize for ListTupleSerializer { +@@ -96,7 +132,7 @@ impl Serialize for ListTupleSerializer { + debug_assert!(self.len >= 1); + let mut seq = serializer.serialize_seq(None).unwrap(); + for idx in 0..self.len { +- let value = unsafe { *((self.data_ptr).add(idx)) }; ++ let value = unsafe { self.get_item(idx) }; + match pyobject_to_obtype(value, self.state.opts()) { + ObType::Str => { + seq.serialize_element(&StrSerializer::new(unsafe { +diff --git a/src/util.rs b/src/util.rs +index bfeccbf..59df84e 100644 +--- a/src/util.rs ++++ b/src/util.rs +@@ -69,7 +69,7 @@ macro_rules! str_from_slice { + }; + } + +-#[cfg(all(Py_3_12, not(Py_GIL_DISABLED)))] ++#[cfg(all(CPython, Py_3_12, not(Py_GIL_DISABLED)))] + macro_rules! reverse_pydict_incref { + ($op:expr) => { + unsafe { +@@ -80,14 +80,14 @@ macro_rules! 
reverse_pydict_incref { + }; + } + +-#[cfg(Py_GIL_DISABLED)] ++#[cfg(any(Py_GIL_DISABLED, not(CPython)))] + macro_rules! reverse_pydict_incref { + ($op:expr) => { + unsafe { crate::ffi::Py_DECREF($op) } + }; + } + +-#[cfg(not(Py_3_12))] ++#[cfg(all(CPython, not(Py_3_12)))] + macro_rules! reverse_pydict_incref { + ($op:expr) => { + unsafe { +@@ -160,14 +160,14 @@ macro_rules! pydict_contains { + }; + } + +-#[cfg(Py_3_12)] ++#[cfg(all(CPython, Py_3_12))] + macro_rules! use_immortal { + ($op:expr) => { + unsafe { $op } + }; + } + +-#[cfg(not(Py_3_12))] ++#[cfg(any(not(Py_3_12), not(CPython)))] + macro_rules! use_immortal { + ($op:expr) => { + unsafe { diff --git a/graalpython/lib-graalpython/patches/pandas-2.3.3.patch b/graalpython/lib-graalpython/patches/pandas-2.3.3.patch new file mode 100644 index 0000000000..571b016075 --- /dev/null +++ b/graalpython/lib-graalpython/patches/pandas-2.3.3.patch @@ -0,0 +1,39 @@ +diff --git a/pandas/_libs/include/pandas/vendored/klib/khash_python.h b/pandas/_libs/include/pandas/vendored/klib/khash_python.h +index edfbce4..118a34b 100644 +--- a/pandas/_libs/include/pandas/vendored/klib/khash_python.h ++++ b/pandas/_libs/include/pandas/vendored/klib/khash_python.h +@@ -173,13 +173,15 @@ static inline int floatobject_cmp(PyFloatObject *a, PyFloatObject *b) { + // PyObject_RichCompareBool for complexobjects has a different behavior + // needs to be replaced + static inline int complexobject_cmp(PyComplexObject *a, PyComplexObject *b) { +- return (Py_IS_NAN(a->cval.real) && Py_IS_NAN(b->cval.real) && +- Py_IS_NAN(a->cval.imag) && Py_IS_NAN(b->cval.imag)) || +- (Py_IS_NAN(a->cval.real) && Py_IS_NAN(b->cval.real) && +- a->cval.imag == b->cval.imag) || +- (a->cval.real == b->cval.real && Py_IS_NAN(a->cval.imag) && +- Py_IS_NAN(b->cval.imag)) || +- (a->cval.real == b->cval.real && a->cval.imag == b->cval.imag); ++ Py_complex a_cval = PyComplex_AsCComplex((PyObject*)a); ++ Py_complex b_cval = PyComplex_AsCComplex((PyObject*)b); ++ return 
(Py_IS_NAN(a_cval.real) && Py_IS_NAN(b_cval.real) && ++ Py_IS_NAN(a_cval.imag) && Py_IS_NAN(b_cval.imag)) || ++ (Py_IS_NAN(a_cval.real) && Py_IS_NAN(b_cval.real) && ++ a_cval.imag == b_cval.imag) || ++ (a_cval.real == b_cval.real && Py_IS_NAN(a_cval.imag) && ++ Py_IS_NAN(b_cval.imag)) || ++ (a_cval.real == b_cval.real && a_cval.imag == b_cval.imag); + } + + static inline int pyobject_cmp(PyObject *a, PyObject *b); +@@ -250,8 +252,9 @@ static inline Py_hash_t floatobject_hash(PyFloatObject *key) { + + // replaces _Py_HashDouble with _Pandas_HashDouble + static inline Py_hash_t complexobject_hash(PyComplexObject *key) { +- Py_uhash_t realhash = (Py_uhash_t)_Pandas_HashDouble(key->cval.real); +- Py_uhash_t imaghash = (Py_uhash_t)_Pandas_HashDouble(key->cval.imag); ++ Py_complex cval = PyComplex_AsCComplex((PyObject*)key); ++ Py_uhash_t realhash = (Py_uhash_t)_Pandas_HashDouble(cval.real); ++ Py_uhash_t imaghash = (Py_uhash_t)_Pandas_HashDouble(cval.imag); + if (realhash == (Py_uhash_t)-1 || imaghash == (Py_uhash_t)-1) { + return -1; + } diff --git a/graalpython/lib-graalpython/patches/pandas-3.0.2.patch b/graalpython/lib-graalpython/patches/pandas-3.0.2.patch new file mode 100644 index 0000000000..e01a671af8 --- /dev/null +++ b/graalpython/lib-graalpython/patches/pandas-3.0.2.patch @@ -0,0 +1,39 @@ +diff --git a/pandas/_libs/include/pandas/vendored/klib/khash_python.h b/pandas/_libs/include/pandas/vendored/klib/khash_python.h +index 05cdeba..00b6230 100644 +--- a/pandas/_libs/include/pandas/vendored/klib/khash_python.h ++++ b/pandas/_libs/include/pandas/vendored/klib/khash_python.h +@@ -163,13 +163,15 @@ static inline int floatobject_cmp(PyFloatObject *a, PyFloatObject *b) { + // PyObject_RichCompareBool for complexobjects has a different behavior + // needs to be replaced + static inline int complexobject_cmp(PyComplexObject *a, PyComplexObject *b) { +- return (isnan(a->cval.real) && isnan(b->cval.real) && isnan(a->cval.imag) && +- isnan(b->cval.imag)) || +- 
(isnan(a->cval.real) && isnan(b->cval.real) && +- a->cval.imag == b->cval.imag) || +- (a->cval.real == b->cval.real && isnan(a->cval.imag) && +- isnan(b->cval.imag)) || +- (a->cval.real == b->cval.real && a->cval.imag == b->cval.imag); ++ Py_complex a_cval = PyComplex_AsCComplex((PyObject*)a); ++ Py_complex b_cval = PyComplex_AsCComplex((PyObject*)b); ++ return (isnan(a_cval.real) && isnan(b_cval.real) && isnan(a_cval.imag) && ++ isnan(b_cval.imag)) || ++ (isnan(a_cval.real) && isnan(b_cval.real) && ++ a_cval.imag == b_cval.imag) || ++ (a_cval.real == b_cval.real && isnan(a_cval.imag) && ++ isnan(b_cval.imag)) || ++ (a_cval.real == b_cval.real && a_cval.imag == b_cval.imag); + } + + static inline int pyobject_cmp(PyObject *a, PyObject *b); +@@ -237,8 +239,9 @@ static inline Py_hash_t floatobject_hash(PyFloatObject *key) { + + // replaces _Py_HashDouble with _Pandas_HashDouble + static inline Py_hash_t complexobject_hash(PyComplexObject *key) { +- Py_uhash_t realhash = (Py_uhash_t)_Pandas_HashDouble(key->cval.real); +- Py_uhash_t imaghash = (Py_uhash_t)_Pandas_HashDouble(key->cval.imag); ++ Py_complex cval = PyComplex_AsCComplex((PyObject*)key); ++ Py_uhash_t realhash = (Py_uhash_t)_Pandas_HashDouble(cval.real); ++ Py_uhash_t imaghash = (Py_uhash_t)_Pandas_HashDouble(cval.imag); + if (realhash == (Py_uhash_t)-1 || imaghash == (Py_uhash_t)-1) { + return -1; + } diff --git a/graalpython/lib-graalpython/patches/pyarrow-22.0.0.patch b/graalpython/lib-graalpython/patches/pyarrow-22.0.0.patch new file mode 100644 index 0000000000..b6003399e2 --- /dev/null +++ b/graalpython/lib-graalpython/patches/pyarrow-22.0.0.patch @@ -0,0 +1,155 @@ +diff --git a/pyarrow/error.pxi b/pyarrow/error.pxi +index cbe2552..8d0d9d9 100644 +--- a/pyarrow/error.pxi ++++ b/pyarrow/error.pxi +@@ -248,7 +248,7 @@ cdef class SignalStopHandler: + if exc_value.signum: + # Re-emit the exact same signal. We restored the Python signal + # handler above, so it should receive it. 
+- if os.name == 'nt': ++ if os.name == 'nt' or sys.implementation.name == 'graalpy': + SendSignal(exc_value.signum) + else: + SendSignalToThread(exc_value.signum, +diff --git a/pyarrow/memory.pxi b/pyarrow/memory.pxi +index a526f0f..defb8f7 100644 +--- a/pyarrow/memory.pxi ++++ b/pyarrow/memory.pxi +@@ -20,6 +20,10 @@ + # cython: embedsignature = True + + ++cdef extern from "Python.h": ++ void Py_INCREF(object) ++ ++ + cdef class MemoryPool(_Weakrefable): + """ + Base class for memory allocation. +@@ -34,6 +38,13 @@ cdef class MemoryPool(_Weakrefable): + + cdef void init(self, CMemoryPool* pool): + self.pool = pool ++ # GraalPy change: pyarrow doesn't maintain python references from ++ # buffers to pools, but they dereference the pointer to the pool in the ++ # destructor. They just assume buffers will get GC'ed before their ++ # pools. You can easily get a segfault even on CPython if you make ++ # a buffer outlive its pool. Since we can't guarantee destruction ++ # order, we just leak the pool. 
++ Py_INCREF(self) + + def release_unused(self): + """ +diff --git a/pyarrow_build_backend.py b/pyarrow_build_backend.py +new file mode 100644 +index 0000000..cb98041 +--- /dev/null ++++ b/pyarrow_build_backend.py +@@ -0,0 +1,93 @@ ++import os ++import re ++import sys ++import tarfile ++import subprocess ++import tempfile ++import shutil ++import tarfile ++import urllib.request ++from pathlib import Path ++ ++VERSION = re.search(r'set\\(PYARROW_VERSION "([^"]+)"\\)', Path("CMakeLists.txt").read_text()).group(1) ++ ++ ++def build_sdist(sdist_directory, config_settings=None): ++ nv = f'pyarrow-{VERSION}' ++ srcdir = Path(__file__).parent ++ archive_path = Path(sdist_directory) / f'{nv}.tar.gz' ++ ++ def tarfilter(info): ++ if re.match(r'\./(?:.git|venv|[^-/]+-venv|dist)', info.name): ++ return None ++ info.name = f'./{nv}/{info.name}' ++ return info ++ ++ with tarfile.open(archive_path, 'w:gz') as tar: ++ tar.add('.', filter=tarfilter) ++ return archive_path.name ++ ++ ++def build_wheel(wheel_directory, config_settings=None, metadata_directory=None): ++ wheel_directory = Path(wheel_directory).absolute() ++ with tempfile.TemporaryDirectory() as tmpdir: ++ tmpdir = Path(tmpdir).absolute() ++ tarname = f'apache-arrow-{VERSION}.tar.gz' ++ tarpath = tmpdir / tarname ++ urllib.request.urlretrieve(f"https://github.com/apache/arrow/archive/refs/tags/{tarname}", tarpath) ++ with tarfile.open(tarpath) as tar: ++ tar.extractall(tmpdir) ++ arrow_dir = tmpdir / f'arrow-apache-arrow-{VERSION}' ++ assert arrow_dir.is_dir() ++ arrow_dist = tmpdir / 'arrow-dist' ++ build_dir = tmpdir / 'arrow-build' ++ subprocess.check_call([ ++ 'cmake', '-S', str(arrow_dir / 'cpp'), '-B', str(build_dir), ++ '-DCMAKE_INSTALL_LIBDIR=lib', ++ f'-DCMAKE_INSTALL_PREFIX={arrow_dist}', ++ '-DCMAKE_BUILD_TYPE=Release', ++ '-DARROW_RPATH_ORIGIN=ON', ++ '-DARROW_BUILD_TESTS=OFF', ++ '-DARROW_BUILD_SHARED=ON', ++ '-DARROW_BUILD_STATIC=OFF', ++ # Features ++ '-DARROW_COMPUTE=ON', ++ '-DARROW_CSV=ON', ++ 
'-DARROW_JSON=ON', ++ '-DARROW_FILESYSTEM=ON', ++ '-DARROW_DATASET=ON', ++ '-DARROW_PARQUET=ON', ++ '-DPARQUET_REQUIRE_ENCRYPTION=ON', ++ '-DARROW_GANDIVA=ON', ++ '-DARROW_WITH_BZ2=ON', ++ '-DARROW_WITH_ZLIB=ON', ++ '-DARROW_WITH_ZSTD=ON', ++ '-DARROW_WITH_LZ4=ON', ++ '-DARROW_WITH_SNAPPY=ON', ++ '-DARROW_WITH_BROTLI=ON', ++ ]) ++ subprocess.check_call([ ++ 'cmake', '--build', str(build_dir), ++ ]) ++ subprocess.check_call([ ++ 'cmake', '--install', str(build_dir), ++ ]) ++ env = os.environ.copy() ++ env['ARROW_HOME'] = str(arrow_dist) ++ env['CMAKE_PREFIX_PATH'] = str(arrow_dist) ++ env['PYARROW_WITH_DATASET'] = '1' ++ env['PYARROW_WITH_PARQUET'] = '1' ++ env['PYARROW_WITH_PARQUET_ENCRYPTION'] = '1' ++ env['PYARROW_WITH_GANDIVA'] = '1' ++ env['PYARROW_BUNDLE_ARROW_CPP'] = '1' ++ env['PYARROW_BUNDLE_CYTHON_CPP'] = '1' ++ subprocess.run( ++ [sys.executable, 'setup.py', 'bdist_wheel'], ++ env=env, ++ check=True, ++ ) ++ wheels = list(Path('dist').glob('*.whl')) ++ assert len(wheels) == 1, f"Expected 1 wheel, found {len(wheels)}" ++ wheel = wheels[0] ++ shutil.copyfile(wheel, wheel_directory / wheel.name) ++ return str(wheel.name) +diff --git a/pyproject.toml b/pyproject.toml +index a1cab40..798ea55 100644 +--- a/pyproject.toml ++++ b/pyproject.toml +@@ -24,7 +24,8 @@ requires = [ + "setuptools_scm[toml]>=8", + "setuptools>=64", + ] +-build-backend = "setuptools.build_meta" ++build-backend = "pyarrow_build_backend" ++backend-path = ["."] + + [project] + name = "pyarrow" diff --git a/graalpython/lib-graalpython/patches/pyarrow-24.0.0.patch b/graalpython/lib-graalpython/patches/pyarrow-24.0.0.patch new file mode 100644 index 0000000000..bee49c8ad0 --- /dev/null +++ b/graalpython/lib-graalpython/patches/pyarrow-24.0.0.patch @@ -0,0 +1,154 @@ +diff --git a/pyarrow/error.pxi b/pyarrow/error.pxi +index cbe2552..8d0d9d9 100644 +--- a/pyarrow/error.pxi ++++ b/pyarrow/error.pxi +@@ -248,7 +248,7 @@ cdef class SignalStopHandler: + if exc_value.signum: + # Re-emit the exact same 
signal. We restored the Python signal + # handler above, so it should receive it. +- if os.name == 'nt': ++ if os.name == 'nt' or sys.implementation.name == 'graalpy': + SendSignal(exc_value.signum) + else: + SendSignalToThread(exc_value.signum, +diff --git a/pyarrow/memory.pxi b/pyarrow/memory.pxi +index a526f0f..defb8f7 100644 +--- a/pyarrow/memory.pxi ++++ b/pyarrow/memory.pxi +@@ -20,6 +20,10 @@ + # cython: embedsignature = True + + ++cdef extern from "Python.h": ++ void Py_INCREF(object) ++ ++ + cdef class MemoryPool(_Weakrefable): + """ + Base class for memory allocation. +@@ -34,6 +38,13 @@ cdef class MemoryPool(_Weakrefable): + + cdef void init(self, CMemoryPool* pool): + self.pool = pool ++ # GraalPy change: pyarrow doesn't maintain python references from ++ # buffers to pools, but they dereference the pointer to the pool in the ++ # destructor. They just assume buffers will get GC'ed before their ++ # pools. You can easily get a segfault even on CPython if you make ++ # a buffer outlive its pool. Since we can't guarantee destruction ++ # order, we just leak the pool. 
++ Py_INCREF(self) + + def release_unused(self): + """ +diff --git a/pyarrow_build_backend.py b/pyarrow_build_backend.py +new file mode 100644 +index 0000000..cb98041 +--- /dev/null ++++ b/pyarrow_build_backend.py +@@ -0,0 +1,93 @@ ++import os ++import re ++import sys ++import tarfile ++import subprocess ++import tempfile ++import shutil ++import tarfile ++import urllib.request ++from pathlib import Path ++ ++VERSION = re.search(r'set\\(PYARROW_VERSION "([^"]+)"\\)', Path("CMakeLists.txt").read_text()).group(1) ++ ++ ++def build_sdist(sdist_directory, config_settings=None): ++ nv = f'pyarrow-{VERSION}' ++ srcdir = Path(__file__).parent ++ archive_path = Path(sdist_directory) / f'{nv}.tar.gz' ++ ++ def tarfilter(info): ++ if re.match(r'\./(?:.git|venv|[^-/]+-venv|dist)', info.name): ++ return None ++ info.name = f'./{nv}/{info.name}' ++ return info ++ ++ with tarfile.open(archive_path, 'w:gz') as tar: ++ tar.add('.', filter=tarfilter) ++ return archive_path.name ++ ++ ++def build_wheel(wheel_directory, config_settings=None, metadata_directory=None): ++ wheel_directory = Path(wheel_directory).absolute() ++ with tempfile.TemporaryDirectory() as tmpdir: ++ tmpdir = Path(tmpdir).absolute() ++ tarname = f'apache-arrow-{VERSION}.tar.gz' ++ tarpath = tmpdir / tarname ++ urllib.request.urlretrieve(f"https://github.com/apache/arrow/archive/refs/tags/{tarname}", tarpath) ++ with tarfile.open(tarpath) as tar: ++ tar.extractall(tmpdir) ++ arrow_dir = tmpdir / f'arrow-apache-arrow-{VERSION}' ++ assert arrow_dir.is_dir() ++ arrow_dist = tmpdir / 'arrow-dist' ++ build_dir = tmpdir / 'arrow-build' ++ subprocess.check_call([ ++ 'cmake', '-S', str(arrow_dir / 'cpp'), '-B', str(build_dir), ++ '-DCMAKE_INSTALL_LIBDIR=lib', ++ f'-DCMAKE_INSTALL_PREFIX={arrow_dist}', ++ '-DCMAKE_BUILD_TYPE=Release', ++ '-DARROW_RPATH_ORIGIN=ON', ++ '-DARROW_BUILD_TESTS=OFF', ++ '-DARROW_BUILD_SHARED=ON', ++ '-DARROW_BUILD_STATIC=OFF', ++ # Features ++ '-DARROW_COMPUTE=ON', ++ '-DARROW_CSV=ON', ++ 
'-DARROW_JSON=ON', ++ '-DARROW_FILESYSTEM=ON', ++ '-DARROW_DATASET=ON', ++ '-DARROW_PARQUET=ON', ++ '-DPARQUET_REQUIRE_ENCRYPTION=ON', ++ '-DARROW_GANDIVA=ON', ++ '-DARROW_WITH_BZ2=ON', ++ '-DARROW_WITH_ZLIB=ON', ++ '-DARROW_WITH_ZSTD=ON', ++ '-DARROW_WITH_LZ4=ON', ++ '-DARROW_WITH_SNAPPY=ON', ++ '-DARROW_WITH_BROTLI=ON', ++ ]) ++ subprocess.check_call([ ++ 'cmake', '--build', str(build_dir), ++ ]) ++ subprocess.check_call([ ++ 'cmake', '--install', str(build_dir), ++ ]) ++ env = os.environ.copy() ++ env['ARROW_HOME'] = str(arrow_dist) ++ env['CMAKE_PREFIX_PATH'] = str(arrow_dist) ++ env['PYARROW_WITH_DATASET'] = '1' ++ env['PYARROW_WITH_PARQUET'] = '1' ++ env['PYARROW_WITH_PARQUET_ENCRYPTION'] = '1' ++ env['PYARROW_WITH_GANDIVA'] = '1' ++ env['PYARROW_BUNDLE_ARROW_CPP'] = '1' ++ env['PYARROW_BUNDLE_CYTHON_CPP'] = '1' ++ subprocess.run( ++ [sys.executable, 'setup.py', 'bdist_wheel'], ++ env=env, ++ check=True, ++ ) ++ wheels = list(Path('dist').glob('*.whl')) ++ assert len(wheels) == 1, f"Expected 1 wheel, found {len(wheels)}" ++ wheel = wheels[0] ++ shutil.copyfile(wheel, wheel_directory / wheel.name) ++ return str(wheel.name) +diff --git a/pyproject.toml b/pyproject.toml +index 2fbe78e..7a80e39 100644 +--- a/pyproject.toml ++++ b/pyproject.toml +@@ -26,7 +26,7 @@ requires = [ + ] + # We use a really simple build backend wrapper over scikit-build-core + # to solve licenses to work around links not being included in sdists. +-build-backend = "_build_backend" ++build-backend = "pyarrow_build_backend" + backend-path = ["."] + + [project] diff --git a/scripts/check_patch_updates.py b/scripts/check_patch_updates.py new file mode 100755 index 0000000000..77af704e6d --- /dev/null +++ b/scripts/check_patch_updates.py @@ -0,0 +1,317 @@ +#!/usr/bin/python +# +# Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved. +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+# +# The Universal Permissive License (UPL), Version 1.0 +# +# Subject to the condition set forth below, permission is hereby granted to any +# person obtaining a copy of this software, associated documentation and/or +# data (collectively the "Software"), free of charge and under any and all +# copyright rights in the Software, and any and all patent rights owned or +# freely licensable by each licensor hereunder covering either (i) the +# unmodified Software as contributed to or provided by such licensor, or (ii) +# the Larger Works (as defined below), to deal in both +# +# (a) the Software, and +# +# (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if +# one is included with the Software each a "Larger Work" to which the Software +# is contributed by such licensors), +# +# without restriction, including without limitation the rights to copy, create +# derivative works of, display, perform, and distribute the Software and make, +# use, sell, offer for sale, import, export, have made, and have sold the +# Software and the Larger Work(s), and to sublicense the foregoing rights on +# either these or other terms. +# +# This license is subject to the following condition: +# +# The above copyright notice and either this complete permission notice or at a +# minimum a reference to the UPL must be included in all copies or substantial +# portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +from __future__ import annotations + +import argparse +import json +import sys +import urllib.error +import urllib.parse +import urllib.request +from dataclasses import dataclass +from pathlib import Path +from typing import Any + +try: + import tomllib +except ModuleNotFoundError: + from pip._vendor import tomli as tomllib # pylint: disable=no-name-in-module + +try: + from packaging.specifiers import InvalidSpecifier, SpecifierSet + from packaging.version import InvalidVersion, Version +except ModuleNotFoundError: + from pip._vendor.packaging.specifiers import InvalidSpecifier, SpecifierSet # pylint: disable=no-name-in-module + from pip._vendor.packaging.version import InvalidVersion, Version # pylint: disable=no-name-in-module + + +DEFAULT_METADATA = Path(__file__).resolve().parents[1] / "graalpython" / "lib-graalpython" / "patches" / "metadata.toml" +PYPI_PROJECT_URL = "https://pypi.org/pypi/{project}/json" + + +@dataclass(frozen=True) +class Rule: + package: str + index: int + version: str + specifier: SpecifierSet + patch: str + + +@dataclass(frozen=True) +class PackageUpdate: + package: str + latest: Version + latest_patched: Version | None + rules: tuple[Rule, ...] + matched_by_metadata: bool + + +class CheckError(RuntimeError): + pass + + +def load_metadata(path: Path) -> dict[str, Any]: + with path.open("rb") as file: + data = tomllib.load(file) + if not isinstance(data, dict): + raise CheckError(f"{path} does not contain a TOML table") + return data + + +def has_upper_bound(specifier: SpecifierSet) -> bool: + for spec in specifier: + if spec.operator in {"<", "<=", "~="}: + return True + if spec.operator == "==": + # PEP 440 prefix matching, such as "== 1.4.*", implies an upper bound. 
+ return True + return False + + +def iter_upper_bounded_patch_rules(metadata: dict[str, Any]) -> list[Rule]: + result: list[Rule] = [] + for package, package_metadata in metadata.items(): + rules = package_metadata.get("rules", []) + for index, rule in enumerate(rules, start=1): + version = rule.get("version") + patch = rule.get("patch") + if not version or not patch or rule.get("install-priority") == 0: + continue + try: + specifier = SpecifierSet(version) + except InvalidSpecifier as exc: + raise CheckError(f"{package}.rules[{index}] has invalid version specifier {version!r}: {exc}") from exc + if has_upper_bound(specifier): + result.append(Rule(package, index, version, specifier, patch)) + return result + + +def iter_metadata_specifiers(package_metadata: dict[str, Any]) -> list[SpecifierSet]: + result: list[SpecifierSet] = [] + for rule in package_metadata.get("rules", []): + if version := rule.get("version"): + result.append(SpecifierSet(version)) + else: + result.append(SpecifierSet("")) + return result + + +def fetch_pypi_releases(package: str, timeout: int) -> list[tuple[Version, list[dict[str, Any]]]]: + quoted = urllib.parse.quote(package, safe="") + request = urllib.request.Request(PYPI_PROJECT_URL.format(project=quoted), headers={"Accept": "application/json"}) + try: + with urllib.request.urlopen(request, timeout=timeout) as response: + payload = json.load(response) + except urllib.error.HTTPError as exc: + if exc.code == 404: + raise CheckError(f"{package}: package not found on PyPI") from exc + raise CheckError(f"{package}: PyPI returned HTTP {exc.code}") from exc + except urllib.error.URLError as exc: + raise CheckError(f"{package}: could not reach PyPI: {exc.reason}") from exc + except json.JSONDecodeError as exc: + raise CheckError(f"{package}: PyPI returned invalid JSON") from exc + + releases = payload.get("releases") + if not isinstance(releases, dict): + raise CheckError(f"{package}: PyPI returned an unexpected payload") + + parsed: 
list[tuple[Version, list[dict[str, Any]]]] = [] + for version_text, files in releases.items(): + try: + version = Version(version_text) + except InvalidVersion: + continue + if isinstance(files, list): + parsed.append((version, [file for file in files if isinstance(file, dict)])) + return parsed + + +def release_is_usable( + version: Version, files: list[dict[str, Any]], include_prereleases: bool, include_yanked: bool +) -> bool: + if version.is_prerelease and not include_prereleases: + return False + if not files: + return False + if include_yanked: + return True + return not all(file.get("yanked", False) for file in files) + + +def latest_release( + releases: list[tuple[Version, list[dict[str, Any]]]], include_prereleases: bool, include_yanked: bool +) -> Version | None: + versions = [ + version + for version, files in releases + if release_is_usable(version, files, include_prereleases, include_yanked) + ] + return max(versions, default=None) + + +def latest_matching_release( + releases: list[tuple[Version, list[dict[str, Any]]]], + specifier: SpecifierSet, + include_prereleases: bool, + include_yanked: bool, +) -> Version | None: + versions = [ + version + for version, files in releases + if release_is_usable(version, files, include_prereleases, include_yanked) + and specifier.contains(version, prereleases=True) + ] + return max(versions, default=None) + + +def version_matches_any_metadata_rule(version: Version, package_metadata: dict[str, Any]) -> bool: + return any( + specifier.contains(version, prereleases=True) for specifier in iter_metadata_specifiers(package_metadata) + ) + + +def find_updates( + metadata: dict[str, Any], + timeout: int, + include_prereleases: bool, + include_yanked: bool, + all_rules: bool, +) -> list[PackageUpdate]: + updates: list[PackageUpdate] = [] + rules_by_package: dict[str, list[Rule]] = {} + for rule in iter_upper_bounded_patch_rules(metadata): + rules_by_package.setdefault(rule.package, []).append(rule) + + for package, rules in 
rules_by_package.items(): + releases = fetch_pypi_releases(package, timeout) + latest = latest_release(releases, include_prereleases, include_yanked) + if latest is None: + continue + + outdated_rules: list[Rule] = [] + latest_patched: Version | None = None + for rule in rules: + patched = latest_matching_release(releases, rule.specifier, include_prereleases, include_yanked) + if patched is not None: + latest_patched = max(latest_patched, patched) if latest_patched is not None else patched + if patched is not None and latest > patched: + outdated_rules.append(rule) + + matched_by_metadata = version_matches_any_metadata_rule(latest, metadata[package]) + if all_rules: + selected_rules = tuple(outdated_rules) + elif matched_by_metadata: + selected_rules = () + else: + selected_rules = tuple(rules) + + if selected_rules: + updates.append(PackageUpdate(package, latest, latest_patched, selected_rules, matched_by_metadata)) + return updates + + +def print_updates(updates: list[PackageUpdate], all_rules: bool) -> None: + if not updates: + print("No patch update candidates found.") + return + + package_width = max(len(update.package) for update in updates) + latest_width = max(len(str(update.latest)) for update in updates) + patched_width = max(len(str(update.latest_patched or "-")) for update in updates) + print(f"{'package'.ljust(package_width)} {'latest'.ljust(latest_width)} {'patched'.ljust(patched_width)} rules") + print(f"{'-' * package_width} {'-' * latest_width} {'-' * patched_width} -----") + for update in updates: + rules = ", ".join(f"#{rule.index} {rule.version} ({rule.patch})" for rule in update.rules) + if all_rules and update.matched_by_metadata: + rules = f"{rules}; latest is already covered by another metadata rule" + print( + f"{update.package.ljust(package_width)} {str(update.latest).ljust(latest_width)} " + f"{str(update.latest_patched or '-').ljust(patched_width)} {rules}" + ) + + +def main(argv: list[str] | None = None) -> int: + parser = 
argparse.ArgumentParser( + description="Check upper-bounded GraalPy package patch rules against the newest PyPI releases." + ) + parser.add_argument( + "metadata", + nargs="?", + type=Path, + default=DEFAULT_METADATA, + help=f"Path to metadata.toml. Defaults to {DEFAULT_METADATA}", + ) + parser.add_argument("--timeout", type=int, default=30, help="PyPI request timeout in seconds.") + parser.add_argument("--include-prereleases", action="store_true", help="Consider pre-release PyPI versions.") + parser.add_argument("--include-yanked", action="store_true", help="Consider yanked PyPI releases.") + parser.add_argument( + "--all-rules", + action="store_true", + help=( + "Show every outdated upper-bounded patch rule, even when a later metadata rule already covers PyPI latest." + ), + ) + parser.add_argument("--fail-on-updates", action="store_true", help="Exit with status 1 when candidates are found.") + args = parser.parse_args(argv) + + try: + metadata = load_metadata(args.metadata) + updates = find_updates( + metadata, + args.timeout, + args.include_prereleases, + args.include_yanked, + args.all_rules, + ) + except CheckError as exc: + print(exc, file=sys.stderr) + return 2 + + print_updates(updates, args.all_rules) + if updates and args.fail_on_updates: + return 1 + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/scripts/get_pypi_source.py b/scripts/get_pypi_source.py new file mode 100755 index 0000000000..0524e6138f --- /dev/null +++ b/scripts/get_pypi_source.py @@ -0,0 +1,258 @@ +# Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved. +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+# +# The Universal Permissive License (UPL), Version 1.0 +# +# Subject to the condition set forth below, permission is hereby granted to any +# person obtaining a copy of this software, associated documentation and/or +# data (collectively the "Software"), free of charge and under any and all +# copyright rights in the Software, and any and all patent rights owned or +# freely licensable by each licensor hereunder covering either (i) the +# unmodified Software as contributed to or provided by such licensor, or (ii) +# the Larger Works (as defined below), to deal in both +# +# (a) the Software, and +# +# (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if +# one is included with the Software each a "Larger Work" to which the Software +# is contributed by such licensors), +# +# without restriction, including without limitation the rights to copy, create +# derivative works of, display, perform, and distribute the Software and make, +# use, sell, offer for sale, import, export, have made, and have sold the +# Software and the Larger Work(s), and to sublicense the foregoing rights on +# either these or other terms. +# +# This license is subject to the following condition: +# +# The above copyright notice and either this complete permission notice or at a +# minimum a reference to the UPL must be included in all copies or substantial +# portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
#!/usr/bin/env python3
# NOTE(review): this shebang is inert unless moved above the license header —
# the file is committed with mode 100755, so line 1 must be the shebang. TODO confirm placement.
"""Download and extract a PyPI package artifact into a temporary git repository.

Given ``name`` or ``name==version``, this script fetches the best-matching
artifact (universal wheel preferred, then an sdist archive), unpacks it into a
fresh temporary directory, initializes a git repository with an initial commit,
runs GraalPy's ``autopatch_capi.py`` over it, and prints the prepared path.
Falls back to ``[[<name>.add-sources]]`` URLs from ``metadata.toml`` when PyPI
publishes no suitable source artifact.
"""

import argparse
import json
import os
import re
import shutil
import subprocess
import sys
import tarfile
import tempfile
import urllib.error
import urllib.parse
import urllib.request
import zipfile
from pathlib import Path

try:
    import tomllib
except ModuleNotFoundError:
    # tomllib is stdlib only on 3.11+; metadata.toml lookups degrade to an
    # explicit error in find_add_source() when it is unavailable.
    tomllib = None


PYPI_URL = "https://pypi.org/pypi"
# Repo-relative path of the patch metadata consulted for add-sources fallbacks.
METADATA_TOML = Path(__file__).resolve().parents[1] / "graalpython" / "lib-graalpython" / "patches" / "metadata.toml"
# Timeout (seconds) for every network request so the script cannot hang forever.
REQUEST_TIMEOUT = 60


def eprint(*args, **kwargs):
    """Print progress to stderr; stdout is reserved for the final result path."""
    print(*args, file=sys.stderr, **kwargs)


def parse_pkg(pkg_arg):
    """Split ``name==version`` (or bare ``name``) into (normalized name, version or None).

    The name is normalized the way PyPI/pip normalize project names:
    lowercased, with runs of ``-``, ``_`` and ``.`` collapsed to a single ``-``.
    A dangling ``name==`` is treated the same as a bare name (latest version).

    Raises:
        ValueError: if the package name part is empty.
    """
    name, sep, version = pkg_arg.partition("==")
    if not sep or not version:
        # No '==' at all, or an empty version after it: resolve to latest.
        version = None
    if not name:
        raise ValueError("missing package name")
    return re.sub(r"[-_.]+", "-", name).lower(), version


def pypi_json(name, version=None):
    """Query the PyPI JSON API for *name*.

    Returns:
        (version, files): the resolved version (latest when *version* is None)
        and the list of release-file dicts for it; an empty list when the
        requested version is not a known release.
    """
    url = f"{PYPI_URL}/{name}/json"
    with urllib.request.urlopen(url, timeout=REQUEST_TIMEOUT) as resp:
        data = json.load(resp)
    if version is None:
        version = data["info"]["version"]
    return version, data["releases"].get(version, [])


def choose_artifact(files):
    """Pick the preferred artifact from a PyPI release-file list.

    Preference order (matching ~/scripts/get-source.py): a universal
    ("-none-any") wheel first, then any ``.tar.gz``/``.zip`` source archive.

    Returns:
        (file_info, artifact_type) where artifact_type is "wheel" or "sdist",
        or (None, None) when nothing suitable is published.
    """
    for file_info in files:
        filename = file_info["filename"]
        if filename.endswith(".whl") and "-none-any.whl" in filename:
            return file_info, "wheel"
    for file_info in files:
        if file_info["filename"].endswith((".tar.gz", ".zip")):
            return file_info, "sdist"
    return None, None


def find_add_source(name, version):
    """Return the add-sources URL recorded in metadata.toml for name==version, or None."""
    if not version or not METADATA_TOML.is_file():
        return None
    if tomllib is None:
        raise RuntimeError("Reading metadata.toml requires Python 3.11 or newer")
    with open(METADATA_TOML, "rb") as metadata_file:
        metadata = tomllib.load(metadata_file)
    for add_source in metadata.get(name, {}).get("add-sources", []):
        if add_source.get("version") == version:
            return add_source["url"]
    return None


def artifact_from_add_source(name, version):
    """Build a synthetic sdist artifact dict from a metadata.toml add-sources entry.

    Returns (artifact, "sdist") or (None, None) when no entry matches.
    """
    url = find_add_source(name, version)
    if url is None:
        return None, None
    # Derive a filename from the URL path; fall back to a conventional name.
    filename = urllib.parse.urlparse(url).path.rsplit("/", 1)[-1] or f"{name}-{version}.tar.gz"
    return {"filename": filename, "url": url}, "sdist"


def no_source_error(name, version):
    """Exit with an explanatory error. Raises SystemExit; never returns."""
    sys.exit(
        f"No suitable source found for {name}=={version}. "
        f"Hint: If upstream doesn't publish sources on PyPI, you can add a [[{name}.add-sources]] entry to {METADATA_TOML} pointing to a release tarball and try again."
    )


def download_file(url, dest):
    """Stream *url* to the local path *dest*."""
    with urllib.request.urlopen(url, timeout=REQUEST_TIMEOUT) as resp, open(dest, "wb") as out:
        shutil.copyfileobj(resp, out)


def safe_extract_zip(zip_path, target_dir):
    """Extract a zip archive, refusing entries that would escape *target_dir*."""
    target = Path(target_dir).resolve()
    with zipfile.ZipFile(zip_path, "r") as zip_file:
        for member in zip_file.infolist():
            destination = (target / member.filename).resolve()
            if target != destination and target not in destination.parents:
                raise RuntimeError(f"Refusing to extract {member.filename!r} outside {target}")
        zip_file.extractall(target)


def safe_extract_tar(tar_path, target_dir):
    """Extract a tar archive safely.

    Prefers the stdlib ``filter="data"`` sanitizer (3.12+, backported to
    security releases); on older interpreters falls back to a manual
    path-escape check before extracting.
    """
    with tarfile.open(tar_path, "r:*") as tar:
        try:
            tar.extractall(path=target_dir, filter="data")
        except TypeError:
            # filter= not supported on this interpreter: validate manually.
            target = Path(target_dir).resolve()
            for member in tar.getmembers():
                destination = (target / member.name).resolve()
                if target != destination and target not in destination.parents:
                    raise RuntimeError(f"Refusing to extract {member.name!r} outside {target}")
            tar.extractall(path=target_dir)


def flatten_single_directory(target_dir):
    """If *target_dir* contains exactly one directory, hoist its contents up one level.

    Sdists conventionally wrap everything in a ``name-version/`` directory; the
    prepared tree should have the package sources at the top level. A staging
    directory inside *target_dir* avoids name collisions during the two moves.
    """
    entries = os.listdir(target_dir)
    if len(entries) != 1:
        return
    inner_dir = os.path.join(target_dir, entries[0])
    if not os.path.isdir(inner_dir):
        return
    staging_dir = tempfile.mkdtemp(prefix=".get-pypi-source-", dir=target_dir)
    try:
        for entry in os.listdir(inner_dir):
            shutil.move(os.path.join(inner_dir, entry), staging_dir)
        os.rmdir(inner_dir)
        for entry in os.listdir(staging_dir):
            shutil.move(os.path.join(staging_dir, entry), target_dir)
    finally:
        if os.path.isdir(staging_dir):
            os.rmdir(staging_dir)


def unpack(archive_path, artifact_type, target_dir):
    """Extract *archive_path* into *target_dir*, flattening sdist wrapper dirs.

    Wheels are plain zips with no wrapper directory, so they are not flattened.
    """
    if artifact_type == "wheel":
        safe_extract_zip(archive_path, target_dir)
    elif str(archive_path).endswith(".zip"):
        safe_extract_zip(archive_path, target_dir)
        flatten_single_directory(target_dir)
    else:
        safe_extract_tar(archive_path, target_dir)
        flatten_single_directory(target_dir)


def run(cmd, cwd, **kwargs):
    """Run *cmd* in *cwd*, raising CalledProcessError on failure."""
    subprocess.run(cmd, cwd=cwd, check=True, **kwargs)


def init_git_repo(target_dir):
    """Turn *target_dir* into a git repository with all files in an initial commit."""
    run(["git", "init"], cwd=target_dir)
    # -f: include files that a shipped .gitignore would otherwise exclude.
    run(["git", "add", "-A", "-f"], cwd=target_dir)
    run(["git", "commit", "--quiet", "-m", "Initial commit"], cwd=target_dir)


def has_git_changes(target_dir):
    """Return True when the work tree in *target_dir* has uncommitted changes."""
    result = subprocess.run(
        ["git", "status", "--porcelain"],
        cwd=target_dir,
        check=True,
        text=True,
        stdout=subprocess.PIPE,
    )
    return bool(result.stdout.strip())


def autopatch_capi(target_dir):
    """Run GraalPy's autopatch_capi.py over the tree and commit any changes it made."""
    repo_root = Path(__file__).resolve().parents[1]
    autopatch_script = repo_root / "graalpython" / "lib-graalpython" / "modules" / "autopatch_capi.py"
    run([sys.executable, str(autopatch_script), target_dir], cwd=repo_root)
    if has_git_changes(target_dir):
        run(["git", "add", "-A", "-f"], cwd=target_dir)
        run(["git", "commit", "--quiet", "-m", "Autopatched"], cwd=target_dir)


def main(argv=None):
    """Resolve, download, unpack and prepare the requested package source."""
    parser = argparse.ArgumentParser(
        description="Download and extract a PyPI package artifact into a temporary directory."
    )
    parser.add_argument("package", help="Package specifier, optionally with an exact version: name==version")
    args = parser.parse_args(argv)

    name, requested_version = parse_pkg(args.package)
    try:
        version, files = pypi_json(name, requested_version)
    except urllib.error.HTTPError as exc:
        # Unknown project on PyPI: an exact requested version may still be
        # resolvable via a metadata.toml add-sources entry below.
        if requested_version is None or exc.code != 404:
            raise
        version = requested_version
        files = []
    artifact, artifact_type = choose_artifact(files)
    if artifact is None:
        artifact, artifact_type = artifact_from_add_source(name, version)
    if artifact is None:
        no_source_error(name, version)  # exits; never returns

    target_dir = tempfile.mkdtemp(prefix=f"{name}-{version}-")
    try:
        with tempfile.TemporaryDirectory() as download_dir:
            archive_path = os.path.join(download_dir, artifact["filename"])
            eprint(f"Downloading {artifact['filename']}")
            download_file(artifact["url"], archive_path)
            unpack(archive_path, artifact_type, target_dir)
        init_git_repo(target_dir)
    except Exception:
        # Don't leave a half-prepared directory behind on failure.
        shutil.rmtree(target_dir, ignore_errors=True)
        raise
    autopatch_capi(target_dir)

    print(f"Prepared source at: {target_dir}")


if __name__ == "__main__":
    main()