diff --git a/dev/requirements.txt b/dev/requirements.txt
index 2508b79d5e16..61c7e540f3bf 100644
--- a/dev/requirements.txt
+++ b/dev/requirements.txt
@@ -3,7 +3,7 @@ py4j>=0.10.9.9
 
 # PySpark dependencies (optional)
 numpy>=1.22
-pyarrow>=15.0.0
+pyarrow>=18.0.0
 six==1.16.0
 pandas>=2.2.0
 scipy
diff --git a/dev/spark-test-image/python-minimum/Dockerfile b/dev/spark-test-image/python-minimum/Dockerfile
index 575b4afdd02c..2981223c19f6 100644
--- a/dev/spark-test-image/python-minimum/Dockerfile
+++ b/dev/spark-test-image/python-minimum/Dockerfile
@@ -24,7 +24,7 @@ LABEL org.opencontainers.image.ref.name="Apache Spark Infra Image For PySpark wi
 # Overwrite this label to avoid exposing the underlying Ubuntu OS version label
 LABEL org.opencontainers.image.version=""
 
-ENV FULL_REFRESH_DATE=20250703
+ENV FULL_REFRESH_DATE=20251225
 
 ENV DEBIAN_FRONTEND=noninteractive
 ENV DEBCONF_NONINTERACTIVE_SEEN=true
@@ -62,7 +62,7 @@ RUN apt-get update && apt-get install -y \
     wget \
     zlib1g-dev
 
 
-ARG BASIC_PIP_PKGS="numpy==1.22.4 pyarrow==15.0.0 pandas==2.2.0 six==1.16.0 scipy scikit-learn coverage unittest-xml-reporting"
+ARG BASIC_PIP_PKGS="numpy==1.22.4 pyarrow==18.0.0 pandas==2.2.0 six==1.16.0 scipy scikit-learn coverage unittest-xml-reporting"
 # Python deps for Spark Connect
 ARG CONNECT_PIP_PKGS="grpcio==1.76.0 grpcio-status==1.76.0 googleapis-common-protos==1.71.0 zstandard==0.25.0 graphviz==0.20 protobuf"
diff --git a/dev/spark-test-image/python-ps-minimum/Dockerfile b/dev/spark-test-image/python-ps-minimum/Dockerfile
index 5142d46cc3eb..94edd66c7138 100644
--- a/dev/spark-test-image/python-ps-minimum/Dockerfile
+++ b/dev/spark-test-image/python-ps-minimum/Dockerfile
@@ -24,7 +24,7 @@ LABEL org.opencontainers.image.ref.name="Apache Spark Infra Image For Pandas API
 # Overwrite this label to avoid exposing the underlying Ubuntu OS version label
 LABEL org.opencontainers.image.version=""
 
-ENV FULL_REFRESH_DATE=20250708
+ENV FULL_REFRESH_DATE=20251225
 
 ENV DEBIAN_FRONTEND=noninteractive
 ENV DEBCONF_NONINTERACTIVE_SEEN=true
@@ -63,7 +63,7 @@ RUN apt-get update && apt-get install -y \
     zlib1g-dev
 
 
-ARG BASIC_PIP_PKGS="pyarrow==15.0.0 pandas==2.2.0 six==1.16.0 numpy scipy coverage unittest-xml-reporting"
+ARG BASIC_PIP_PKGS="pyarrow==18.0.0 pandas==2.2.0 six==1.16.0 numpy scipy coverage unittest-xml-reporting"
 # Python deps for Spark Connect
 ARG CONNECT_PIP_PKGS="grpcio==1.76.0 grpcio-status==1.76.0 googleapis-common-protos==1.71.0 zstandard==0.25.0 graphviz==0.20 protobuf"
diff --git a/python/docs/source/getting_started/install.rst b/python/docs/source/getting_started/install.rst
index 6b5a09205e4a..8ee075e693ea 100644
--- a/python/docs/source/getting_started/install.rst
+++ b/python/docs/source/getting_started/install.rst
@@ -226,7 +226,7 @@ Installable with ``pip install "pyspark[connect]"``.
 Package                    Supported version Note
 ========================== ================= ==========================
 `pandas`                   >=2.2.0           Required for Spark Connect
-`pyarrow`                  >=15.0.0          Required for Spark Connect
+`pyarrow`                  >=18.0.0          Required for Spark Connect
 `grpcio`                   >=1.76.0          Required for Spark Connect
 `grpcio-status`            >=1.76.0          Required for Spark Connect
 `googleapis-common-protos` >=1.71.0          Required for Spark Connect
@@ -243,7 +243,7 @@ Installable with ``pip install "pyspark[sql]"``.
 Package   Supported version Note
 ========= ================= ======================
 `pandas`  >=2.2.0           Required for Spark SQL
-`pyarrow` >=15.0.0          Required for Spark SQL
+`pyarrow` >=18.0.0          Required for Spark SQL
 ========= ================= ======================
 
 Additional libraries that enhance functionality but are not included in the installation packages:
@@ -260,7 +260,7 @@ Installable with ``pip install "pyspark[pandas_on_spark]"``.
 Package   Supported version Note
 ========= ================= ================================
 `pandas`  >=2.2.0           Required for Pandas API on Spark
-`pyarrow` >=15.0.0          Required for Pandas API on Spark
+`pyarrow` >=18.0.0          Required for Pandas API on Spark
 ========= ================= ================================
 
 Additional libraries that enhance functionality but are not included in the installation packages:
@@ -310,7 +310,7 @@ Installable with ``pip install "pyspark[pipelines]"``. Includes all dependencies
 Package                    Supported version Note
 ========================== ================= ===================================================
 `pandas`                   >=2.2.0           Required for Spark Connect and Spark SQL
-`pyarrow`                  >=15.0.0          Required for Spark Connect and Spark SQL
+`pyarrow`                  >=18.0.0          Required for Spark Connect and Spark SQL
 `grpcio`                   >=1.76.0          Required for Spark Connect
 `grpcio-status`            >=1.76.0          Required for Spark Connect
 `googleapis-common-protos` >=1.71.0          Required for Spark Connect
diff --git a/python/pyspark/sql/classic/dataframe.py b/python/pyspark/sql/classic/dataframe.py
index 6533f8a5ffc0..634006bdbf8c 100644
--- a/python/pyspark/sql/classic/dataframe.py
+++ b/python/pyspark/sql/classic/dataframe.py
@@ -2011,7 +2011,7 @@ def _test() -> None:
     import pyarrow as pa
     from pyspark.loose_version import LooseVersion
 
-    if LooseVersion(pa.__version__) < LooseVersion("17.0.0"):
+    if LooseVersion(pa.__version__) < LooseVersion("21.0.0"):
         del pyspark.sql.dataframe.DataFrame.mapInArrow.__doc__
 
     spark = (
diff --git a/python/pyspark/sql/connect/dataframe.py b/python/pyspark/sql/connect/dataframe.py
index d10a576dec77..e60b1c21c556 100644
--- a/python/pyspark/sql/connect/dataframe.py
+++ b/python/pyspark/sql/connect/dataframe.py
@@ -2376,7 +2376,7 @@ def _test() -> None:
     import pyarrow as pa
     from pyspark.loose_version import LooseVersion
 
-    if LooseVersion(pa.__version__) < LooseVersion("17.0.0"):
+    if LooseVersion(pa.__version__) < LooseVersion("21.0.0"):
         del pyspark.sql.dataframe.DataFrame.mapInArrow.__doc__
 
     globs["spark"] = (
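For context, a minimal sketch of the doctest-gating pattern the last two hunks adjust; it mirrors what the patch already shows rather than adding anything new. The runtime floor moves to PyArrow 18.0.0 (requirements, Dockerfiles, docs), while the `mapInArrow` doctest gate moves to 21.0.0, presumably because those examples exercise behavior newer than the runtime floor. The harness deletes the docstring when PyArrow is too old, so doctest collects no examples for `mapInArrow` instead of failing:

# Illustrative sketch only (not part of the patch): the version gate used
# by the _test() harnesses in the two dataframe.py hunks above.
import pyarrow as pa

import pyspark.sql.dataframe
from pyspark.loose_version import LooseVersion

if LooseVersion(pa.__version__) < LooseVersion("21.0.0"):
    # Older PyArrow: drop the docstring so its doctests are skipped
    # rather than run against an Arrow version they don't support.
    del pyspark.sql.dataframe.DataFrame.mapInArrow.__doc__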