From 888c52f5149e707578f0539a372cd6f49b92c2a4 Mon Sep 17 00:00:00 2001 From: Andrew Ma <136692+ajma@users.noreply.github.com> Date: Tue, 7 Apr 2026 15:47:52 -0700 Subject: [PATCH] fix: Clean up temporary PyPI artifact files after use write_packages_config() creates temp JSON files that were never deleted, accumulating on disk over many addArtifacts() calls. Now removes the file, and its parent directory if that is left empty, in a finally block that runs whether addArtifact succeeds or raises; cleanup is best-effort and any OSError it raises is ignored. --- google/cloud/dataproc_spark_connect/session.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/google/cloud/dataproc_spark_connect/session.py b/google/cloud/dataproc_spark_connect/session.py index 7ad24fe..6af6744 100644 --- a/google/cloud/dataproc_spark_connect/session.py +++ b/google/cloud/dataproc_spark_connect/session.py @@ -1214,10 +1214,17 @@ def addArtifacts( if pypi: artifacts = PyPiArtifacts(set(artifact)) logger.debug("Making addArtifact call to install packages") - self.addArtifact( - artifacts.write_packages_config(self._active_s8s_session_uuid), - file=True, + config_path = artifacts.write_packages_config( + self._active_s8s_session_uuid ) + try: + self.addArtifact(config_path, file=True) + finally: + try: + os.remove(config_path) + os.rmdir(os.path.dirname(config_path)) + except OSError: + pass else: super().addArtifacts( *artifact, pyfile=pyfile, archive=archive, file=file