From 3eb46f73b4083afe5ed0220a4205e3123202664a Mon Sep 17 00:00:00 2001 From: Jitka Halova Date: Thu, 30 Apr 2026 13:32:53 +0200 Subject: [PATCH] Fix pull-through failing to check repository when package was not in remote --- CHANGES/1004.bugfix | 1 + pulp_python/app/pypi/views.py | 52 ++++++++++--------- .../tests/functional/api/test_full_mirror.py | 34 ++++++++++-- 3 files changed, 58 insertions(+), 29 deletions(-) create mode 100644 CHANGES/1004.bugfix diff --git a/CHANGES/1004.bugfix b/CHANGES/1004.bugfix new file mode 100644 index 000000000..8d1132fe0 --- /dev/null +++ b/CHANGES/1004.bugfix @@ -0,0 +1 @@ +Fixed pull-through caching not checking the repository if package was not present on remote. diff --git a/pulp_python/app/pypi/views.py b/pulp_python/app/pypi/views.py index ac62c6748..863795e31 100644 --- a/pulp_python/app/pypi/views.py +++ b/pulp_python/app/pypi/views.py @@ -15,6 +15,7 @@ HttpResponse, HttpResponseBadRequest, HttpResponseForbidden, + HttpResponseNotFound, StreamingHttpResponse, ) from django.shortcuts import redirect @@ -255,7 +256,7 @@ def parse_package(release_package): rfilter = get_remote_package_filter(remote) if not rfilter.filter_project(package): - raise Http404(f"{package} does not exist.") + return {} url = remote.get_remote_artifact_url(f"simple/{package}/") remote.headers = remote.headers or [] @@ -263,19 +264,19 @@ def parse_package(release_package): downloader = remote.get_downloader(url=url, max_retries=1) try: d = downloader.fetch() - except ClientError: - return HttpResponse(f"Failed to fetch {package} from {remote.url}.", status=502) - except TimeoutException: - return HttpResponse(f"{remote.url} timed out while fetching {package}.", status=504) + except (ClientError, TimeoutException): + log.info(f"Failed to fetch {package} simple page from {remote.url}") + return {} if d.headers["content-type"] == "application/vnd.pypi.simple.v1+json": page = ProjectPage.from_json_data(json.load(open(d.path, "rb")), base_url=url) else: page = ProjectPage.from_html(package, open(d.path, "rb").read(), base_url=url) - packages = [ - parse_package(p) for p in page.packages if rfilter.filter_release(package, p.version) - ] - return HttpResponse(write_simple_detail(package, packages)) + return { + p.filename: parse_package(p) + for p in page.packages + if rfilter.filter_release(package, p.version) + } @extend_schema(operation_id="pypi_simple_package_read", summary="Get package simple page") def retrieve(self, request, path, package): @@ -283,24 +284,25 @@ def retrieve(self, request, path, package): repo_ver, content = self.get_rvc() # Should I redirect if the normalized name is different? normalized = canonicalize_name(package) + releases = {} if self.distribution.remote: - return self.pull_through_package_simple(normalized, path, self.distribution.remote) - if self.should_redirect(repo_version=repo_ver): + releases = self.pull_through_package_simple(normalized, path, self.distribution.remote) + elif self.should_redirect(repo_version=repo_ver): return redirect(urljoin(self.base_content_url, f"{path}/simple/{normalized}/")) - packages = ( - content.filter(name__normalize=normalized) - .values_list("filename", "sha256", "name") - .iterator() - ) - try: - present = next(packages) - except StopIteration: - raise Http404(f"{normalized} does not exist.") - else: - packages = chain([present], packages) - name = present[2] - releases = ((f, urljoin(self.base_content_url, f"{path}/{f}"), d) for f, d, _ in packages) - return StreamingHttpResponse(write_simple_detail(name, releases, streamed=True)) + if content: + packages = content.filter(name__normalize=normalized).values("filename", "sha256") + local_releases = { + p["filename"]: ( + p["filename"], + urljoin(self.base_content_url, f"{path}/{p['filename']}"), + p["sha256"], + ) + for p in packages + } + releases.update(local_releases) + if not releases: + return HttpResponseNotFound(f"{normalized} does not exist.") + return HttpResponse(write_simple_detail(normalized, releases.values())) @extend_schema( request=PackageUploadSerializer, diff --git a/pulp_python/tests/functional/api/test_full_mirror.py b/pulp_python/tests/functional/api/test_full_mirror.py index ea07bab2d..247fe3119 100644 --- a/pulp_python/tests/functional/api/test_full_mirror.py +++ b/pulp_python/tests/functional/api/test_full_mirror.py @@ -66,7 +66,7 @@ def test_pull_through_filter(python_remote_factory, python_distribution_factory) r = requests.get(f"{distro.base_url}simple/pulpcore/") assert r.status_code == 404 - assert r.json() == {"detail": "pulpcore does not exist."} + assert r.text == "pulpcore does not exist." r = requests.get(f"{distro.base_url}simple/shelf-reader/") assert r.status_code == 200 @@ -86,11 +86,11 @@ def test_pull_through_filter(python_remote_factory, python_distribution_factory) r = requests.get(f"{distro.base_url}simple/django/") assert r.status_code == 404 - assert r.json() == {"detail": "django does not exist."} + assert r.text == "django does not exist." r = requests.get(f"{distro.base_url}simple/pulpcore/") - assert r.status_code == 502 - assert r.text == f"Failed to fetch pulpcore from {remote.url}." + assert r.status_code == 404 + assert r.text == "pulpcore does not exist." r = requests.get(f"{distro.base_url}simple/shelf-reader/") assert r.status_code == 200 @@ -138,3 +138,29 @@ def test_pull_through_with_repo( assert r.status_code == 200 tasks = pulpcore_bindings.TasksApi.list(reserved_resources=repo.prn) assert tasks.count == 3 + + +@pytest.mark.parallel +def test_pull_through_local_only( + python_remote_factory, python_distribution_factory, python_repo_with_sync +): + """Tests that pull-through checks the repository if the package is not present on the remote.""" + remote = python_remote_factory(url=PYPI_URL, includes=["pulpcore"]) + repo = python_repo_with_sync(remote=remote) + remote2 = python_remote_factory(includes=[]) # Fixtures does not have pulpcore + distro = python_distribution_factory(repository=repo.pulp_href, remote=remote2.pulp_href) + + url = f"{distro.base_url}simple/pulpcore/" + r = requests.get(url) + assert r.status_code == 200 + assert "?redirect=" not in r.text + + url = f"{distro.base_url}simple/shelf-reader/" + r = requests.get(url) + assert r.status_code == 200 + assert "?redirect=" in r.text + + url = f"{distro.base_url}simple/pulp_python/" + r = requests.get(url) + assert r.status_code == 404 + assert r.text == "pulp-python does not exist."