diff --git a/osism/commands/manage.py b/osism/commands/manage.py index 9fed7681..6af7b424 100644 --- a/osism/commands/manage.py +++ b/osism/commands/manage.py @@ -1,13 +1,61 @@ # SPDX-License-Identifier: Apache-2.0 +import re from re import findall from urllib.parse import urljoin from cliff.command import Command from loguru import logger -import requests from osism import utils +from osism.utils.http import fetch_text + +_MARKER_DATE_RE = re.compile(r"\d{4}-\d{2}-\d{2}") +_QCOW2_FILENAME_RE = re.compile(r"\S+\.qcow2") +_SHA256_RE = re.compile(r"[0-9a-f]{64}") + + +def _is_sha256(body: str) -> bool: + """Lowercase-hex sha256, per sha256sum(1) output. + + The OSISM image publishing pipeline produces .CHECKSUM and .sha256 files + via sha256sum, which emits the digest as lowercase hex. The existing + parsing code passes the digest verbatim to image-manager as + sha256:, with no case normalization, so accepting uppercase + here would only paper over a downstream mismatch. + """ + parts = body.strip().split() + return bool(parts) and bool(_SHA256_RE.fullmatch(parts[0])) + + +def _validate_marker(body: str) -> bool: + """Generic marker contract: ``YYYY-MM-DD .qcow2``. + + Intentionally does NOT enforce a specific image-name prefix. CI + exercises a narrow slice of OSISM image variants; production + deployments may publish images with names this code has never seen. + """ + parts = body.strip().split() + return ( + len(parts) >= 2 + and bool(_MARKER_DATE_RE.fullmatch(parts[0])) + and bool(_QCOW2_FILENAME_RE.fullmatch(parts[1])) + ) + + +def _fetch_image_info(base_url, marker_url): + marker_body = fetch_text(marker_url, validate=_validate_marker) + date, image_filename = marker_body.strip().split()[:2] + logger.info(f"date: {date}") + logger.info(f"image: {image_filename}") + url = urljoin(base_url, image_filename) + logger.info(f"url: {url}") + logger.info(f"checksum_url: {url}.CHECKSUM") + checksum_body = fetch_text(f"{url}.CHECKSUM", validate=_is_sha256) + checksum = checksum_body.strip().split()[0] + logger.info(f"checksum: {checksum}") + return date, image_filename, url, checksum + SUPPORTED_CLUSTERAPI_GARDENER_K8S_IMAGES = ["1.33"] SUPPORTED_CLUSTERAPI_K8S_IMAGES = ["1.32", "1.33", "1.34"] @@ -72,27 +120,16 @@ def take_action(self, parsed_args): result = [] for kubernetes_release in supported_cluterapi_k8s_images: - url = urljoin(base_url, f"last-{kubernetes_release}") - - response = requests.get(url) - splitted = response.text.strip().split(" ") - - logger.info(f"date: {splitted[0]}") - logger.info(f"image: {splitted[1]}") + marker_url = urljoin(base_url, f"last-{kubernetes_release}") + date, image_filename, url, checksum = _fetch_image_info( + base_url, marker_url + ) r = findall( - r".*ubuntu-[0-9][02468]04-kube-v(.*\..*\..*).qcow2", splitted[1] + r".*ubuntu-[0-9][02468]04-kube-v(.*\..*\..*).qcow2", image_filename ) logger.info(f"version: {r[0].strip()}") - url = urljoin(base_url, splitted[1]) - logger.info(f"url: {url}") - - logger.info(f"checksum_url: {url}.CHECKSUM") - response_checksum = requests.get(f"{url}.CHECKSUM") - splitted_checksum = response_checksum.text.strip().split(" ") - logger.info(f"checksum: {splitted_checksum[0]}") - from jinja2 import Template from osism.data import TEMPLATE_IMAGE_CLUSTERAPI @@ -101,9 +138,9 @@ def take_action(self, parsed_args): [ template.render( image_url=url, - image_checksum=f"sha256:{splitted_checksum[0]}", + image_checksum=f"sha256:{checksum}", image_version=r[0].strip(), - image_builddate=splitted[0], + image_builddate=date, ) ] ) @@ -193,27 +230,16 @@ def take_action(self, parsed_args): result = [] for kubernetes_release in supported_cluterapi_gardener_k8s_images: - url = urljoin(base_url, f"last-{kubernetes_release}-gardener") - - response = requests.get(url) - splitted = response.text.strip().split(" ") - - logger.info(f"date: {splitted[0]}") - logger.info(f"image: {splitted[1]}") + marker_url = urljoin(base_url, f"last-{kubernetes_release}-gardener") + date, image_filename, url, checksum = _fetch_image_info( + base_url, marker_url + ) r = findall( - r".*ubuntu-[0-9][02468]04-kube-v(.*\..*\..*)\.qcow2", splitted[1] + r".*ubuntu-[0-9][02468]04-kube-v(.*\..*\..*)\.qcow2", image_filename ) logger.info(f"version: {r[0].strip()}") - url = urljoin(base_url, splitted[1]) - logger.info(f"url: {url}") - - logger.info(f"checksum_url: {url}.CHECKSUM") - response_checksum = requests.get(f"{url}.CHECKSUM") - splitted_checksum = response_checksum.text.strip().split(" ") - logger.info(f"checksum: {splitted_checksum[0]}") - from jinja2 import Template from osism.data import TEMPLATE_IMAGE_CLUSTERAPI_GARDENER @@ -222,9 +248,9 @@ def take_action(self, parsed_args): [ template.render( image_url=url, - image_checksum=f"sha256:{splitted_checksum[0]}", + image_checksum=f"sha256:{checksum}", image_version=r[0].strip(), - image_builddate=splitted[0], + image_builddate=date, ) ] ) @@ -319,11 +345,10 @@ def take_action(self, parsed_args): ) logger.info(f"url: {url}") - # Get checksum file checksum_url = f"{url}.sha256" logger.info(f"checksum_url: {checksum_url}") - response_checksum = requests.get(checksum_url) - checksum = response_checksum.text.strip().split()[0] + checksum_body = fetch_text(checksum_url, validate=_is_sha256) + checksum = checksum_body.strip().split()[0] logger.info(f"checksum: {checksum}") from jinja2 import Template @@ -406,22 +431,8 @@ def take_action(self, parsed_args): client = docker.from_env() container = client.containers.get("kolla-ansible") openstack_release = container.labels["de.osism.release.openstack"] - url = urljoin(base_url, f"last-{openstack_release}") - - response = requests.get(url) - splitted = response.text.strip().split(" ") - - logger.info(f"date: {splitted[0]}") - logger.info(f"image: {splitted[1]}") - - url = urljoin(base_url, splitted[1]) - logger.info(f"url: {url}") - - logger.info(f"checksum_url: {url}.CHECKSUM") - response_checksum = requests.get(f"{url}.CHECKSUM") - logger.info(f"checksum_url_status: {response_checksum.status_code}") - splitted_checksum = response_checksum.text.strip().split(" ") - logger.info(f"checksum: {splitted_checksum[0]}") + marker_url = urljoin(base_url, f"last-{openstack_release}") + date, _, url, checksum = _fetch_image_info(base_url, marker_url) template = Template(TEMPLATE_IMAGE_OCTAVIA) result = [] @@ -429,9 +440,9 @@ def take_action(self, parsed_args): [ template.render( image_url=url, - image_checksum=f"sha256:{splitted_checksum[0]}", - image_version=splitted[0], - image_builddate=splitted[0], + image_checksum=f"sha256:{checksum}", + image_version=date, + image_builddate=date, ) ] ) diff --git a/osism/utils/http.py b/osism/utils/http.py new file mode 100644 index 00000000..2f50f28a --- /dev/null +++ b/osism/utils/http.py @@ -0,0 +1,91 @@ +# SPDX-License-Identifier: Apache-2.0 + +"""HTTP fetch helper with retry, content validation, and structured logging.""" + +from __future__ import annotations + +import time +from typing import Callable, Optional + +import requests +from loguru import logger + +RETRYABLE_STATUSES = {408, 429} | set(range(500, 600)) + + +def fetch_text( + url: str, + *, + delays: tuple[float, ...] = (2.0, 4.0, 8.0), + validate: Optional[Callable[[str], bool]] = None, +) -> str: + """Fetch ``url`` as text with retry on transient failures.""" + if not delays: + raise ValueError( + "fetch_text requires non-empty delays; the helper exists to retry" + ) + + attempts = len(delays) + 1 + last_failure: Optional[BaseException] = None + + for n in range(1, attempts + 1): + logger.info(f"fetch_text url={url} attempt={n}/{attempts}") + try: + response = requests.get(url) + response.raise_for_status() + except requests.HTTPError as exc: + status = exc.response.status_code if exc.response is not None else 0 + if status not in RETRYABLE_STATUSES: + logger.info( + f"fetch_text url={url} attempt={n}/{attempts} status={status} non-retryable" + ) + raise + last_failure = exc + if n < attempts: + logger.info( + f"fetch_text url={url} attempt={n}/{attempts} status={status} " + f"retrying in {delays[n - 1]}s" + ) + time.sleep(delays[n - 1]) + continue + logger.info( + f"fetch_text url={url} attempt={n}/{attempts} status={status} giving up" + ) + raise + except requests.RequestException as exc: + last_failure = exc + if n < attempts: + logger.info( + f"fetch_text url={url} attempt={n}/{attempts} " + f"error={type(exc).__name__}({exc}) retrying in {delays[n - 1]}s" + ) + time.sleep(delays[n - 1]) + continue + logger.info( + f"fetch_text url={url} attempt={n}/{attempts} " + f"error={type(exc).__name__}({exc}) giving up" + ) + raise + + status = response.status_code + text = response.text + if validate is not None and not validate(text): + last_failure = ValueError(f"fetch_text validate rejected body for {url!r}") + excerpt = text[:40].replace("\n", "\\n") + if n < attempts: + logger.info( + f"fetch_text url={url} attempt={n}/{attempts} status={status} " + f"invalid_body={excerpt!r} retrying in {delays[n - 1]}s" + ) + time.sleep(delays[n - 1]) + continue + logger.info( + f"fetch_text url={url} attempt={n}/{attempts} status={status} " + f"invalid_body={excerpt!r} giving up" + ) + raise last_failure + + logger.info(f"fetch_text url={url} attempt={n}/{attempts} status={status} ok") + return text + + raise RuntimeError("fetch_text loop exited without return or raise") diff --git a/tests/unit/commands/__init__.py b/tests/unit/commands/__init__.py new file mode 100644 index 00000000..98813136 --- /dev/null +++ b/tests/unit/commands/__init__.py @@ -0,0 +1 @@ +# SPDX-License-Identifier: Apache-2.0 diff --git a/tests/unit/commands/test_manage_validators.py b/tests/unit/commands/test_manage_validators.py new file mode 100644 index 00000000..d3f9fd55 --- /dev/null +++ b/tests/unit/commands/test_manage_validators.py @@ -0,0 +1,80 @@ +# SPDX-License-Identifier: Apache-2.0 + +"""Unit tests for the marker and sha256 body validators in osism.commands.manage.""" + +from osism.commands.manage import _is_sha256, _validate_marker + +# --- Marker validator (M1-M9) --- + + +def test_m1_validates_octavia_marker(): + assert ( + _validate_marker("2026-04-12 octavia-amphora-haproxy-2024.2.20260412.qcow2") + is True + ) + + +def test_m2_validates_capi_marker(): + assert _validate_marker("2026-04-12 ubuntu-2404-kube-v1.33.1.qcow2") is True + + +def test_m3_rejects_xml_error_body(): + body = '\nInternalError' + assert _validate_marker(body) is False + + +def test_m4_rejects_empty_body(): + assert _validate_marker("") is False + + +def test_m5_rejects_single_token(): + assert _validate_marker("2026-04-12") is False + + +def test_m6_rejects_wrong_suffix(): + assert _validate_marker("2026-04-12 random.txt") is False + + +def test_m7_rejects_wrong_date_shape(): + assert _validate_marker("yesterday octavia-amphora-foo.qcow2") is False + + +def test_m8_accepts_unfamiliar_qcow2_name(): + """Production-diversity: validator must accept names CI has never seen.""" + assert _validate_marker("2026-04-12 some-future-amphora-variant.qcow2") is True + + +def test_m9_rejects_filename_with_internal_whitespace(): + """Second token must be a single \\S+\\.qcow2 token.""" + assert _validate_marker("2026-04-12 image-with-spaces in-name.qcow2") is False + + +# --- Checksum validator (S1-S6) --- + + +def test_s1_accepts_lowercase_hex_sha256(): + body = "8ce3f3" + "a" * 58 + " octavia-amphora-haproxy-2024.2.20260412.qcow2" + assert _is_sha256(body) is True + + +def test_s2_rejects_xml_body(): + assert _is_sha256(' ') is False + + +def test_s3_rejects_empty_body(): + assert _is_sha256("") is False + + +def test_s4_rejects_non_hex_64_char_first_token(): + assert ( + _is_sha256("not-hex-but-64-chars-long-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX") + is False + ) + + +def test_s5_rejects_too_short_hex(): + assert _is_sha256("abc123") is False + + +def test_s6_rejects_uppercase_hex(): + assert _is_sha256("ABCDEF" + "0" * 58) is False diff --git a/tests/unit/commands/test_manage_wiring.py b/tests/unit/commands/test_manage_wiring.py new file mode 100644 index 00000000..79e42fad --- /dev/null +++ b/tests/unit/commands/test_manage_wiring.py @@ -0,0 +1,149 @@ +# SPDX-License-Identifier: Apache-2.0 + +"""Wiring tests: verify each Image* command passes the correct validator to fetch_text.""" + +from unittest.mock import MagicMock, patch + +from osism.commands import manage +from osism.commands.manage import _is_sha256, _validate_marker + + +def _stub_docker(release: str = "2024.1") -> MagicMock: + docker_client = MagicMock() + container = MagicMock() + container.labels = {"de.osism.release.openstack": release} + docker_client.containers.get.return_value = container + return docker_client + + +def _stub_args(**overrides) -> MagicMock: + args = MagicMock() + args.no_wait = True # short-circuit handle_task waiting + args.cloud = "octavia" + args.base_url = "https://example.com/octavia/" + args.dry_run = False + args.tag = None + args.filter = None + for k, v in overrides.items(): + setattr(args, k, v) + return args + + +def test_w1_octavia_wires_validators_to_call_sites(): + cmd = manage.ImageOctavia(MagicMock(), MagicMock()) + args = _stub_args() + + fake_marker_body = "2026-04-12 octavia-amphora-haproxy-2024.1.20260412.qcow2" + fake_checksum_body = ("a" * 64) + " octavia-amphora-haproxy-2024.1.20260412.qcow2" + + with patch.object(manage.utils, "check_task_lock_and_exit"), patch( + "docker.from_env", return_value=_stub_docker(release="2024.1") + ), patch("osism.commands.manage.fetch_text") as mock_fetch, patch( + "osism.tasks.openstack.image_manager" + ) as mock_im, patch( + "osism.tasks.handle_task" + ) as mock_handle: + mock_fetch.side_effect = [fake_marker_body, fake_checksum_body] + mock_im.si.return_value.apply_async.return_value = MagicMock(task_id="x") + mock_handle.return_value = 0 + + cmd.take_action(args) + + assert mock_fetch.call_count == 2 + marker_call, checksum_call = mock_fetch.call_args_list + + # Marker fetch + assert marker_call.args[0] == "https://example.com/octavia/last-2024.1" + assert marker_call.kwargs["validate"] is _validate_marker + + # Checksum fetch + assert checksum_call.args[0].endswith(".CHECKSUM") + assert "octavia-amphora-haproxy-2024.1.20260412.qcow2" in checksum_call.args[0] + assert checksum_call.kwargs["validate"] is _is_sha256 + + +def test_w2_clusterapi_wires_validators_to_call_sites(): + cmd = manage.ImageClusterapi(MagicMock(), MagicMock()) + args = _stub_args( + cloud="admin", + base_url="https://example.com/capi/", + filter="1.33", # restrict to a single release for a deterministic call count + ) + + fake_marker_body = "2026-04-12 ubuntu-2404-kube-v1.33.1.qcow2" + fake_checksum_body = ("a" * 64) + " ubuntu-2404-kube-v1.33.1.qcow2" + + with patch.object(manage.utils, "check_task_lock_and_exit"), patch( + "osism.commands.manage.fetch_text" + ) as mock_fetch, patch("osism.tasks.openstack.image_manager") as mock_im, patch( + "osism.tasks.handle_task" + ) as mock_handle: + mock_fetch.side_effect = [fake_marker_body, fake_checksum_body] + mock_im.si.return_value.apply_async.return_value = MagicMock(task_id="x") + mock_handle.return_value = 0 + + cmd.take_action(args) + + assert mock_fetch.call_count == 2 + marker_call, checksum_call = mock_fetch.call_args_list + assert marker_call.args[0] == "https://example.com/capi/last-1.33" + assert marker_call.kwargs["validate"] is _validate_marker + assert checksum_call.args[0].endswith(".CHECKSUM") + assert checksum_call.kwargs["validate"] is _is_sha256 + + +def test_w3_clusterapi_gardener_wires_validators_to_call_sites(): + cmd = manage.ImageClusterapiGardener(MagicMock(), MagicMock()) + args = _stub_args( + cloud="admin", + base_url="https://example.com/capi/", + filter="1.33", + ) + + fake_marker_body = "2026-04-12 ubuntu-2404-kube-v1.33.1.qcow2" + fake_checksum_body = ("a" * 64) + " ubuntu-2404-kube-v1.33.1.qcow2" + + with patch.object(manage.utils, "check_task_lock_and_exit"), patch( + "osism.commands.manage.fetch_text" + ) as mock_fetch, patch("osism.tasks.openstack.image_manager") as mock_im, patch( + "osism.tasks.handle_task" + ) as mock_handle: + mock_fetch.side_effect = [fake_marker_body, fake_checksum_body] + mock_im.si.return_value.apply_async.return_value = MagicMock(task_id="x") + mock_handle.return_value = 0 + + cmd.take_action(args) + + assert mock_fetch.call_count == 2 + marker_call, checksum_call = mock_fetch.call_args_list + assert marker_call.args[0] == "https://example.com/capi/last-1.33-gardener" + assert marker_call.kwargs["validate"] is _validate_marker + assert checksum_call.kwargs["validate"] is _is_sha256 + + +def test_w4_gardenlinux_wires_sha256_validator(): + cmd = manage.ImageGardenlinux(MagicMock(), MagicMock()) + args = _stub_args( + cloud="admin", + base_url="https://example.com/gardenlinux/", + filter="1877.7", # one entry from SUPPORTED_GARDENLINUX_VERSIONS + ) + + fake_checksum_body = ("a" * 64) + " openstack-gardener_prod-amd64-1877.7.qcow2" + + with patch.object(manage.utils, "check_task_lock_and_exit"), patch( + "osism.commands.manage.fetch_text" + ) as mock_fetch, patch("osism.tasks.openstack.image_manager") as mock_im, patch( + "osism.tasks.handle_task" + ) as mock_handle: + mock_fetch.return_value = fake_checksum_body + mock_im.si.return_value.apply_async.return_value = MagicMock(task_id="x") + mock_handle.return_value = 0 + + cmd.take_action(args) + + assert mock_fetch.call_count == 1 + (call,) = mock_fetch.call_args_list + assert call.args[0].endswith(".qcow2.sha256") + assert "1877.7" in call.args[0] + assert call.kwargs["validate"] is _is_sha256 diff --git a/tests/unit/utils/test_http.py b/tests/unit/utils/test_http.py new file mode 100644 index 00000000..be789d1f --- /dev/null +++ b/tests/unit/utils/test_http.py @@ -0,0 +1,273 @@ +# SPDX-License-Identifier: Apache-2.0 + +"""Unit tests for ``osism.utils.http.fetch_text`` retry and validation behavior.""" + +import logging as _logging +from unittest.mock import MagicMock, patch + +import pytest +import requests +from loguru import logger as _loguru_logger + +from osism.utils.http import fetch_text + + +def _make_response(status_code: int, text: str) -> MagicMock: + resp = MagicMock() + resp.status_code = status_code + resp.text = text + resp.raise_for_status = MagicMock() + if status_code >= 400: + resp.raise_for_status.side_effect = requests.HTTPError(response=resp) + return resp + + +def test_fetch_text_first_attempt_success(): + """Test #1: Happy path, first attempt succeeds. No sleep.""" + with patch("osism.utils.http.requests.get") as mock_get, patch( + "osism.utils.http.time.sleep" + ) as mock_sleep: + mock_get.return_value = _make_response(200, "hello\n") + result = fetch_text("https://example.com/x") + assert result == "hello\n" + assert mock_get.call_count == 1 + assert mock_sleep.call_count == 0 + + +def test_fetch_text_503_then_200(): + """Test #2: 503 then 200. One sleep at delays[0].""" + with patch("osism.utils.http.requests.get") as mock_get, patch( + "osism.utils.http.time.sleep" + ) as mock_sleep: + mock_get.side_effect = [ + _make_response(503, "'), + _make_response(200, "good-body"), + ] + + def is_good(b): + return b == "good-body" + + assert ( + fetch_text("https://example.com/x", delays=(1.0,), validate=is_good) + == "good-body" + ) + + +def test_fetch_text_all_validate_fail_raises_value_error(): + """Test #6: All attempts return 200 with invalid body. Raises ValueError.""" + with patch("osism.utils.http.requests.get") as mock_get, patch( + "osism.utils.http.time.sleep" + ): + mock_get.return_value = _make_response(200, " 200 invalid -> ConnectionError -> 200 invalid -> ValueError.""" + with patch("osism.utils.http.requests.get") as mock_get, patch( + "osism.utils.http.time.sleep" + ): + mock_get.side_effect = [ + _make_response(503, " 200 invalid -> 200 invalid -> ConnectionError -> ConnectionError.""" + with patch("osism.utils.http.requests.get") as mock_get, patch( + "osism.utils.http.time.sleep" + ): + mock_get.side_effect = [ + _make_response(503, " ConnectionError -> 200 invalid -> 503 -> HTTPError(503).""" + with patch("osism.utils.http.requests.get") as mock_get, patch( + "osism.utils.http.time.sleep" + ): + mock_get.side_effect = [ + _make_response(200, " stdlib logging so pytest's caplog fixture sees the records. + + Returns the handler_id so the caller can remove it during cleanup. + """ + return _loguru_logger.add( + lambda msg: _logging.getLogger("loguru").info(msg.record["message"]), + format="{message}", + ) + + +def test_fetch_text_logs_two_lines_per_attempt(caplog): + """Test #8: Two log lines per attempt; outcome line for winner has ' ok'.""" + handler_id = _capture_loguru_messages() + try: + with patch("osism.utils.http.requests.get") as mock_get, patch( + "osism.utils.http.time.sleep" + ): + mock_get.side_effect = [ + _make_response(503, " four lines + assert len(fetch_text_lines) == 4 + # First attempt: start + retry-on-status + assert "attempt=1/2" in fetch_text_lines[0] + assert "status=503" in fetch_text_lines[1] + assert "retrying" in fetch_text_lines[1] + # Second attempt: start + ok + assert "attempt=2/2" in fetch_text_lines[2] + assert " ok" in fetch_text_lines[3] + assert "attempt=2/2" in fetch_text_lines[3] + + +def test_fetch_text_404_emits_non_retryable_outcome_log(caplog): + """Test #15: 404 emits a start line and a status=404 non-retryable outcome line.""" + handler_id = _capture_loguru_messages() + try: + with patch("osism.utils.http.requests.get") as mock_get, patch( + "osism.utils.http.time.sleep" + ) as mock_sleep: + mock_get.return_value = _make_response(404, "not found") + with caplog.at_level(_logging.INFO, logger="loguru"): + with pytest.raises(requests.HTTPError): + fetch_text("https://example.com/x", delays=(2.0, 4.0, 8.0)) + assert mock_sleep.call_count == 0 + finally: + _loguru_logger.remove(handler_id) + + fetch_text_lines = [r.message for r in caplog.records if "fetch_text" in r.message] + # Exactly two lines: start + non-retryable outcome + assert len(fetch_text_lines) == 2 + assert "attempt=1/4" in fetch_text_lines[0] + assert "status=404" in fetch_text_lines[1] + assert "non-retryable" in fetch_text_lines[1]