Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
129 changes: 70 additions & 59 deletions osism/commands/manage.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,61 @@
# SPDX-License-Identifier: Apache-2.0

import re
from re import findall
from urllib.parse import urljoin

from cliff.command import Command
from loguru import logger
import requests

from osism import utils
from osism.utils.http import fetch_text

_MARKER_DATE_RE = re.compile(r"\d{4}-\d{2}-\d{2}")
_QCOW2_FILENAME_RE = re.compile(r"\S+\.qcow2")
_SHA256_RE = re.compile(r"[0-9a-f]{64}")


def _is_sha256(body: str) -> bool:
"""Lowercase-hex sha256, per sha256sum(1) output.

The OSISM image publishing pipeline produces .CHECKSUM and .sha256 files
via sha256sum, which emits the digest as lowercase hex. The existing
parsing code passes the digest verbatim to image-manager as
sha256:<digest>, with no case normalization, so accepting uppercase
here would only paper over a downstream mismatch.
"""
parts = body.strip().split()
return bool(parts) and bool(_SHA256_RE.fullmatch(parts[0]))


def _validate_marker(body: str) -> bool:
    """Check that *body* matches the marker contract ``YYYY-MM-DD <name>.qcow2``.

    Deliberately does not pin a specific image-name prefix: CI exercises only
    a narrow slice of OSISM image variants, while production deployments may
    publish images with names this code has never seen.
    """
    tokens = body.split()
    if len(tokens) < 2:
        return False
    date_ok = _MARKER_DATE_RE.fullmatch(tokens[0]) is not None
    name_ok = _QCOW2_FILENAME_RE.fullmatch(tokens[1]) is not None
    return date_ok and name_ok


def _fetch_image_info(base_url, marker_url):
    """Resolve a marker file into ``(date, filename, image_url, checksum)``.

    Fetches the marker at *marker_url* (validated as date + .qcow2 filename),
    joins the image filename onto *base_url*, then fetches and parses the
    adjacent ``<image>.CHECKSUM`` file (validated as a lowercase sha256).
    """
    marker_tokens = fetch_text(marker_url, validate=_validate_marker).split()
    date, image_filename = marker_tokens[0], marker_tokens[1]
    logger.info(f"date: {date}")
    logger.info(f"image: {image_filename}")
    image_url = urljoin(base_url, image_filename)
    logger.info(f"url: {image_url}")
    checksum_url = f"{image_url}.CHECKSUM"
    logger.info(f"checksum_url: {checksum_url}")
    checksum = fetch_text(checksum_url, validate=_is_sha256).split()[0]
    logger.info(f"checksum: {checksum}")
    return date, image_filename, image_url, checksum


# Kubernetes minor releases for which Cluster API images are looked up.
# NOTE(review): the Gardener set is a strict subset of the plain Cluster API
# set — presumably Gardener images lag behind; confirm against the publisher.
SUPPORTED_CLUSTERAPI_GARDENER_K8S_IMAGES = ["1.33"]
SUPPORTED_CLUSTERAPI_K8S_IMAGES = ["1.32", "1.33", "1.34"]
Expand Down Expand Up @@ -72,27 +120,16 @@ def take_action(self, parsed_args):

result = []
for kubernetes_release in supported_cluterapi_k8s_images:
url = urljoin(base_url, f"last-{kubernetes_release}")

response = requests.get(url)
splitted = response.text.strip().split(" ")

logger.info(f"date: {splitted[0]}")
logger.info(f"image: {splitted[1]}")
marker_url = urljoin(base_url, f"last-{kubernetes_release}")
date, image_filename, url, checksum = _fetch_image_info(
base_url, marker_url
)

r = findall(
r".*ubuntu-[0-9][02468]04-kube-v(.*\..*\..*).qcow2", splitted[1]
r".*ubuntu-[0-9][02468]04-kube-v(.*\..*\..*).qcow2", image_filename
)
logger.info(f"version: {r[0].strip()}")

url = urljoin(base_url, splitted[1])
logger.info(f"url: {url}")

logger.info(f"checksum_url: {url}.CHECKSUM")
response_checksum = requests.get(f"{url}.CHECKSUM")
splitted_checksum = response_checksum.text.strip().split(" ")
logger.info(f"checksum: {splitted_checksum[0]}")

from jinja2 import Template
from osism.data import TEMPLATE_IMAGE_CLUSTERAPI

Expand All @@ -101,9 +138,9 @@ def take_action(self, parsed_args):
[
template.render(
image_url=url,
image_checksum=f"sha256:{splitted_checksum[0]}",
image_checksum=f"sha256:{checksum}",
image_version=r[0].strip(),
image_builddate=splitted[0],
image_builddate=date,
)
]
)
Expand Down Expand Up @@ -193,27 +230,16 @@ def take_action(self, parsed_args):

result = []
for kubernetes_release in supported_cluterapi_gardener_k8s_images:
url = urljoin(base_url, f"last-{kubernetes_release}-gardener")

response = requests.get(url)
splitted = response.text.strip().split(" ")

logger.info(f"date: {splitted[0]}")
logger.info(f"image: {splitted[1]}")
marker_url = urljoin(base_url, f"last-{kubernetes_release}-gardener")
date, image_filename, url, checksum = _fetch_image_info(
base_url, marker_url
)

r = findall(
r".*ubuntu-[0-9][02468]04-kube-v(.*\..*\..*)\.qcow2", splitted[1]
r".*ubuntu-[0-9][02468]04-kube-v(.*\..*\..*)\.qcow2", image_filename
)
logger.info(f"version: {r[0].strip()}")

url = urljoin(base_url, splitted[1])
logger.info(f"url: {url}")

logger.info(f"checksum_url: {url}.CHECKSUM")
response_checksum = requests.get(f"{url}.CHECKSUM")
splitted_checksum = response_checksum.text.strip().split(" ")
logger.info(f"checksum: {splitted_checksum[0]}")

from jinja2 import Template
from osism.data import TEMPLATE_IMAGE_CLUSTERAPI_GARDENER

Expand All @@ -222,9 +248,9 @@ def take_action(self, parsed_args):
[
template.render(
image_url=url,
image_checksum=f"sha256:{splitted_checksum[0]}",
image_checksum=f"sha256:{checksum}",
image_version=r[0].strip(),
image_builddate=splitted[0],
image_builddate=date,
)
]
)
Expand Down Expand Up @@ -319,11 +345,10 @@ def take_action(self, parsed_args):
)
logger.info(f"url: {url}")

# Get checksum file
checksum_url = f"{url}.sha256"
logger.info(f"checksum_url: {checksum_url}")
response_checksum = requests.get(checksum_url)
checksum = response_checksum.text.strip().split()[0]
checksum_body = fetch_text(checksum_url, validate=_is_sha256)
checksum = checksum_body.strip().split()[0]
logger.info(f"checksum: {checksum}")

from jinja2 import Template
Expand Down Expand Up @@ -406,32 +431,18 @@ def take_action(self, parsed_args):
client = docker.from_env()
container = client.containers.get("kolla-ansible")
openstack_release = container.labels["de.osism.release.openstack"]
url = urljoin(base_url, f"last-{openstack_release}")

response = requests.get(url)
splitted = response.text.strip().split(" ")

logger.info(f"date: {splitted[0]}")
logger.info(f"image: {splitted[1]}")

url = urljoin(base_url, splitted[1])
logger.info(f"url: {url}")

logger.info(f"checksum_url: {url}.CHECKSUM")
response_checksum = requests.get(f"{url}.CHECKSUM")
logger.info(f"checksum_url_status: {response_checksum.status_code}")
splitted_checksum = response_checksum.text.strip().split(" ")
logger.info(f"checksum: {splitted_checksum[0]}")
marker_url = urljoin(base_url, f"last-{openstack_release}")
date, _, url, checksum = _fetch_image_info(base_url, marker_url)

template = Template(TEMPLATE_IMAGE_OCTAVIA)
result = []
result.extend(
[
template.render(
image_url=url,
image_checksum=f"sha256:{splitted_checksum[0]}",
image_version=splitted[0],
image_builddate=splitted[0],
image_checksum=f"sha256:{checksum}",
image_version=date,
image_builddate=date,
)
]
)
Expand Down
91 changes: 91 additions & 0 deletions osism/utils/http.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
# SPDX-License-Identifier: Apache-2.0

"""HTTP fetch helper with retry, content validation, and structured logging."""

from __future__ import annotations

import time
from typing import Callable, Optional

import requests
from loguru import logger

RETRYABLE_STATUSES = {408, 429} | set(range(500, 600))


def fetch_text(
    url: str,
    *,
    delays: tuple[float, ...] = (2.0, 4.0, 8.0),
    validate: Optional[Callable[[str], bool]] = None,
    timeout: float = 30.0,
) -> str:
    """Fetch ``url`` as text with retry on transient failures.

    Retries on connection-level errors, timeouts, HTTP 408/429 and all 5xx
    statuses, sleeping ``delays[i]`` seconds between attempts (the number of
    attempts is ``len(delays) + 1``). Non-retryable HTTP errors are raised
    immediately. When ``validate`` is given and rejects the body, the fetch
    is retried too; the final rejection is raised as a ``ValueError``.

    :param url: URL to fetch.
    :param delays: Back-off sleeps between attempts; must be non-empty.
    :param validate: Optional predicate applied to the response text.
    :param timeout: Per-request timeout in seconds. Without it a server that
        accepts the connection but never responds would block this helper
        forever; a timeout raises ``requests.Timeout`` (a
        ``RequestException`` subclass), which is retried below.
    :raises ValueError: If ``delays`` is empty, or validation keeps failing.
    :raises requests.RequestException: On persistent transport/HTTP errors.
    """
    if not delays:
        raise ValueError(
            "fetch_text requires non-empty delays; the helper exists to retry"
        )

    attempts = len(delays) + 1
    last_failure: Optional[BaseException] = None

    for n in range(1, attempts + 1):
        logger.info(f"fetch_text url={url} attempt={n}/{attempts}")
        try:
            response = requests.get(url, timeout=timeout)
            response.raise_for_status()
        except requests.HTTPError as exc:
            # An HTTPError without an attached response is treated as
            # non-retryable (status 0 is not in RETRYABLE_STATUSES).
            status = exc.response.status_code if exc.response is not None else 0
            if status not in RETRYABLE_STATUSES:
                logger.info(
                    f"fetch_text url={url} attempt={n}/{attempts} status={status} non-retryable"
                )
                raise
            last_failure = exc
            if n < attempts:
                logger.info(
                    f"fetch_text url={url} attempt={n}/{attempts} status={status} "
                    f"retrying in {delays[n - 1]}s"
                )
                time.sleep(delays[n - 1])
                continue
            logger.info(
                f"fetch_text url={url} attempt={n}/{attempts} status={status} giving up"
            )
            raise
        except requests.RequestException as exc:
            # Connection errors, timeouts, etc. — always retryable.
            last_failure = exc
            if n < attempts:
                logger.info(
                    f"fetch_text url={url} attempt={n}/{attempts} "
                    f"error={type(exc).__name__}({exc}) retrying in {delays[n - 1]}s"
                )
                time.sleep(delays[n - 1])
                continue
            logger.info(
                f"fetch_text url={url} attempt={n}/{attempts} "
                f"error={type(exc).__name__}({exc}) giving up"
            )
            raise

        status = response.status_code
        text = response.text
        if validate is not None and not validate(text):
            # A 200 with a bogus body (e.g. an S3 XML error page) is treated
            # like a transient failure and retried.
            last_failure = ValueError(f"fetch_text validate rejected body for {url!r}")
            excerpt = text[:40].replace("\n", "\\n")
            if n < attempts:
                logger.info(
                    f"fetch_text url={url} attempt={n}/{attempts} status={status} "
                    f"invalid_body={excerpt!r} retrying in {delays[n - 1]}s"
                )
                time.sleep(delays[n - 1])
                continue
            logger.info(
                f"fetch_text url={url} attempt={n}/{attempts} status={status} "
                f"invalid_body={excerpt!r} giving up"
            )
            raise last_failure

        logger.info(f"fetch_text url={url} attempt={n}/{attempts} status={status} ok")
        return text

    # Unreachable: every final iteration returns or raises; kept as a guard.
    raise RuntimeError("fetch_text loop exited without return or raise")
1 change: 1 addition & 0 deletions tests/unit/commands/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# SPDX-License-Identifier: Apache-2.0
80 changes: 80 additions & 0 deletions tests/unit/commands/test_manage_validators.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# SPDX-License-Identifier: Apache-2.0

"""Unit tests for the marker and sha256 body validators in osism.commands.manage."""

from osism.commands.manage import _is_sha256, _validate_marker

# --- Marker validator (M1-M9) ---


def test_m1_validates_octavia_marker():
    """A well-formed Octavia amphora marker passes validation."""
    body = "2026-04-12 octavia-amphora-haproxy-2024.2.20260412.qcow2"
    assert _validate_marker(body) is True


def test_m2_validates_capi_marker():
    """A well-formed Cluster API image marker passes validation."""
    body = "2026-04-12 ubuntu-2404-kube-v1.33.1.qcow2"
    assert _validate_marker(body) is True


def test_m3_rejects_xml_error_body():
    """An S3-style XML error document must not pass as a marker."""
    xml_body = (
        '<?xml version="1.0" encoding="UTF-8"?>\n'
        "<Error><Code>InternalError</Code></Error>"
    )
    assert _validate_marker(xml_body) is False


def test_m4_rejects_empty_body():
    """An empty response body is not a valid marker."""
    result = _validate_marker("")
    assert result is False


def test_m5_rejects_single_token():
    """A marker carrying only a date, with no filename, is invalid."""
    result = _validate_marker("2026-04-12")
    assert result is False


def test_m6_rejects_wrong_suffix():
    """The filename token must end in .qcow2, not an arbitrary extension."""
    result = _validate_marker("2026-04-12 random.txt")
    assert result is False


def test_m7_rejects_wrong_date_shape():
    """The first token must look like YYYY-MM-DD, not free text."""
    result = _validate_marker("yesterday octavia-amphora-foo.qcow2")
    assert result is False


def test_m8_accepts_unfamiliar_qcow2_name():
    """Production-diversity: validator must accept names CI has never seen."""
    result = _validate_marker("2026-04-12 some-future-amphora-variant.qcow2")
    assert result is True


def test_m9_rejects_filename_with_internal_whitespace():
    """Second token must be a single \\S+\\.qcow2 token."""
    result = _validate_marker("2026-04-12 image-with-spaces in-name.qcow2")
    assert result is False


# --- Checksum validator (S1-S6) ---


def test_s1_accepts_lowercase_hex_sha256():
    """sha256sum-style line: lowercase 64-hex digest, then a filename."""
    digest = "8ce3f3" + "a" * 58
    body = digest + " octavia-amphora-haproxy-2024.2.20260412.qcow2"
    assert _is_sha256(body) is True


def test_s2_rejects_xml_body():
    """An XML error document is not a checksum body."""
    result = _is_sha256('<?xml version="1.0"?> <Error>')
    assert result is False


def test_s3_rejects_empty_body():
    """An empty response body is not a checksum."""
    result = _is_sha256("")
    assert result is False


def test_s4_rejects_non_hex_64_char_first_token():
    """Length alone is not enough; the token must be lowercase hex."""
    token = "not-hex-but-64-chars-long-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
    result = _is_sha256(token)
    assert result is False


def test_s5_rejects_too_short_hex():
    """Hex shorter than 64 characters is not a sha256 digest."""
    result = _is_sha256("abc123")
    assert result is False


def test_s6_rejects_uppercase_hex():
    """Uppercase hex is rejected; sha256sum(1) emits lowercase only."""
    result = _is_sha256("ABCDEF" + "0" * 58)
    assert result is False
Loading