From 80b2cbf4abf2806a72a1cae7d209b193f869e070 Mon Sep 17 00:00:00 2001 From: Jen Gilbert Date: Wed, 10 Jun 2026 14:02:12 -0500 Subject: [PATCH 01/18] Add reorg scripts and config --- reorg.py | 61 +++++++++++++++++++++++ reorg_config.yaml | 123 ++++++++++++++++++++++++++++++++++++++++++++++ reorg_rollback.py | 23 +++++++++ 3 files changed, 207 insertions(+) create mode 100644 reorg.py create mode 100644 reorg_config.yaml create mode 100644 reorg_rollback.py diff --git a/reorg.py b/reorg.py new file mode 100644 index 00000000000..0eaa68d4fc6 --- /dev/null +++ b/reorg.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python3 +import os +import sys +from pathlib import Path + +try: + import yaml +except ImportError: + print("Error: PyYAML is required. Install with: pip install pyyaml", file=sys.stderr) + sys.exit(1) + +repo_root = Path(__file__).parent +config_path = repo_root / "reorg_config.yaml" + +with config_path.open() as f: + config = yaml.safe_load(f) + +top_level = set(config.get("top_level", [])) +moves_to_hugo = set(config.get("moves_to_hugo", [])) +ignore = set(config.get("ignore", [])) + +# Sanity-check the config itself for conflicts. +conflicts = top_level & moves_to_hugo +if conflicts: + for name in sorted(conflicts): + print(f"ERROR: '{name}' appears in both top_level and moves_to_hugo", file=sys.stderr) + sys.exit(1) + +errors = [] + +for name in os.listdir(repo_root): + if name in ignore: + continue + + if name in top_level: + pass # keep in place + elif name in moves_to_hugo: + pass # will move below + else: + errors.append(name) + +if errors: + for name in sorted(errors): + print(f"ERROR: '{name}' is not listed in reorg_config.yaml", file=sys.stderr) + print(f"{len(errors)} error(s) found. 
Update reorg_config.yaml before running.", file=sys.stderr) + sys.exit(1) + +hugo_dir = repo_root / "hugo" +hugo_dir.mkdir(exist_ok=True) + +moved = 0 +for name in sorted(os.listdir(repo_root)): + if name in ignore or name in top_level or name == "hugo": + continue + src = repo_root / name + dst = hugo_dir / name + print(f"Moving {name} -> hugo/{name}") + src.rename(dst) + moved += 1 + +print(f"Done. {moved} item(s) moved into hugo/.") diff --git a/reorg_config.yaml b/reorg_config.yaml new file mode 100644 index 00000000000..befcf6030a1 --- /dev/null +++ b/reorg_config.yaml @@ -0,0 +1,123 @@ +# Files and folders that stay at the top level (not moved into hugo/). +# Everything else moves into hugo/. + +top_level: + # Sites + - astro + + # Repo scripts + - reorg.py + - reorg_rollback.py + - reorg_config.yaml + + # Created by reorg.py as the move target + - hugo + + # Git / CI / repo-wide tooling + - .git + - .github + - .gitlab-ci.yml + - .gitignore + - .gitattributes + - .husky + - .claude + + # Editor / IDE config + - .editorconfig + - .vscode + + # Repo docs + - README.md + - LICENSE + - CONTRIBUTING.md + - CLAUDE.md + + # Node / Yarn (shared by both sites) + - package.json + - package-lock.json + - yarn.lock + - .yarnrc.yml + - .yarn + - node_modules + - .nvmrc + + # Linting / formatting (shared) + - .eslintrc + - .eslintignore + - .vale.ini + - .rubocop.yml + - .prettierignore + - prettier.config.js + - static-analysis.datadog.yml + + # Datadog CI / synthetics + - datadog-ci.preview.json + - general.preview.synthetics.json + - repository.datadog.yml + + # Docker + - docker-compose-docs.yml + + # Go module (top-level) + - go.mod + - go.sum + +moves_to_hugo: + # Hugo core + - archetypes + - assets + - config + - content + - data + - i18n + - layouts + - static + - resources + - public + + # Hugo build tooling + - Makefile + - Makefile.config + - Makefile.config.example + - local + - hugpython + - gradle + - _vendor + + # Hugo build artifacts / state + - 
.hugo_build.lock + - logs + - integrations_data + + # Node / JS (Hugo-specific) + - babel.config.js + - jest.config.js + - postcss.config.js + - markdoc.config.json + - customization_config + + # Testing (Hugo-specific) + - playwright.config.ts + - playwright-report + - test-results + - e2e + + # Content tooling + - translate.yaml + - .translate + - typesense.config.json + - .htmltest.yml + - .apigentools-info + - agent_config_types_list.txt + + # Examples / docs + - examples + - docs + - plans + + # Static assets (Hugo) + - usage-notifications-email.png + +ignore: + # macOS metadata + - .DS_Store diff --git a/reorg_rollback.py b/reorg_rollback.py new file mode 100644 index 00000000000..050433f7e1d --- /dev/null +++ b/reorg_rollback.py @@ -0,0 +1,23 @@ +#!/usr/bin/env python3 +import os +import sys +from pathlib import Path + +repo_root = Path(__file__).parent +hugo_dir = repo_root / "hugo" + +if not hugo_dir.exists(): + print("Nothing to roll back: hugo/ does not exist.", file=sys.stderr) + sys.exit(1) + +moved = 0 +for name in sorted(os.listdir(hugo_dir)): + src = hugo_dir / name + dst = repo_root / name + print(f"Moving hugo/{name} -> {name}") + src.rename(dst) + moved += 1 + +# Only succeeds if hugo/ is now empty, preventing accidental data loss. +hugo_dir.rmdir() +print(f"Done. {moved} item(s) restored to repo root.") From fb3d0a6797a98919da5092a2f6f9af2fd5f208b5 Mon Sep 17 00:00:00 2001 From: Jen Gilbert Date: Wed, 10 Jun 2026 16:07:28 -0500 Subject: [PATCH 02/18] Review code and fill gaps --- reorg.py | 206 ++++++++++++ reorg_config.yaml | 30 +- reorg_context.md | 11 + reorg_harness.py | 837 ++++++++++++++++++++++++++++++++++++++++++++++ reorg_rollback.py | 25 ++ 5 files changed, 1096 insertions(+), 13 deletions(-) create mode 100644 reorg_context.md create mode 100644 reorg_harness.py diff --git a/reorg.py b/reorg.py index 0eaa68d4fc6..f50390583c2 100644 --- a/reorg.py +++ b/reorg.py @@ -59,3 +59,209 @@ moved += 1 print(f"Done. 
# Routing rule used when splitting .gitignore between the repo root and hugo/.
#
# Git reads each .gitignore relative to the directory that holds it, so a rule
# that follows its target into hugo/ needs no "hugo/" prefix. Lines are routed,
# never rewritten, by their first path segment (taken from reorg_config.yaml):
#   first segment in moves_to_hugo -> hugo/.gitignore only
#   first segment in top_level     -> root .gitignore only
#   first segment in neither       -> generic/pure glob, kept in both
# Comments and blank lines are kept in both files.
#
# NOTE(review): a pattern with no '/' other than a trailing one (for example
# "node_modules/") matches at any depth under its .gitignore. Routing such a
# line to hugo/ only also stops it applying under astro/ -- confirm intended.
def route_gitignore_segment(line):
    """Extract the routing key (first path segment) from one .gitignore line.

    Returns None for a blank line or a '#' comment. Otherwise one leading '!'
    (negation) and then any run of leading '/' (root anchor) are ignored and
    the text up to the next '/' is returned. The line is only inspected; the
    caller writes the original text back unchanged.
    """
    pattern = line.strip()
    if pattern == "" or pattern[0] == "#":
        return None
    if pattern[0] == "!":
        pattern = pattern[1:]
    head, _, _ = pattern.lstrip("/").partition("/")
    return head
hugo/.gitignore only, " + f"{len(both_segments)} generic kept in both") +print(f" root: {len(root_lines)} line(s), hugo/: {len(hugo_lines)} line(s) " + f"(was {len(gi_lines)} duplicated wholesale)") +answer = input(" Apply? [y/N] ").strip().lower() +if answer == "y": + gitignore.write_text("".join(root_lines)) + (hugo_dir / ".gitignore").write_text("".join(hugo_lines)) + print(" Written: .gitignore (root, pruned) and hugo/.gitignore") + if both_segments: + print(" NOTE: kept in both (first path segment not in reorg_config.yaml): " + + ", ".join(sorted(both_segments))) + +# Update .github/workflows/ files to reference paths under hugo/. +WORKFLOW_SUBSTITUTIONS = [ + ("- 'content/en/", "- 'hugo/content/en/"), + ("- 'layouts/shortcodes/", "- 'hugo/layouts/shortcodes/"), + ("- 'static/images/", "- 'hugo/static/images/"), + ("python local/bin/", "python hugo/local/bin/"), + # vale_linter.yml passes the template path to vale via --output= + ("--output=local/bin/", "--output=hugo/local/bin/"), + (" static |", " hugo/static |"), + ("^static/images/", "^hugo/static/images/"), + ("-- 'content/en/**/*.md'", "-- 'hugo/content/en/**/*.md'"), + # bump_* and version_getter_shared workflows cp/commit data files by ./ path + ("./data/", "./hugo/data/"), +] + +print("\nUpdating .github/workflows/...") +workflows_dir = repo_root / ".github" / "workflows" +for yml_file in sorted(workflows_dir.glob("*.yml")): + original = yml_file.read_text() + updated = original + for old, new in WORKFLOW_SUBSTITUTIONS: + if old not in updated: + continue + print(f"\n {yml_file.name}: {old!r} → {new!r}") + answer = input(" Apply? [y/N] ").strip().lower() + if answer == "y": + updated = updated.replace(old, new) + if updated != original: + yml_file.write_text(updated) + print(f" Written: {yml_file.name}") + +# Update .husky/ hook scripts to reference paths under hugo/content/. 
# CODEOWNERS is a line-based format ("<pattern> <owners...>"), not YAML. Each
# pattern is routed by its FIRST PATH SEGMENT rather than by substring
# replacement, so "config" can never match inside "customization_config".
def route_codeowners_pattern(pattern, moves=None):
    """Rewrite one CODEOWNERS pattern for the hugo/ move.

    Args:
        pattern: the pattern token of a CODEOWNERS rule (owners excluded).
        moves: names that move into hugo/. Defaults to the module-level
            ``moves_to_hugo`` set loaded from reorg_config.yaml, so existing
            one-argument callers behave exactly as before; passing it
            explicitly lets the function be used and tested in isolation.

    Returns:
        (segment, new_pattern). ``new_pattern`` is None when the pattern is
        left untouched; ``segment`` is None only for the global '*' owner.
        A leading '/' (repo-root anchor) is preserved on rewritten patterns.
    """
    if moves is None:
        moves = moves_to_hugo

    if pattern == "*":
        return None, None

    anchored = pattern.startswith("/")  # leading '/' = repo-root-anchored
    body = pattern[1:] if anchored else pattern
    segment, sep, tail = body.partition("/")

    # ".local/" is a misspelling of "local/" (which moves into hugo/), so the
    # typo'd entry is normalized and repathed alongside the real one.
    if segment == ".local":
        segment = "local"
        body = segment + sep + tail

    if segment not in moves:
        return segment, None

    new_body = "hugo/" + body
    return segment, ("/" + new_body if anchored else new_body)
+changes_by_segment = {} # segment -> list of (line_index, old_pattern, new_line) +left_alone = set() # first segments in neither config list (surfaced below) + +for i, raw in enumerate(lines): + newline = "\n" if raw.endswith("\n") else "" + line = raw[:-len(newline)] if newline else raw + stripped = line.lstrip() + if not stripped or stripped.startswith("#"): + continue + indent = line[:len(line) - len(stripped)] + pattern = stripped.split(maxsplit=1)[0] + rest = stripped[len(pattern):] # original spacing + owners, verbatim + segment, new_pattern = route_codeowners_pattern(pattern) + if new_pattern is None: + if segment is not None and segment not in top_level: + left_alone.add(segment) + continue + changes_by_segment.setdefault(segment, []).append( + (i, pattern, indent + new_pattern + rest + newline) + ) + +applied = False +for segment in sorted(changes_by_segment): + entries = changes_by_segment[segment] + old_pattern = entries[0][1] + new_pattern = entries[0][2].lstrip().split(maxsplit=1)[0] + print(f"\n CODEOWNERS: prefix {len(entries)} pattern(s) under {segment!r} " + f"with hugo/ (e.g. {old_pattern!r} → {new_pattern!r})") + answer = input(" Apply? 
[y/N] ").strip().lower() + if answer == "y": + for idx, _, new_line in entries: + lines[idx] = new_line + applied = True + +if left_alone: + print("\n NOTE: left unchanged (first path segment not in reorg_config.yaml): " + + ", ".join(sorted(left_alone))) + +if applied: + codeowners.write_text("".join(lines)) + print(" Written: CODEOWNERS") diff --git a/reorg_config.yaml b/reorg_config.yaml index befcf6030a1..83f5d0eb6af 100644 --- a/reorg_config.yaml +++ b/reorg_config.yaml @@ -9,6 +9,9 @@ top_level: - reorg.py - reorg_rollback.py - reorg_config.yaml + - reorg_harness.py + - reorg_context.md + - reorg_issues.md # Created by reorg.py as the move target - hugo @@ -32,15 +35,6 @@ top_level: - CONTRIBUTING.md - CLAUDE.md - # Node / Yarn (shared by both sites) - - package.json - - package-lock.json - - yarn.lock - - .yarnrc.yml - - .yarn - - node_modules - - .nvmrc - # Linting / formatting (shared) - .eslintrc - .eslintignore @@ -58,10 +52,6 @@ top_level: # Docker - docker-compose-docs.yml - # Go module (top-level) - - go.mod - - go.sum - moves_to_hugo: # Hugo core - archetypes @@ -96,6 +86,20 @@ moves_to_hugo: - markdoc.config.json - customization_config + # Go module — powers Hugo Modules (imports websites-modules, vendors into + # _vendor/). Hugo requires go.mod in the project root; Astro uses no Go. + - go.mod + - go.sum + + # Node / Yarn (Hugo site owns these; Astro has its own under astro/) + - package.json + - package-lock.json + - yarn.lock + - .yarnrc.yml + - .yarn + - node_modules + - .nvmrc + # Testing (Hugo-specific) - playwright.config.ts - playwright-report diff --git a/reorg_context.md b/reorg_context.md new file mode 100644 index 00000000000..55c39a900e5 --- /dev/null +++ b/reorg_context.md @@ -0,0 +1,11 @@ +# Docs repository reorg context + +This repo is currently a Hugo site. We instead want it to contain a `hugo` and `astro` site side by side, with no overlap in their envs, `package.json` files, etc. 
+ +The `reorg_config.yaml` describes the relocation target for every file and folder at the top level of the repo. + +`reorg.py` implements the file and folder path changes, and updates any dependencies on those paths, such as GitHub actions, CODEOWNERS, and Husky workflows. + +`reorg_rollback.py` functions as an "undo" action for the reorg. + +`reorg_harness.py` verifies the functionality of affected entities where possible. \ No newline at end of file diff --git a/reorg_harness.py b/reorg_harness.py new file mode 100644 index 00000000000..c2e908bd9df --- /dev/null +++ b/reorg_harness.py @@ -0,0 +1,837 @@ +#!/usr/bin/env python3 +""" +Post-reorg validation harness. + +Run this AFTER reorg.py, from the repo root, on a feature branch (not master). +It verifies that the things the reorg could silently break still work: + + A. Layout - hugo/ holds the moved dirs; nothing moved is left at root; + hugo/ and astro/ each self-own a package.json and no Hugo + Node/build file lingers at the root competing with Astro. + B. Workflows - no .github/workflows/ file references a moved path (dir OR + file) without the hugo/ prefix, in shell scalars and in + structured on.*.paths filters (catches gaps in reorg.py). + C. CODEOWNERS - every concrete path pattern still resolves on disk, and + every moved pattern (globs included) carries the hugo/ prefix. + D. Husky hooks - each pre-commit check still REJECTS a known-bad input planted + at the new hugo/ path. If a hook wasn't repathed it inspects + the now-missing content/en/ and passes vacuously -> we fail it. + E. Vale - vale still flags a known violation using the Datadog style, + proving StylesPath resolves from the new content location. + F. Hugo build - static presence check only; run `make start` manually (todo #5). + G. Rollback - on a throwaway repo, reorg.py then reorg_rollback.py must + restore the tree byte-for-byte (the only test of rollback). + +The harness is non-destructive. 
def record(status, name, detail=""):
    """Record one check outcome and echo it to stdout.

    status must be one of PASS, FAIL, SKIP or WARN; any other value raises
    KeyError from the symbol lookup. The (status, name, detail) tuple is
    appended to the module-level ``results`` list.
    """
    results.append((status, name, detail))
    symbol = {"PASS": "✅", "FAIL": "❌", "SKIP": "⏭ ", "WARN": "⚠ "}[status]
    line = f"{symbol} {status:4} {name}"
    if detail:
        line += f"\n {detail}"
    print(line)


def git(*args, check=False):
    """Run a git command from the repo root and return the CompletedProcess.

    Output is captured as text. With check=True a non-zero exit raises
    subprocess.CalledProcessError; by default the caller inspects returncode.
    """
    return subprocess.run(
        ["git", *args],
        cwd=repo_root,
        capture_output=True,
        text=True,
        check=check,
    )


def load_config():
    """Load reorg_config.yaml and return (top_level, moves_to_hugo) name sets.

    An empty file, or a section that is present but has no entries, loads from
    YAML as None; both are treated as an empty set instead of crashing.
    """
    config_path = repo_root / "reorg_config.yaml"
    with config_path.open(encoding="utf-8") as f:
        config = yaml.safe_load(f) or {}
    return (
        set(config.get("top_level") or []),
        set(config.get("moves_to_hugo") or []),
    )


# --------------------------------------------------------------------------
# A. Layout
# --------------------------------------------------------------------------

def check_layout(top_level, moves_to_hugo):
    """hugo/ holds the moved items; nothing that moved is left at the root.

    Records one result per sub-check: moved items still at the root, moved
    items missing under hugo/, top-level items that leaked into hugo/, the
    required top-level anchors, and the presence of both .gitignore files.
    """
    in_hugo = sorted(n for n in moves_to_hugo if (hugo_dir / n).exists())

    # A moved name still sitting at the root means the move didn't happen.
    still_at_root = sorted(n for n in moves_to_hugo if (repo_root / n).exists())
    if still_at_root:
        record("FAIL", "layout: moved items remain at repo root",
               ", ".join(still_at_root))
    else:
        record("PASS", "layout: no moved items remain at repo root",
               f"{len(in_hugo)} item(s) confirmed under hugo/")

    # Subset of the above: at the root AND absent from hugo/.
    missing_from_hugo = [n for n in still_at_root if not (hugo_dir / n).exists()]
    if missing_from_hugo:
        record("FAIL", "layout: moved items missing under hugo/",
               ", ".join(missing_from_hugo))

    # No top_level item should have leaked into hugo/. Two names are expected
    # on both sides and must not be reported: 'hugo' is the move target
    # itself, and '.gitignore' is listed in top_level but reorg.py
    # deliberately writes a second one at hugo/.gitignore (required by the
    # last check in this function). Without this exclusion a correct reorg
    # would always FAIL here.
    expected_on_both_sides = {"hugo", ".gitignore"}
    leaked = sorted(
        n for n in top_level
        if n not in expected_on_both_sides and (hugo_dir / n).exists()
    )
    if leaked:
        record("FAIL", "layout: top-level items leaked into hugo/",
               ", ".join(leaked))
    else:
        record("PASS", "layout: no top-level items leaked into hugo/")

    # Critical anchors that must still exist at the root after the move.
    # package.json/node_modules/yarn.lock and go.mod/go.sum move into hugo/
    # per reorg_config.yaml, so they are deliberately NOT listed here.
    must_stay = ["astro", ".husky", ".github", ".vale.ini"]
    absent = [n for n in must_stay if not (repo_root / n).exists()]
    if absent:
        record("FAIL", "layout: expected top-level items are missing",
               ", ".join(absent))
    else:
        record("PASS", "layout: top-level anchors intact",
               ", ".join(must_stay))

    # reorg.py splits the single .gitignore into a root and a hugo/ file;
    # this only checks that both exist, not how the rules were routed.
    if (repo_root / ".gitignore").exists() and (hugo_dir / ".gitignore").exists():
        record("PASS", "layout: .gitignore present at root and in hugo/")
    else:
        record("FAIL", "layout: missing a .gitignore copy",
               "expected both ./.gitignore and ./hugo/.gitignore")


def check_gitignore_split(top_level, moves_to_hugo):
    """Verify reorg.py routed .gitignore rules to the correct side.

    A .gitignore is relative to its own directory, so after the split no
    surviving rule should point at a path that lives on the OTHER side:
      - root .gitignore must not carry a rule whose first segment moved to hugo/
      - hugo/.gitignore must not carry a rule whose first segment stays at root
    Generic globs (first segment in neither config list) legitimately live in
    both files, so they are ignored here.
    """
    def first_segment(line):
        # Same routing key reorg.py uses: drop one leading '!' and any
        # leading '/', then take the text before the next '/'.
        stripped = line.strip()
        if not stripped or stripped.startswith("#"):
            return None
        body = stripped[1:] if stripped.startswith("!") else stripped
        return body.lstrip("/").split("/", 1)[0]

    def leaks(path, wrong_side):
        out = []
        for raw in path.read_text().splitlines():
            seg = first_segment(raw)
            if seg in wrong_side:
                out.append(f"{path.name}: {raw.strip()} (segment {seg!r})")
        return out

    root_gi = repo_root / ".gitignore"
    hugo_gi = hugo_dir / ".gitignore"
    if not (root_gi.exists() and hugo_gi.exists()):
        record("SKIP", "gitignore: split not verifiable (a .gitignore is missing)")
        return

    problems = leaks(root_gi, moves_to_hugo) + leaks(hugo_gi, top_level - {"hugo"})
    if problems:
        record("FAIL", "gitignore: rules survive on the wrong side of the split",
               f"{len(problems)}:\n " + "\n ".join(problems[:20]))
    else:
        record("PASS", "gitignore: no moved-path rule left at root, "
                       "no root-path rule left in hugo/")


def check_site_separation(moves_to_hugo):
    """The two sites must own non-overlapping Node/build setups.

    check_layout already fails on any moved item left at the root, so this
    check adds only what that does not cover, deriving its clash set from the
    config rather than a hand-maintained list:
      - astro/package.json and hugo/package.json both exist (each self-owns)
      - no name that moved into hugo/ AND also exists under astro/ remains at
        the root
    """
    problems = []

    # Each site self-owns its Node manifest.
    for site in ("hugo", "astro"):
        if not (repo_root / site / "package.json").exists():
            problems.append(f"missing {site}/package.json")

    # Names that moved into hugo/ and that astro/ also contains are shared
    # toolchain files; a third copy at the root would collide with both.
    shared_with_astro = sorted(
        n for n in moves_to_hugo if (repo_root / "astro" / n).exists()
    )
    leaked = [n for n in shared_with_astro if (repo_root / n).exists()]
    if leaked:
        problems.append("left at root, colliding with astro/: " + ", ".join(leaked))

    if problems:
        record("FAIL", "separation: hugo/ and astro/ Node setups overlap or leak",
               "; ".join(problems))
    else:
        record("PASS", "separation: hugo/ and astro/ each self-own package.json; "
                       "no shared toolchain file left at root")
# --------------------------------------------------------------------------
# B. Workflow paths
# --------------------------------------------------------------------------

def _workflow_files(workflows_dir):
    """Return every workflow file in workflows_dir, sorted by path.

    GitHub Actions accepts both the .yml and the .yaml extension, so both are
    collected; globbing only *.yml would let a .yaml workflow pass unchecked.
    """
    return sorted(
        path
        for ext in ("*.yml", "*.yaml")
        for path in workflows_dir.glob(ext)
    )


def check_workflows(moves_to_hugo):
    """Flag references to moved paths (directories AND files) lacking hugo/.

    Every path-like token is routed by its first segment: a token whose first
    segment moved into hugo/ must be hugo/-prefixed (after a correct reorg its
    first segment is 'hugo', so it no longer matches). A token counts as a
    path reference when it contains a '/', or when the whole token is exactly
    the name of a moved file -- so a bare word like 'content' in prose is not
    flagged, but a standalone 'babel.config.js' is.

    A moved file whose name ALSO exists under astro/ is left out of the
    bare-name match because a standalone mention is ambiguous; it is still
    validated when it appears inside a slashed path.
    """
    workflows_dir = repo_root / ".github" / "workflows"
    if not workflows_dir.exists():
        record("SKIP", "workflows: .github/workflows/ not found")
        return

    moved_files = {
        n for n in moves_to_hugo
        if (hugo_dir / n).is_file() and not (repo_root / "astro" / n).exists()
    }

    # Lines that legitimately reference a moved name but are NOT paths to fix.
    # Keyed by workflow filename; a line is exempt if it contains any marker.
    allowlist = {
        # Security-doc example of how untrusted paths are reported, not a real path.
        "claude_review.yml": ["__untrusted/content/"],
    }

    # A path-like token: a maximal run of path/glob characters. Whitespace,
    # quotes, ':', '@' and '!' all act as boundaries.
    token_re = re.compile(r"[A-Za-z0-9_.\-*/]+")

    def first_segment(token):
        """(first path segment, ./-stripped token) for routing."""
        t = token
        while t.startswith("./"):
            t = t[2:]
        return t.split("/", 1)[0], t

    suspects = []
    for yml in _workflow_files(workflows_dir):
        exempt = allowlist.get(yml.name, [])
        for lineno, line in enumerate(yml.read_text().splitlines(), 1):
            if any(marker in line for marker in exempt):
                continue
            for token in token_re.findall(line):
                seg, normalized = first_segment(token)
                # Only an actual path (has a '/') or the exact name of a
                # moved file counts as a path reference.
                if "/" not in normalized and normalized not in moved_files:
                    continue
                if seg in moves_to_hugo:
                    suspects.append(f"{yml.name}:{lineno}: {line.strip()}")
                    break  # one report per line is enough

    # De-duplicate while keeping order.
    seen = set()
    unique = [s for s in suspects if not (s in seen or seen.add(s))]

    if unique:
        record("FAIL", "workflows: unprefixed moved paths found",
               f"{len(unique)} line(s):\n " + "\n ".join(unique[:20]))
    else:
        record("PASS", "workflows: all moved paths (dirs and files) are hugo/-prefixed")


def check_workflow_path_filters(moves_to_hugo):
    """Parse each workflow's on.*.paths filters and assert moved paths are prefixed.

    `on.<event>.paths` / `paths-ignore` are structured YAML list values, so
    they are validated precisely: each entry is routed by its first path
    segment, and if that segment moved into hugo/ the entry must be
    hugo/-prefixed.

    Under YAML 1.1, PyYAML loads the `on:` key as the boolean True rather than
    the string "on", so the triggers are looked up under both keys. A file
    that fails to parse is reported as WARN and skipped.
    """
    workflows_dir = repo_root / ".github" / "workflows"
    if not workflows_dir.exists():
        record("SKIP", "workflows: .github/workflows/ not found (path filters)")
        return

    def first_segment(entry):
        # Strip a leading '!' negation and any root anchor, then take segment 0.
        p = entry[1:] if entry.startswith("!") else entry
        return p.lstrip("/").split("/", 1)[0]

    problems = []
    for yml in _workflow_files(workflows_dir):
        try:
            doc = yaml.safe_load(yml.read_text())
        except yaml.YAMLError as exc:
            record("WARN", f"workflows: {yml.name} did not parse as YAML",
                   str(exc).splitlines()[0][:120])
            continue
        if not isinstance(doc, dict):
            continue
        triggers = doc.get("on", doc.get(True))  # `on:` -> True under YAML 1.1
        if not isinstance(triggers, dict):
            continue
        for event, spec in triggers.items():
            if not isinstance(spec, dict):
                continue
            for key in ("paths", "paths-ignore"):
                for entry in spec.get(key) or []:
                    if not isinstance(entry, str):
                        continue
                    seg = first_segment(entry)
                    anchored = entry.lstrip("!").lstrip("/")
                    if seg in moves_to_hugo and not anchored.startswith("hugo/"):
                        problems.append(f"{yml.name}: on.{event}.{key}: {entry}")

    if problems:
        record("FAIL", "workflows: on.*.paths filters missing hugo/ prefix",
               f"{len(problems)}:\n " + "\n ".join(problems[:20]))
    else:
        record("PASS", "workflows: on.*.paths filters are hugo/-prefixed")
# --------------------------------------------------------------------------
# C. CODEOWNERS
# --------------------------------------------------------------------------

def check_codeowners():
    """Every concrete (non-glob) path pattern should resolve on disk."""
    codeowners = repo_root / ".github" / "CODEOWNERS"
    if not codeowners.exists():
        record("SKIP", "codeowners: .github/CODEOWNERS not found")
        return

    missing = []
    for line in codeowners.read_text().splitlines():
        stripped = line.strip()
        if not stripped or stripped.startswith("#"):
            continue
        pattern = stripped.split()[0]
        if pattern == "*":
            continue
        # Skip glob patterns; they can't be resolved to a single path.
        if any(ch in pattern for ch in "*?[]"):
            continue
        # Leading slash in CODEOWNERS is repo-root-anchored.
        rel = pattern.lstrip("/")
        if not (repo_root / rel).exists():
            missing.append(pattern)

    if missing:
        record("FAIL", "codeowners: patterns that no longer resolve",
               f"{len(missing)}:\n " + "\n ".join(missing[:20]))
    else:
        record("PASS", "codeowners: all concrete patterns resolve on disk")


def check_codeowners_prefixing(moves_to_hugo):
    """Every CODEOWNERS pattern whose first segment moved into hugo/ must carry
    the hugo/ prefix -- globs included.

    check_codeowners() only proves that concrete paths still resolve on disk
    and skips every glob. Here each pattern is routed by its first path
    segment, with the '.local' typo normalized to 'local', and a moved segment
    without the hugo/ prefix is reported.
    """
    codeowners = repo_root / ".github" / "CODEOWNERS"
    if not codeowners.exists():
        record("SKIP", "codeowners: .github/CODEOWNERS not found (prefixing)")
        return

    def first_segment(pattern):
        body = pattern[1:] if pattern.startswith("/") else pattern  # un-anchor
        seg = body.split("/", 1)[0]
        return "local" if seg == ".local" else seg

    unprefixed = []
    for line in codeowners.read_text().splitlines():
        stripped = line.strip()
        if not stripped or stripped.startswith("#"):
            continue
        pattern = stripped.split()[0]
        if pattern == "*":
            continue
        if first_segment(pattern) not in moves_to_hugo:
            continue
        if not pattern.lstrip("/").startswith("hugo/"):
            unprefixed.append(pattern)

    if unprefixed:
        record("FAIL", "codeowners: moved patterns missing the hugo/ prefix",
               f"{len(unprefixed)} (globs included):\n "
               + "\n ".join(unprefixed[:20]))
    else:
        record("PASS", "codeowners: every moved pattern is hugo/-prefixed "
                       "(globs included)")


# --------------------------------------------------------------------------
# D. Husky behavioral checks
# --------------------------------------------------------------------------

def run_hook(script_name):
    """Run a .husky hook script from the repo root; return CompletedProcess."""
    return subprocess.run(
        ["python3", str(repo_root / ".husky" / script_name)],
        cwd=repo_root,
        capture_output=True,
        text=True,
    )


def _hook_rejected(proc, hook_name, keyword, *, ignore_case=False):
    """Return True only if the hook itself rejected the planted input.

    A non-zero exit alone is not proof: when the script is missing or crashes,
    Python prints the script path, and a filename such as
    'check-circular-aliases.py' contains the very keyword being searched for.
    So a traceback counts as "not rejected", and the hook's own filename is
    removed from the output before the keyword match.
    """
    output = proc.stdout + proc.stderr
    if "Traceback (most recent call last)" in output:
        return False
    message = output.replace(hook_name, "")
    if ignore_case:
        message, keyword = message.lower(), keyword.lower()
    return proc.returncode != 0 and keyword in message


def check_husky_circular_aliases():
    """Plant a self-aliasing page and confirm the hook rejects it."""
    hook = "check-circular-aliases.py"
    if not (repo_root / ".husky" / hook).is_file():
        record("SKIP", "husky: circular-aliases hook not found", f".husky/{hook}")
        return

    rel = f"hugo/content/en/{MARKER}_alias/selftest.md"
    target = repo_root / rel
    if target.exists():
        record("WARN", "husky: circular-aliases test skipped (temp path exists)", rel)
        return

    # An alias equal to the file's own location is the circular case.
    body = (
        "---\n"
        "title: Reorg Harness Self Test\n"
        "aliases:\n"
        f" - /{MARKER}_alias/selftest\n"
        "---\n\n"
        "Temporary file created by reorg_harness.py.\n"
    )
    try:
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(body)
        git("add", "-f", rel)
        proc = run_hook(hook)
        if _hook_rejected(proc, hook, "circular", ignore_case=True):
            record("PASS", "husky: circular-aliases hook rejects bad input")
        else:
            record("FAIL", "husky: circular-aliases hook did NOT reject bad input",
                   "hook may still point at the old content/en/ path "
                   f"(exit={proc.returncode})")
    finally:
        # Always unstage and delete the planted file, even if the hook raised.
        git("reset", "-q", "HEAD", rel)
        if target.exists():
            target.unlink()
        _rmdir_if_empty(target.parent)


def check_husky_section_index():
    """Plant a new top-level section with no _index.md and confirm rejection."""
    hook = "check-section-index.py"
    if not (repo_root / ".husky" / hook).is_file():
        record("SKIP", "husky: section-index hook not found", f".husky/{hook}")
        return

    section = f"{MARKER}_section"
    rel = f"hugo/content/en/{section}/page.md"
    target = repo_root / rel
    if target.exists() or (hugo_dir / "content" / "en" / section).exists():
        record("WARN", "husky: section-index test skipped (temp path exists)", rel)
        return

    body = "---\ntitle: Reorg Harness Page\nprivate: true\n---\n\nTemp page.\n"
    try:
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(body)
        git("add", "-f", rel)
        proc = run_hook(hook)
        if _hook_rejected(proc, hook, "_index.md"):
            record("PASS", "husky: section-index hook rejects missing _index.md")
        else:
            record("FAIL", "husky: section-index hook did NOT reject bad input",
                   "hook may still point at the old content/en/ path "
                   f"(exit={proc.returncode})")
    finally:
        # Always unstage and delete the planted file, even if the hook raised.
        git("reset", "-q", "HEAD", rel)
        if target.exists():
            target.unlink()
        _rmdir_if_empty(target.parent)
it. + """ + content_gitignore = hugo_dir / "content" / ".gitignore" + if not content_gitignore.exists(): + record("SKIP", "husky: cdocs-gitignore test skipped (no hugo/content/.gitignore)") + return + + pattern = f"/en/{MARKER}_cdocs.md" + compiled_rel = f"hugo/content/en/{MARKER}_cdocs.md" + source_rel = f"hugo/content/en/{MARKER}_cdocs.mdoc.md" + compiled = repo_root / compiled_rel + source = repo_root / source_rel + if compiled.exists() or source.exists(): + record("WARN", "husky: cdocs-gitignore test skipped (temp path exists)", + compiled_rel) + return + + original_gitignore = content_gitignore.read_text() + try: + content_gitignore.write_text( + original_gitignore.rstrip("\n") + f"\n{pattern}\n" + ) + source.write_text("Temp Cdocs source.\n") + compiled.write_text("Temp compiled Cdocs output.\n") + git("add", "-f", compiled_rel) # force past the gitignore to simulate the mistake + proc = run_hook("check-cdocs-gitignore.py") + if proc.returncode != 0 and MARKER in (proc.stdout + proc.stderr): + record("PASS", "husky: cdocs-gitignore hook flags tracked compiled file") + else: + record("FAIL", "husky: cdocs-gitignore hook did NOT flag tracked file", + "hook may still read the old content/.gitignore path " + f"(exit={proc.returncode})") + finally: + git("reset", "-q", "HEAD", compiled_rel) + for p in (compiled, source): + if p.exists(): + p.unlink() + content_gitignore.write_text(original_gitignore) + + +def _rmdir_if_empty(path): + """Remove a directory only if it is empty (safety guard).""" + try: + path.rmdir() + except OSError: + pass + + +# -------------------------------------------------------------------------- +# E. 
Vale +# -------------------------------------------------------------------------- + +def check_vale(): + """Confirm vale still flags a known violation using the Datadog style.""" + if subprocess.run(["which", "vale"], capture_output=True).returncode != 0: + record("SKIP", "vale: vale not installed") + return + + styles = (repo_root / ".vale.ini") + if not styles.exists(): + record("SKIP", "vale: .vale.ini not found") + return + + rel = f"hugo/content/en/{MARKER}_vale.md" + target = repo_root / rel + if target.exists(): + record("WARN", "vale: test skipped (temp path exists)", rel) + return + + # Each of these trips a Datadog substitution rule. + body = ( + "---\ntitle: Reorg Harness Vale Test\n---\n\n" + "Simply leverage this feature to ensure it works.\n" + ) + try: + target.write_text(body) + proc = subprocess.run( + ["vale", rel], + cwd=repo_root, + capture_output=True, + text=True, + ) + output = proc.stdout + proc.stderr + if "Datadog." in output: + record("PASS", "vale: Datadog style flags violations from new path") + elif "StylesPath" in output or "ExecError" in output or not output.strip(): + record("FAIL", "vale: ran but produced no Datadog findings", + "StylesPath may not resolve from the new content location") + else: + record("WARN", "vale: ran but no Datadog.* rule fired", + output.strip()[:200]) + finally: + if target.exists(): + target.unlink() + + +# -------------------------------------------------------------------------- +# F. Hugo build (static presence only) +# -------------------------------------------------------------------------- + +def check_build_presence(): + """Static check; the real build is the manual `make start` in todo #5.""" + # Hugo's build entrypoints all need to be co-located under hugo/: the Makefile, + # the Node manifest, and go.mod (required by Hugo Modules at the project root). 
+ required = ["Makefile", "package.json", "go.mod"] + missing = [n for n in required if not (hugo_dir / n).exists()] + if not missing: + record("PASS", "build: hugo/{Makefile,package.json,go.mod} present", + "run `cd hugo && make start` (todo #5) to verify the full build") + else: + record("FAIL", "build: missing Hugo build entrypoint(s) under hugo/", + ", ".join(f"hugo/{n}" for n in missing)) + + +# -------------------------------------------------------------------------- +# G. Rollback round-trip +# -------------------------------------------------------------------------- + +def check_rollback_roundtrip(): + """reorg.py then reorg_rollback.py must restore the tree byte-for-byte. + + This is the only check that exercises reorg_rollback.py at all. Rather than + mutate the live repo (and risk leaving it half-reorganized if a step fails), + it builds a small throwaway git repo holding one representative item for + every code path the two scripts touch — a mixed .gitignore (rules that route + to hugo/, to root, and to both), a workflow with substitutable paths, a + CODEOWNERS with a moved rule, a husky hook with a substitutable path, plus a + few moved/stayed files — then: + + snapshot -> run reorg.py (answering y to every prompt) + -> run reorg_rollback.py -> snapshot again + -> assert byte-identical. + + It specifically catches the easy-to-miss case where reorg.py edits a file in + place (the root .gitignore split) that rollback must restore from git rather + than merely delete. 
+ """ + if shutil.which("python3") is None or shutil.which("git") is None: + record("SKIP", "rollback: python3/git not both available") + return + + workdir = tempfile.mkdtemp(prefix=f"{MARKER}_rollback_") + work = Path(workdir) + + def write(rel, text): + p = work / rel + p.parent.mkdir(parents=True, exist_ok=True) + p.write_text(text) + + def git_work(*args): + return subprocess.run(["git", *args], cwd=work, + capture_output=True, text=True) + + def snapshot(): + """relpath -> bytes for every file under work/, excluding .git/.""" + tree = {} + for path in sorted(work.rglob("*")): + if path.is_dir(): + continue + rel = path.relative_to(work) + if rel.parts and rel.parts[0] == ".git": + continue + tree[str(rel)] = path.read_bytes() + return tree + + try: + # Representative fixture. Every top-level name here must appear in + # reorg_config.yaml or reorg.py refuses to run; the mix below routes + # across moves_to_hugo, top_level, and generic-glob cases. + write(".gitignore", + "# build\n" + "public/*\n" + "data/generated\n" + "content/en/api/**/*.go\n" + "node_modules\n" + "\n" + "# generic (kept in both)\n" + "*.log\n" + "\n" + "# root-only tooling\n" + ".github/preview-links-template.md\n") + write(".github/workflows/sample.yml", + "name: sample\n" + "on:\n" + " pull_request:\n" + " paths:\n" + " - 'content/en/**/*.md'\n" + "jobs:\n" + " build:\n" + " runs-on: ubuntu-latest\n" + " steps:\n" + " - run: python local/bin/foo.py\n") + write(".github/CODEOWNERS", + "* @DataDog/documentation\n" + "/content/ @DataDog/team-a\n" + "data/ @DataDog/team-b\n" + "README.md @DataDog/team-c\n") + write(".husky/check-sample.py", + "from pathlib import Path\n" + 'repo_pattern = "content"\n' + "p = Path('content/en')\n") + write("README.md", "# Fixture\n") + write("astro/package.json", '{"name": "astro-fixture"}\n') + write("content/en/page.md", "---\ntitle: Page\n---\n\nBody.\n") + write("data/sample.yaml", "key: value\n") + write("Makefile", "start:\n\techo build\n") + 
write("package.json", '{"name": "hugo-fixture"}\n') + write("go.mod", "module example.com/fixture\n\ngo 1.21\n") + write("babel.config.js", "module.exports = {};\n") + + # The scripts resolve repo_root from their own location, so copy them in. + for tool in ("reorg.py", "reorg_rollback.py", "reorg_config.yaml"): + shutil.copy2(repo_root / tool, work / tool) + + git_work("init", "-q") + git_work("add", "-A") + commit = git_work("-c", "user.email=harness@example.com", + "-c", "user.name=reorg harness", + "-c", "commit.gpgsign=false", + "commit", "-q", "-m", "fixture") + if commit.returncode != 0: + record("FAIL", "rollback: could not commit fixture repo", + (commit.stderr or commit.stdout).strip()[:200]) + return + + before = snapshot() + + # reorg.py is interactive (single y/N per mutation section); answer y to + # all. Far more lines than prompts is fine — the extras are ignored. + reorg = subprocess.run( + ["python3", str(work / "reorg.py")], + cwd=work, capture_output=True, text=True, input="y\n" * 100, + ) + if reorg.returncode != 0 or not (work / "hugo").exists(): + record("FAIL", "rollback: reorg.py failed on the fixture", + (reorg.stderr or reorg.stdout).strip()[:200]) + return + + rollback = subprocess.run( + ["python3", str(work / "reorg_rollback.py")], + cwd=work, capture_output=True, text=True, + ) + if rollback.returncode != 0 or (work / "hugo").exists(): + record("FAIL", "rollback: reorg_rollback.py failed on the fixture", + (rollback.stderr or rollback.stdout).strip()[:200]) + return + + after = snapshot() + + added = sorted(set(after) - set(before)) + removed = sorted(set(before) - set(after)) + changed = sorted(p for p in before.keys() & after.keys() + if before[p] != after[p]) + if added or removed or changed: + diffs = ([f"+ {p}" for p in added] + + [f"- {p}" for p in removed] + + [f"~ {p} (in-place edit not reverted)" for p in changed]) + record("FAIL", "rollback: tree not byte-identical after reorg + rollback", + f"{len(diffs)} diff(s):\n " + "\n 
".join(diffs[:20])) + else: + record("PASS", "rollback: reorg + rollback restores the tree byte-for-byte", + f"{len(before)} file(s) round-tripped, incl. the .gitignore split") + finally: + shutil.rmtree(work, ignore_errors=True) + + +# -------------------------------------------------------------------------- +# Main +# -------------------------------------------------------------------------- + +def main(): + if not hugo_dir.exists(): + print("hugo/ does not exist — run reorg.py first.", file=sys.stderr) + sys.exit(1) + + branch = git("branch", "--show-current").stdout.strip() + if branch == "master": + print("Refusing to run on master; check out a feature branch.", file=sys.stderr) + sys.exit(1) + + top_level, moves_to_hugo = load_config() + + print("== A. Layout ==") + check_layout(top_level, moves_to_hugo) + check_gitignore_split(top_level, moves_to_hugo) + check_site_separation(moves_to_hugo) + + print("\n== B. Workflow paths ==") + check_workflows(moves_to_hugo) + check_workflow_path_filters(moves_to_hugo) + + print("\n== C. CODEOWNERS ==") + check_codeowners() + check_codeowners_prefixing(moves_to_hugo) + + print("\n== D. Husky hooks ==") + check_husky_circular_aliases() + check_husky_section_index() + check_husky_cdocs_gitignore() + + print("\n== E. Vale ==") + check_vale() + + print("\n== F. Hugo build ==") + check_build_presence() + + print("\n== G. Rollback round-trip ==") + check_rollback_roundtrip() + + # Summary. 
+ counts = {"PASS": 0, "FAIL": 0, "SKIP": 0, "WARN": 0} + for status, _, _ in results: + counts[status] += 1 + print("\n" + "=" * 50) + print(f"PASS {counts['PASS']} FAIL {counts['FAIL']} " + f"WARN {counts['WARN']} SKIP {counts['SKIP']}") + + sys.exit(1 if counts["FAIL"] else 0) + + +if __name__ == "__main__": + main() diff --git a/reorg_rollback.py b/reorg_rollback.py index 050433f7e1d..4c061adfbfd 100644 --- a/reorg_rollback.py +++ b/reorg_rollback.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 import os +import subprocess import sys from pathlib import Path @@ -12,12 +13,36 @@ moved = 0 for name in sorted(os.listdir(hugo_dir)): + # reorg.py SPLITS .gitignore in place (it prunes the root copy and writes a + # routed subset to hugo/.gitignore). Skip it here so this rename can't clobber + # the pruned root copy with the hugo subset; the root copy is restored from + # git below, and hugo/.gitignore is discarded with the now-empty directory. + if name == ".gitignore": + continue src = hugo_dir / name dst = repo_root / name print(f"Moving hugo/{name} -> {name}") src.rename(dst) moved += 1 +# Discard the hugo/.gitignore the split created before emptying the directory. +gitignore_copy = hugo_dir / ".gitignore" +if gitignore_copy.exists(): + gitignore_copy.unlink() + print("Removed hugo/.gitignore (created by the split)") + # Only succeeds if hugo/ is now empty, preventing accidental data loss. hugo_dir.rmdir() print(f"Done. {moved} item(s) restored to repo root.") + +# Restore the files reorg.py edited in place to their committed state. The root +# .gitignore was pruned by the split, and reorg.py may have applied partial +# workflow/CODEOWNERS/husky substitutions interactively, so reversing them +# individually isn't reliable — let git restore them wholesale. 
+subprocess.run( + ["git", "checkout", "--", + ".gitignore", ".github/workflows/", ".github/CODEOWNERS", ".husky/"], + cwd=repo_root, + check=True, +) +print("Restored .gitignore, .github/workflows/, .github/CODEOWNERS, and .husky/ from git.") From d1401437663cce2d92388695fbd709abc05c3548 Mon Sep 17 00:00:00 2001 From: Jen Gilbert Date: Wed, 10 Jun 2026 16:14:45 -0500 Subject: [PATCH 03/18] Organize reorg files --- reorg_config.yaml => reorg/config.yaml | 6 +-- reorg/context.md | 11 +++++ reorg.py => reorg/execute_reorg.py | 16 +++---- reorg_harness.py => reorg/harness.py | 58 +++++++++++++------------- reorg_rollback.py => reorg/rollback.py | 2 +- reorg_context.md | 11 ----- 6 files changed, 51 insertions(+), 53 deletions(-) rename reorg_config.yaml => reorg/config.yaml (95%) create mode 100644 reorg/context.md rename reorg.py => reorg/execute_reorg.py (96%) rename reorg_harness.py => reorg/harness.py (93%) rename reorg_rollback.py => reorg/rollback.py (97%) delete mode 100644 reorg_context.md diff --git a/reorg_config.yaml b/reorg/config.yaml similarity index 95% rename from reorg_config.yaml rename to reorg/config.yaml index 83f5d0eb6af..e1e489238f4 100644 --- a/reorg_config.yaml +++ b/reorg/config.yaml @@ -6,11 +6,7 @@ top_level: - astro # Repo scripts - - reorg.py - - reorg_rollback.py - - reorg_config.yaml - - reorg_harness.py - - reorg_context.md + - reorg - reorg_issues.md # Created by reorg.py as the move target diff --git a/reorg/context.md b/reorg/context.md new file mode 100644 index 00000000000..ea141f6122e --- /dev/null +++ b/reorg/context.md @@ -0,0 +1,11 @@ +# Docs repository reorg context + +This repo is currently a Hugo site. We instead want it to contain a `hugo` and `astro` site side by side, with no overlap in their envs, `package.json` files, etc. + +`reorg/config.yaml` describes the relocation target for every file and folder at the top level of the repo. 
+ +`reorg/execute_reorg.py` implements the file and folder path changes, and updates any dependencies on those paths, such as GitHub actions, CODEOWNERS, and Husky workflows. + +`reorg/rollback.py` functions as an "undo" action for the reorg. + +`reorg/harness.py` verifies the functionality of affected entities where possible. \ No newline at end of file diff --git a/reorg.py b/reorg/execute_reorg.py similarity index 96% rename from reorg.py rename to reorg/execute_reorg.py index f50390583c2..1a71b9dfb99 100644 --- a/reorg.py +++ b/reorg/execute_reorg.py @@ -9,8 +9,8 @@ print("Error: PyYAML is required. Install with: pip install pyyaml", file=sys.stderr) sys.exit(1) -repo_root = Path(__file__).parent -config_path = repo_root / "reorg_config.yaml" +repo_root = Path(__file__).parent.parent +config_path = Path(__file__).parent / "config.yaml" with config_path.open() as f: config = yaml.safe_load(f) @@ -41,8 +41,8 @@ if errors: for name in sorted(errors): - print(f"ERROR: '{name}' is not listed in reorg_config.yaml", file=sys.stderr) - print(f"{len(errors)} error(s) found. Update reorg_config.yaml before running.", file=sys.stderr) + print(f"ERROR: '{name}' is not listed in reorg/config.yaml", file=sys.stderr) + print(f"{len(errors)} error(s) found. Update reorg/config.yaml before running.", file=sys.stderr) sys.exit(1) hugo_dir = repo_root / "hugo" @@ -65,7 +65,7 @@ # A .gitignore is interpreted RELATIVE TO ITS OWN DIRECTORY, so — unlike # CODEOWNERS — a moved rule needs NO "hugo/" prefix; it simply belongs in # hugo/.gitignore. 
We therefore only ROUTE each line, never rewrite it, by its -# FIRST PATH SEGMENT (driven entirely by reorg_config.yaml, same as CODEOWNERS): +# FIRST PATH SEGMENT (driven entirely by reorg/config.yaml, same as CODEOWNERS): # first segment in moves_to_hugo -> hugo/.gitignore only # first segment in top_level -> root .gitignore only # first segment in neither -> generic/pure glob, kept in BOTH @@ -117,7 +117,7 @@ def route_gitignore_segment(line): (hugo_dir / ".gitignore").write_text("".join(hugo_lines)) print(" Written: .gitignore (root, pruned) and hugo/.gitignore") if both_segments: - print(" NOTE: kept in both (first path segment not in reorg_config.yaml): " + print(" NOTE: kept in both (first path segment not in reorg/config.yaml): " + ", ".join(sorted(both_segments))) # Update .github/workflows/ files to reference paths under hugo/. @@ -188,7 +188,7 @@ def route_gitignore_segment(line): # FIRST PATH SEGMENT. The decision is made on a whole segment, never a raw # substring, so "config" can never match inside "customization_config" and the # substitution list no longer has to be hand-ordered or kept in sync with the -# move list — it is derived entirely from reorg_config.yaml. +# move list — it is derived entirely from reorg/config.yaml. def route_codeowners_pattern(pattern): """Rewrite one CODEOWNERS pattern for the hugo/ move. @@ -259,7 +259,7 @@ def route_codeowners_pattern(pattern): applied = True if left_alone: - print("\n NOTE: left unchanged (first path segment not in reorg_config.yaml): " + print("\n NOTE: left unchanged (first path segment not in reorg/config.yaml): " + ", ".join(sorted(left_alone))) if applied: diff --git a/reorg_harness.py b/reorg/harness.py similarity index 93% rename from reorg_harness.py rename to reorg/harness.py index c2e908bd9df..f49a6b754ae 100644 --- a/reorg_harness.py +++ b/reorg/harness.py @@ -2,7 +2,7 @@ """ Post-reorg validation harness. -Run this AFTER reorg.py, from the repo root, on a feature branch (not master). 
+Run this AFTER execute_reorg.py, from the repo root, on a feature branch (not master). It verifies that the things the reorg could silently break still work: A. Layout - hugo/ holds the moved dirs; nothing moved is left at root; @@ -10,7 +10,7 @@ Node/build file lingers at the root competing with Astro. B. Workflows - no .github/workflows/ file references a moved path (dir OR file) without the hugo/ prefix, in shell scalars and in - structured on.*.paths filters (catches gaps in reorg.py). + structured on.*.paths filters (catches gaps in execute_reorg.py). C. CODEOWNERS - every concrete path pattern still resolves on disk, and every moved pattern (globs included) carries the hugo/ prefix. D. Husky hooks - each pre-commit check still REJECTS a known-bad input planted @@ -19,7 +19,7 @@ E. Vale - vale still flags a known violation using the Datadog style, proving StylesPath resolves from the new content location. F. Hugo build - static presence check only; run `make start` manually (todo #5). - G. Rollback - on a throwaway repo, reorg.py then reorg_rollback.py must + G. Rollback - on a throwaway repo, execute_reorg.py then rollback.py must restore the tree byte-for-byte (the only test of rollback). The harness is non-destructive. Every file it creates lives under a @@ -41,7 +41,7 @@ print("Error: PyYAML is required. Install with: pip install pyyaml", file=sys.stderr) sys.exit(1) -repo_root = Path(__file__).parent +repo_root = Path(__file__).parent.parent hugo_dir = repo_root / "hugo" # Distinctive marker so anything we create is obvious and easy to clean up. 
@@ -73,8 +73,8 @@ def git(*args, check=False): def load_config(): - """Load reorg_config.yaml and return (top_level, moves_to_hugo) name sets.""" - config_path = repo_root / "reorg_config.yaml" + """Load reorg/config.yaml and return (top_level, moves_to_hugo) name sets.""" + config_path = Path(__file__).parent / "config.yaml" with config_path.open() as f: config = yaml.safe_load(f) return set(config.get("top_level", [])), set(config.get("moves_to_hugo", [])) @@ -133,7 +133,7 @@ def check_layout(top_level, moves_to_hugo): record("PASS", "layout: top-level anchors intact", ", ".join(must_stay)) - # Both .gitignore files should exist (reorg.py splits one into two; + # Both .gitignore files should exist (execute_reorg.py splits one into two; # check_gitignore_split below verifies the split routed correctly). if (repo_root / ".gitignore").exists() and (hugo_dir / ".gitignore").exists(): record("PASS", "layout: .gitignore present at root and in hugo/") @@ -143,7 +143,7 @@ def check_layout(top_level, moves_to_hugo): def check_gitignore_split(top_level, moves_to_hugo): - """Verify reorg.py routed .gitignore rules to the correct side. + """Verify execute_reorg.py routed .gitignore rules to the correct side. A .gitignore is relative to its own directory, so after the split no surviving rule should point at a path that lives on the OTHER side: @@ -233,7 +233,7 @@ def check_workflows(moves_to_hugo): trailing-slash token, so a workflow that referenced a moved *file* — babel.config.js, markdoc.config.json, the Makefile, go.mod — was never validated. We now route every path-like token by its first segment exactly - as reorg.py does: a token whose first segment moved into hugo/ must be + as execute_reorg.py does: a token whose first segment moved into hugo/ must be hugo/-prefixed (after a correct reorg its first segment is 'hugo', so it no longer matches). 
A token counts as a path reference when it contains a '/', or when the whole token is the exact name of a moved file — so a bare word @@ -242,7 +242,7 @@ def check_workflows(moves_to_hugo): A moved file whose name ALSO exists under astro/ (package.json, yarn.lock, node_modules, .nvmrc) is left out of the bare-name match: a standalone 'package.json' is genuinely ambiguous (it may be Astro's), which is exactly - why reorg.py never blind-substitutes those names. They are still validated + why execute_reorg.py never blind-substitutes those names. They are still validated when they appear inside a slashed path routed by its first segment. """ workflows_dir = repo_root / ".github" / "workflows" @@ -396,7 +396,7 @@ def check_codeowners_prefixing(moves_to_hugo): check_codeowners() above only proves that *concrete* paths still RESOLVE on disk; it skips every glob (layouts/shortcodes/**/*.md) and never confirms a moved pattern was actually repathed. Here we route each pattern by its first - path segment exactly as reorg.py's route_codeowners_pattern does and assert + path segment exactly as execute_reorg.py's route_codeowners_pattern does and assert that a moved segment is prefixed. After a correct reorg the pattern's first segment is 'hugo', so a flagged pattern means a substitution was missed. """ @@ -408,7 +408,7 @@ def check_codeowners_prefixing(moves_to_hugo): def first_segment(pattern): body = pattern[1:] if pattern.startswith("/") else pattern # un-anchor seg = body.split("/", 1)[0] - # reorg.py normalizes the '.local' typo to 'local' before routing. + # execute_reorg.py normalizes the '.local' typo to 'local' before routing. 
return "local" if seg == ".local" else seg unprefixed = [] @@ -462,7 +462,7 @@ def check_husky_circular_aliases(): "aliases:\n" f" - /{MARKER}_alias/selftest\n" "---\n\n" - "Temporary file created by reorg_harness.py.\n" + "Temporary file created by reorg/harness.py.\n" ) try: target.parent.mkdir(parents=True, exist_ok=True) @@ -632,9 +632,9 @@ def check_build_presence(): # -------------------------------------------------------------------------- def check_rollback_roundtrip(): - """reorg.py then reorg_rollback.py must restore the tree byte-for-byte. + """execute_reorg.py then rollback.py must restore the tree byte-for-byte. - This is the only check that exercises reorg_rollback.py at all. Rather than + This is the only check that exercises rollback.py at all. Rather than mutate the live repo (and risk leaving it half-reorganized if a step fails), it builds a small throwaway git repo holding one representative item for every code path the two scripts touch — a mixed .gitignore (rules that route @@ -642,11 +642,11 @@ def check_rollback_roundtrip(): CODEOWNERS with a moved rule, a husky hook with a substitutable path, plus a few moved/stayed files — then: - snapshot -> run reorg.py (answering y to every prompt) - -> run reorg_rollback.py -> snapshot again + snapshot -> run execute_reorg.py (answering y to every prompt) + -> run rollback.py -> snapshot again -> assert byte-identical. - It specifically catches the easy-to-miss case where reorg.py edits a file in + It specifically catches the easy-to-miss case where execute_reorg.py edits a file in place (the root .gitignore split) that rollback must restore from git rather than merely delete. """ @@ -680,7 +680,7 @@ def snapshot(): try: # Representative fixture. Every top-level name here must appear in - # reorg_config.yaml or reorg.py refuses to run; the mix below routes + # reorg/config.yaml or execute_reorg.py refuses to run; the mix below routes # across moves_to_hugo, top_level, and generic-glob cases. 
write(".gitignore", "# build\n" @@ -723,9 +723,11 @@ def snapshot(): write("go.mod", "module example.com/fixture\n\ngo 1.21\n") write("babel.config.js", "module.exports = {};\n") - # The scripts resolve repo_root from their own location, so copy them in. - for tool in ("reorg.py", "reorg_rollback.py", "reorg_config.yaml"): - shutil.copy2(repo_root / tool, work / tool) + # The scripts resolve repo_root as their parent's parent, so place them + # under a reorg/ subfolder that mirrors the real repo layout. + (work / "reorg").mkdir() + for tool in ("execute_reorg.py", "rollback.py", "config.yaml"): + shutil.copy2(repo_root / "reorg" / tool, work / "reorg" / tool) git_work("init", "-q") git_work("add", "-A") @@ -740,23 +742,23 @@ def snapshot(): before = snapshot() - # reorg.py is interactive (single y/N per mutation section); answer y to + # execute_reorg.py is interactive (single y/N per mutation section); answer y to # all. Far more lines than prompts is fine — the extras are ignored. reorg = subprocess.run( - ["python3", str(work / "reorg.py")], + ["python3", str(work / "reorg" / "execute_reorg.py")], cwd=work, capture_output=True, text=True, input="y\n" * 100, ) if reorg.returncode != 0 or not (work / "hugo").exists(): - record("FAIL", "rollback: reorg.py failed on the fixture", + record("FAIL", "rollback: execute_reorg.py failed on the fixture", (reorg.stderr or reorg.stdout).strip()[:200]) return rollback = subprocess.run( - ["python3", str(work / "reorg_rollback.py")], + ["python3", str(work / "reorg" / "rollback.py")], cwd=work, capture_output=True, text=True, ) if rollback.returncode != 0 or (work / "hugo").exists(): - record("FAIL", "rollback: reorg_rollback.py failed on the fixture", + record("FAIL", "rollback: rollback.py failed on the fixture", (rollback.stderr or rollback.stdout).strip()[:200]) return @@ -785,7 +787,7 @@ def snapshot(): def main(): if not hugo_dir.exists(): - print("hugo/ does not exist — run reorg.py first.", file=sys.stderr) + 
print("hugo/ does not exist — run reorg/execute_reorg.py first.", file=sys.stderr) sys.exit(1) branch = git("branch", "--show-current").stdout.strip() diff --git a/reorg_rollback.py b/reorg/rollback.py similarity index 97% rename from reorg_rollback.py rename to reorg/rollback.py index 4c061adfbfd..b2bda7fd1c2 100644 --- a/reorg_rollback.py +++ b/reorg/rollback.py @@ -4,7 +4,7 @@ import sys from pathlib import Path -repo_root = Path(__file__).parent +repo_root = Path(__file__).parent.parent hugo_dir = repo_root / "hugo" if not hugo_dir.exists(): diff --git a/reorg_context.md b/reorg_context.md deleted file mode 100644 index 55c39a900e5..00000000000 --- a/reorg_context.md +++ /dev/null @@ -1,11 +0,0 @@ -# Docs repository reorg context - -This repo is currently a Hugo site. We instead want it to contain a `hugo` and `astro` site side by side, with no overlap in their envs, `package.json` files, etc. - -The `reorg_config.yaml` describes the relocation target for every file and folder at the top level of the repo. - -`reorg.py` implements the file and folder path changes, and updates any dependencies on those paths, such as GitHub actions, CODEOWNERS, and Husky workflows. - -`reorg_rollback.py` functions as an "undo" action for the reorg. - -`reorg_harness.py` verifies the functionality of affected entities where possible. \ No newline at end of file From d511ddb66e92e7b15747957a86f8a401d7f86c45 Mon Sep 17 00:00:00 2001 From: Jen Gilbert Date: Wed, 10 Jun 2026 16:18:33 -0500 Subject: [PATCH 04/18] Rename file --- reorg/context.md | 2 +- reorg/{harness.py => validate_reorg.py} | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) rename reorg/{harness.py => validate_reorg.py} (99%) diff --git a/reorg/context.md b/reorg/context.md index ea141f6122e..49e59ebc781 100644 --- a/reorg/context.md +++ b/reorg/context.md @@ -8,4 +8,4 @@ This repo is currently a Hugo site. 
We instead want it to contain a `hugo` and ` `reorg/rollback.py` functions as an "undo" action for the reorg. -`reorg/harness.py` verifies the functionality of affected entities where possible. \ No newline at end of file +`reorg/validate_reorg.py` verifies the functionality of affected entities where possible. \ No newline at end of file diff --git a/reorg/harness.py b/reorg/validate_reorg.py similarity index 99% rename from reorg/harness.py rename to reorg/validate_reorg.py index f49a6b754ae..b998dbc606e 100644 --- a/reorg/harness.py +++ b/reorg/validate_reorg.py @@ -462,7 +462,7 @@ def check_husky_circular_aliases(): "aliases:\n" f" - /{MARKER}_alias/selftest\n" "---\n\n" - "Temporary file created by reorg/harness.py.\n" + "Temporary file created by reorg/validate_reorg.py.\n" ) try: target.parent.mkdir(parents=True, exist_ok=True) From 5ee8f2689c94450f0eeaa5193720516169f3a8d0 Mon Sep 17 00:00:00 2001 From: Jen Gilbert Date: Wed, 10 Jun 2026 16:21:12 -0500 Subject: [PATCH 05/18] Rename files --- {reorg => astro_reorg}/config.yaml | 2 +- astro_reorg/context.md | 11 +++++++++++ {reorg => astro_reorg}/execute_reorg.py | 12 ++++++------ {reorg => astro_reorg}/rollback.py | 0 {reorg => astro_reorg}/validate_reorg.py | 18 +++++++++--------- reorg/context.md | 11 ----------- 6 files changed, 27 insertions(+), 27 deletions(-) rename {reorg => astro_reorg}/config.yaml (99%) create mode 100644 astro_reorg/context.md rename {reorg => astro_reorg}/execute_reorg.py (94%) rename {reorg => astro_reorg}/rollback.py (100%) rename {reorg => astro_reorg}/validate_reorg.py (98%) delete mode 100644 reorg/context.md diff --git a/reorg/config.yaml b/astro_reorg/config.yaml similarity index 99% rename from reorg/config.yaml rename to astro_reorg/config.yaml index e1e489238f4..b8a182be4fb 100644 --- a/reorg/config.yaml +++ b/astro_reorg/config.yaml @@ -6,7 +6,7 @@ top_level: - astro # Repo scripts - - reorg + - astro_reorg - reorg_issues.md # Created by reorg.py as the move target diff 
--git a/astro_reorg/context.md b/astro_reorg/context.md new file mode 100644 index 00000000000..31c2924595d --- /dev/null +++ b/astro_reorg/context.md @@ -0,0 +1,11 @@ +# Docs repository reorg context + +This repo is currently a Hugo site. We instead want it to contain a `hugo` and `astro` site side by side, with no overlap in their envs, `package.json` files, etc. + +`astro_reorg/config.yaml` describes the relocation target for every file and folder at the top level of the repo. + +`astro_reorg/execute_reorg.py` implements the file and folder path changes, and updates any dependencies on those paths, such as GitHub actions, CODEOWNERS, and Husky workflows. + +`astro_reorg/rollback.py` functions as an "undo" action for the reorg. + +`astro_reorg/validate_reorg.py` verifies the functionality of affected entities where possible. \ No newline at end of file diff --git a/reorg/execute_reorg.py b/astro_reorg/execute_reorg.py similarity index 94% rename from reorg/execute_reorg.py rename to astro_reorg/execute_reorg.py index 1a71b9dfb99..53524555e8b 100644 --- a/reorg/execute_reorg.py +++ b/astro_reorg/execute_reorg.py @@ -41,8 +41,8 @@ if errors: for name in sorted(errors): - print(f"ERROR: '{name}' is not listed in reorg/config.yaml", file=sys.stderr) - print(f"{len(errors)} error(s) found. Update reorg/config.yaml before running.", file=sys.stderr) + print(f"ERROR: '{name}' is not listed in astro_reorg/config.yaml", file=sys.stderr) + print(f"{len(errors)} error(s) found. Update astro_reorg/config.yaml before running.", file=sys.stderr) sys.exit(1) hugo_dir = repo_root / "hugo" @@ -65,7 +65,7 @@ # A .gitignore is interpreted RELATIVE TO ITS OWN DIRECTORY, so — unlike # CODEOWNERS — a moved rule needs NO "hugo/" prefix; it simply belongs in # hugo/.gitignore. 
We therefore only ROUTE each line, never rewrite it, by its -# FIRST PATH SEGMENT (driven entirely by reorg/config.yaml, same as CODEOWNERS): +# FIRST PATH SEGMENT (driven entirely by astro_reorg/config.yaml, same as CODEOWNERS): # first segment in moves_to_hugo -> hugo/.gitignore only # first segment in top_level -> root .gitignore only # first segment in neither -> generic/pure glob, kept in BOTH @@ -117,7 +117,7 @@ def route_gitignore_segment(line): (hugo_dir / ".gitignore").write_text("".join(hugo_lines)) print(" Written: .gitignore (root, pruned) and hugo/.gitignore") if both_segments: - print(" NOTE: kept in both (first path segment not in reorg/config.yaml): " + print(" NOTE: kept in both (first path segment not in astro_reorg/config.yaml): " + ", ".join(sorted(both_segments))) # Update .github/workflows/ files to reference paths under hugo/. @@ -188,7 +188,7 @@ def route_gitignore_segment(line): # FIRST PATH SEGMENT. The decision is made on a whole segment, never a raw # substring, so "config" can never match inside "customization_config" and the # substitution list no longer has to be hand-ordered or kept in sync with the -# move list — it is derived entirely from reorg/config.yaml. +# move list — it is derived entirely from astro_reorg/config.yaml. def route_codeowners_pattern(pattern): """Rewrite one CODEOWNERS pattern for the hugo/ move. 
@@ -259,7 +259,7 @@ def route_codeowners_pattern(pattern): applied = True if left_alone: - print("\n NOTE: left unchanged (first path segment not in reorg/config.yaml): " + print("\n NOTE: left unchanged (first path segment not in astro_reorg/config.yaml): " + ", ".join(sorted(left_alone))) if applied: diff --git a/reorg/rollback.py b/astro_reorg/rollback.py similarity index 100% rename from reorg/rollback.py rename to astro_reorg/rollback.py diff --git a/reorg/validate_reorg.py b/astro_reorg/validate_reorg.py similarity index 98% rename from reorg/validate_reorg.py rename to astro_reorg/validate_reorg.py index b998dbc606e..8f1c00e7a00 100644 --- a/reorg/validate_reorg.py +++ b/astro_reorg/validate_reorg.py @@ -73,7 +73,7 @@ def git(*args, check=False): def load_config(): - """Load reorg/config.yaml and return (top_level, moves_to_hugo) name sets.""" + """Load astro_reorg/config.yaml and return (top_level, moves_to_hugo) name sets.""" config_path = Path(__file__).parent / "config.yaml" with config_path.open() as f: config = yaml.safe_load(f) @@ -462,7 +462,7 @@ def check_husky_circular_aliases(): "aliases:\n" f" - /{MARKER}_alias/selftest\n" "---\n\n" - "Temporary file created by reorg/validate_reorg.py.\n" + "Temporary file created by astro_reorg/validate_reorg.py.\n" ) try: target.parent.mkdir(parents=True, exist_ok=True) @@ -680,7 +680,7 @@ def snapshot(): try: # Representative fixture. Every top-level name here must appear in - # reorg/config.yaml or execute_reorg.py refuses to run; the mix below routes + # astro_reorg/config.yaml or execute_reorg.py refuses to run; the mix below routes # across moves_to_hugo, top_level, and generic-glob cases. write(".gitignore", "# build\n" @@ -724,10 +724,10 @@ def snapshot(): write("babel.config.js", "module.exports = {};\n") # The scripts resolve repo_root as their parent's parent, so place them - # under a reorg/ subfolder that mirrors the real repo layout. 
- (work / "reorg").mkdir() + # under an astro_reorg/ subfolder that mirrors the real repo layout. + (work / "astro_reorg").mkdir() for tool in ("execute_reorg.py", "rollback.py", "config.yaml"): - shutil.copy2(repo_root / "reorg" / tool, work / "reorg" / tool) + shutil.copy2(repo_root / "astro_reorg" / tool, work / "astro_reorg" / tool) git_work("init", "-q") git_work("add", "-A") @@ -745,7 +745,7 @@ def snapshot(): # execute_reorg.py is interactive (single y/N per mutation section); answer y to # all. Far more lines than prompts is fine — the extras are ignored. reorg = subprocess.run( - ["python3", str(work / "reorg" / "execute_reorg.py")], + ["python3", str(work / "astro_reorg" / "execute_reorg.py")], cwd=work, capture_output=True, text=True, input="y\n" * 100, ) if reorg.returncode != 0 or not (work / "hugo").exists(): @@ -754,7 +754,7 @@ def snapshot(): return rollback = subprocess.run( - ["python3", str(work / "reorg" / "rollback.py")], + ["python3", str(work / "astro_reorg" / "rollback.py")], cwd=work, capture_output=True, text=True, ) if rollback.returncode != 0 or (work / "hugo").exists(): @@ -787,7 +787,7 @@ def snapshot(): def main(): if not hugo_dir.exists(): - print("hugo/ does not exist — run reorg/execute_reorg.py first.", file=sys.stderr) + print("hugo/ does not exist — run astro_reorg/execute_reorg.py first.", file=sys.stderr) sys.exit(1) branch = git("branch", "--show-current").stdout.strip() diff --git a/reorg/context.md b/reorg/context.md deleted file mode 100644 index 49e59ebc781..00000000000 --- a/reorg/context.md +++ /dev/null @@ -1,11 +0,0 @@ -# Docs repository reorg context - -This repo is currently a Hugo site. We instead want it to contain a `hugo` and `astro` site side by side, with no overlap in their envs, `package.json` files, etc. - -`reorg/config.yaml` describes the relocation target for every file and folder at the top level of the repo. 
- -`reorg/execute_reorg.py` implements the file and folder path changes, and updates any dependencies on those paths, such as GitHub actions, CODEOWNERS, and Husky workflows. - -`reorg/rollback.py` functions as an "undo" action for the reorg. - -`reorg/validate_reorg.py` verifies the functionality of affected entities where possible. \ No newline at end of file From 93e4efb05906519fab796f6ee65da2be842002a2 Mon Sep 17 00:00:00 2001 From: Jen Gilbert Date: Wed, 10 Jun 2026 16:22:56 -0500 Subject: [PATCH 06/18] Add file to config --- astro_reorg/config.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/astro_reorg/config.yaml b/astro_reorg/config.yaml index b8a182be4fb..47bc16e6276 100644 --- a/astro_reorg/config.yaml +++ b/astro_reorg/config.yaml @@ -121,3 +121,5 @@ moves_to_hugo: ignore: # macOS metadata - .DS_Store + # Python bytecode cache + - __pycache__ From 7e4fef0f5af51d2c48b95f8e3105c80b3316b767 Mon Sep 17 00:00:00 2001 From: Jen Gilbert Date: Wed, 10 Jun 2026 16:31:09 -0500 Subject: [PATCH 07/18] Add visual diffs for file updates --- astro_reorg/execute_reorg.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/astro_reorg/execute_reorg.py b/astro_reorg/execute_reorg.py index 53524555e8b..4dfc925df39 100644 --- a/astro_reorg/execute_reorg.py +++ b/astro_reorg/execute_reorg.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +import difflib import os import sys from pathlib import Path @@ -19,6 +20,28 @@ moves_to_hugo = set(config.get("moves_to_hugo", [])) ignore = set(config.get("ignore", [])) + +def show_diff(original: str, updated: str, filename: str = "") -> None: + """Print a colored unified diff between original and updated text.""" + orig_lines = original.splitlines(keepends=True) + upd_lines = updated.splitlines(keepends=True) + diff = list(difflib.unified_diff( + orig_lines, upd_lines, + fromfile=f"a/{filename}", tofile=f"b/{filename}", + n=2, + )) + if not diff: + return + for line in diff: + if line.startswith("+") 
and not line.startswith("+++"): + print(f"\033[32m{line}\033[0m", end="") + elif line.startswith("-") and not line.startswith("---"): + print(f"\033[31m{line}\033[0m", end="") + elif line.startswith("@@"): + print(f"\033[36m{line}\033[0m", end="") + else: + print(line, end="") + # Sanity-check the config itself for conflicts. conflicts = top_level & moves_to_hugo if conflicts: @@ -111,6 +134,10 @@ def route_gitignore_segment(line): f"{len(both_segments)} generic kept in both") print(f" root: {len(root_lines)} line(s), hugo/: {len(hugo_lines)} line(s) " f"(was {len(gi_lines)} duplicated wholesale)") +original_gi = "".join(gi_lines) +show_diff(original_gi, "".join(root_lines), ".gitignore") +print(" --- new file: hugo/.gitignore ---") +show_diff("", "".join(hugo_lines), "hugo/.gitignore") answer = input(" Apply? [y/N] ").strip().lower() if answer == "y": gitignore.write_text("".join(root_lines)) @@ -144,6 +171,7 @@ def route_gitignore_segment(line): if old not in updated: continue print(f"\n {yml_file.name}: {old!r} → {new!r}") + show_diff(updated, updated.replace(old, new), yml_file.name) answer = input(" Apply? [y/N] ").strip().lower() if answer == "y": updated = updated.replace(old, new) @@ -172,6 +200,7 @@ def route_gitignore_segment(line): if old not in updated: continue print(f"\n {py_file.name}: {old!r} → {new!r}") + show_diff(updated, updated.replace(old, new), py_file.name) answer = input(" Apply? [y/N] ").strip().lower() if answer == "y": updated = updated.replace(old, new) @@ -252,6 +281,10 @@ def route_codeowners_pattern(pattern): new_pattern = entries[0][2].lstrip().split(maxsplit=1)[0] print(f"\n CODEOWNERS: prefix {len(entries)} pattern(s) under {segment!r} " f"with hugo/ (e.g. {old_pattern!r} → {new_pattern!r})") + preview = lines[:] + for idx, _, new_line in entries: + preview[idx] = new_line + show_diff("".join(lines), "".join(preview), "CODEOWNERS") answer = input(" Apply? 
[y/N] ").strip().lower() if answer == "y": for idx, _, new_line in entries: From 6092c839925fb1b91afaa8f91560dd49e6c0cf1c Mon Sep 17 00:00:00 2001 From: Jen Gilbert Date: Wed, 10 Jun 2026 16:37:54 -0500 Subject: [PATCH 08/18] Skip .gitignore previews --- astro_reorg/execute_reorg.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/astro_reorg/execute_reorg.py b/astro_reorg/execute_reorg.py index 4dfc925df39..d28f1358ad7 100644 --- a/astro_reorg/execute_reorg.py +++ b/astro_reorg/execute_reorg.py @@ -134,10 +134,6 @@ def route_gitignore_segment(line): f"{len(both_segments)} generic kept in both") print(f" root: {len(root_lines)} line(s), hugo/: {len(hugo_lines)} line(s) " f"(was {len(gi_lines)} duplicated wholesale)") -original_gi = "".join(gi_lines) -show_diff(original_gi, "".join(root_lines), ".gitignore") -print(" --- new file: hugo/.gitignore ---") -show_diff("", "".join(hugo_lines), "hugo/.gitignore") answer = input(" Apply? [y/N] ").strip().lower() if answer == "y": gitignore.write_text("".join(root_lines)) From 4ff7a3204bac911abd7980fad48eab4363807550 Mon Sep 17 00:00:00 2001 From: Jen Gilbert Date: Wed, 10 Jun 2026 16:41:43 -0500 Subject: [PATCH 09/18] Simplify script --- astro_reorg/execute_reorg.py | 87 ++++++------------------------------ 1 file changed, 13 insertions(+), 74 deletions(-) diff --git a/astro_reorg/execute_reorg.py b/astro_reorg/execute_reorg.py index d28f1358ad7..146792b032b 100644 --- a/astro_reorg/execute_reorg.py +++ b/astro_reorg/execute_reorg.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -import difflib import os import sys from pathlib import Path @@ -20,28 +19,6 @@ moves_to_hugo = set(config.get("moves_to_hugo", [])) ignore = set(config.get("ignore", [])) - -def show_diff(original: str, updated: str, filename: str = "") -> None: - """Print a colored unified diff between original and updated text.""" - orig_lines = original.splitlines(keepends=True) - upd_lines = updated.splitlines(keepends=True) - diff = list(difflib.unified_diff( - 
orig_lines, upd_lines, - fromfile=f"a/{filename}", tofile=f"b/{filename}", - n=2, - )) - if not diff: - return - for line in diff: - if line.startswith("+") and not line.startswith("+++"): - print(f"\033[32m{line}\033[0m", end="") - elif line.startswith("-") and not line.startswith("---"): - print(f"\033[31m{line}\033[0m", end="") - elif line.startswith("@@"): - print(f"\033[36m{line}\033[0m", end="") - else: - print(line, end="") - # Sanity-check the config itself for conflicts. conflicts = top_level & moves_to_hugo if conflicts: @@ -130,18 +107,13 @@ def route_gitignore_segment(line): hugo_lines.append(raw) both_segments.add(segment) -print(f"\n .gitignore: {len(hugo_only_segments)} segment(s) -> hugo/.gitignore only, " - f"{len(both_segments)} generic kept in both") -print(f" root: {len(root_lines)} line(s), hugo/: {len(hugo_lines)} line(s) " - f"(was {len(gi_lines)} duplicated wholesale)") -answer = input(" Apply? [y/N] ").strip().lower() -if answer == "y": - gitignore.write_text("".join(root_lines)) - (hugo_dir / ".gitignore").write_text("".join(hugo_lines)) - print(" Written: .gitignore (root, pruned) and hugo/.gitignore") - if both_segments: - print(" NOTE: kept in both (first path segment not in astro_reorg/config.yaml): " - + ", ".join(sorted(both_segments))) +gitignore.write_text("".join(root_lines)) +(hugo_dir / ".gitignore").write_text("".join(hugo_lines)) +print(f" Written: .gitignore (root, pruned) and hugo/.gitignore " + f"({len(hugo_only_segments)} segment(s) -> hugo/ only, {len(both_segments)} generic kept in both)") +if both_segments: + print(" NOTE: kept in both (first path segment not in astro_reorg/config.yaml): " + + ", ".join(sorted(both_segments))) # Update .github/workflows/ files to reference paths under hugo/. 
WORKFLOW_SUBSTITUTIONS = [ @@ -164,13 +136,7 @@ def route_gitignore_segment(line): original = yml_file.read_text() updated = original for old, new in WORKFLOW_SUBSTITUTIONS: - if old not in updated: - continue - print(f"\n {yml_file.name}: {old!r} → {new!r}") - show_diff(updated, updated.replace(old, new), yml_file.name) - answer = input(" Apply? [y/N] ").strip().lower() - if answer == "y": - updated = updated.replace(old, new) + updated = updated.replace(old, new) if updated != original: yml_file.write_text(updated) print(f" Written: {yml_file.name}") @@ -193,13 +159,7 @@ def route_gitignore_segment(line): original = py_file.read_text() updated = original for old, new in HUSKY_SUBSTITUTIONS: - if old not in updated: - continue - print(f"\n {py_file.name}: {old!r} → {new!r}") - show_diff(updated, updated.replace(old, new), py_file.name) - answer = input(" Apply? [y/N] ").strip().lower() - if answer == "y": - updated = updated.replace(old, new) + updated = updated.replace(old, new) if updated != original: py_file.write_text(updated) print(f" Written: {py_file.name}") @@ -246,12 +206,9 @@ def route_codeowners_pattern(pattern): print("\nUpdating .github/CODEOWNERS...") codeowners = repo_root / ".github" / "CODEOWNERS" lines = codeowners.read_text().splitlines(keepends=True) - -# Group rewritable lines by first path segment so we prompt once per segment -# (one y/N covers every "content/..." rule) instead of once per line. 
-changes_by_segment = {} # segment -> list of (line_index, old_pattern, new_line) left_alone = set() # first segments in neither config list (surfaced below) +changed = False for i, raw in enumerate(lines): newline = "\n" if raw.endswith("\n") else "" line = raw[:-len(newline)] if newline else raw @@ -266,31 +223,13 @@ def route_codeowners_pattern(pattern): if segment is not None and segment not in top_level: left_alone.add(segment) continue - changes_by_segment.setdefault(segment, []).append( - (i, pattern, indent + new_pattern + rest + newline) - ) - -applied = False -for segment in sorted(changes_by_segment): - entries = changes_by_segment[segment] - old_pattern = entries[0][1] - new_pattern = entries[0][2].lstrip().split(maxsplit=1)[0] - print(f"\n CODEOWNERS: prefix {len(entries)} pattern(s) under {segment!r} " - f"with hugo/ (e.g. {old_pattern!r} → {new_pattern!r})") - preview = lines[:] - for idx, _, new_line in entries: - preview[idx] = new_line - show_diff("".join(lines), "".join(preview), "CODEOWNERS") - answer = input(" Apply? 
[y/N] ").strip().lower() - if answer == "y": - for idx, _, new_line in entries: - lines[idx] = new_line - applied = True + lines[i] = indent + new_pattern + rest + newline + changed = True if left_alone: print("\n NOTE: left unchanged (first path segment not in astro_reorg/config.yaml): " + ", ".join(sorted(left_alone))) -if applied: +if changed: codeowners.write_text("".join(lines)) print(" Written: CODEOWNERS") From b896be101529d1bd512ef80697a510927eb8cdc3 Mon Sep 17 00:00:00 2001 From: Jen Gilbert Date: Wed, 10 Jun 2026 17:18:45 -0500 Subject: [PATCH 10/18] Code tweaks --- astro_reorg/context.md | 4 +- astro_reorg/helpers.py | 662 +++++++++++++++++++++++++++ astro_reorg/validate_reorg.py | 826 ++-------------------------------- 3 files changed, 704 insertions(+), 788 deletions(-) create mode 100644 astro_reorg/helpers.py diff --git a/astro_reorg/context.md b/astro_reorg/context.md index 31c2924595d..f92cc1951ac 100644 --- a/astro_reorg/context.md +++ b/astro_reorg/context.md @@ -8,4 +8,6 @@ This repo is currently a Hugo site. We instead want it to contain a `hugo` and ` `astro_reorg/rollback.py` functions as an "undo" action for the reorg. -`astro_reorg/validate_reorg.py` verifies the functionality of affected entities where possible. \ No newline at end of file +`astro_reorg/validate_reorg.py` verifies the functionality of affected entities where possible. + +You can ignore the `astro` folder, it's a remnant from another branch where an Astro site is being developed. It is completely out of scope. \ No newline at end of file diff --git a/astro_reorg/helpers.py b/astro_reorg/helpers.py new file mode 100644 index 00000000000..cd4ec79c455 --- /dev/null +++ b/astro_reorg/helpers.py @@ -0,0 +1,662 @@ +import os +import re +import shutil +import subprocess +import sys +import tempfile +from pathlib import Path + +try: + import yaml +except ImportError: + print("Error: PyYAML is required. 
Install with: pip install pyyaml", file=sys.stderr) + sys.exit(1) + +repo_root = Path(__file__).parent.parent +hugo_dir = repo_root / "hugo" + +# Distinctive marker so anything we create is obvious and easy to clean up. +MARKER = "__reorg_harness__" + +# (status, name, detail) tuples collected as checks run. +results = [] + + +def record(status, name, detail=""): + """Record a check outcome. status is one of PASS, FAIL, SKIP, WARN.""" + results.append((status, name, detail)) + symbol = {"PASS": "✅", "FAIL": "❌", "SKIP": "⏭ ", "WARN": "⚠ "}[status] + line = f"{symbol} {status:4} {name}" + if detail: + line += f"\n {detail}" + print(line) + + +def git(*args, check=False): + """Run a git command from the repo root and return the CompletedProcess.""" + return subprocess.run( + ["git", *args], + cwd=repo_root, + capture_output=True, + text=True, + check=check, + ) + + +def load_config(): + """Load astro_reorg/config.yaml and return (top_level, moves_to_hugo) name sets.""" + config_path = Path(__file__).parent / "config.yaml" + with config_path.open() as f: + config = yaml.safe_load(f) + return set(config.get("top_level", [])), set(config.get("moves_to_hugo", [])) + + +def check_layout(top_level, moves_to_hugo): + """hugo/ holds the moved dirs; nothing that moved is left at the root.""" + # Moved items that the reorg actually had to relocate (present before). + expected_in_hugo = sorted(n for n in moves_to_hugo if (hugo_dir / n).exists()) + missing_from_hugo = sorted( + n for n in moves_to_hugo + if (repo_root / n).exists() and not (hugo_dir / n).exists() + ) + + # A moved name still sitting at the root means the move didn't happen. 
+ still_at_root = sorted( + n for n in moves_to_hugo if (repo_root / n).exists() + ) + if still_at_root: + record("FAIL", "layout: moved items remain at repo root", + ", ".join(still_at_root)) + else: + record("PASS", "layout: no moved items remain at repo root", + f"{len(expected_in_hugo)} item(s) confirmed under hugo/") + + if missing_from_hugo: + record("FAIL", "layout: moved items missing under hugo/", + ", ".join(missing_from_hugo)) + + # No top_level item should have leaked into hugo/. Two names are expected to + # appear under hugo/ and are NOT leaks: 'hugo' itself (the move target), and + # '.gitignore' — execute_reorg.py deliberately SPLITS the root .gitignore, + # writing a routed subset to hugo/.gitignore (verified by check_gitignore_split + # and the "present at root and in hugo/" check below). + expected_under_hugo = {"hugo", ".gitignore"} + leaked = sorted( + n for n in top_level + if n not in expected_under_hugo and (hugo_dir / n).exists() + ) + if leaked: + record("FAIL", "layout: top-level items leaked into hugo/", + ", ".join(leaked)) + else: + record("PASS", "layout: no top-level items leaked into hugo/") + + # Critical anchors that must still exist at the root after the move. + # package.json/node_modules/yarn.lock and go.mod/go.sum belong under hugo/ + # (each site owns its own Node setup; the Go module powers Hugo Modules), + # so they are deliberately NOT listed here. + must_stay = ["astro", ".husky", ".github", ".vale.ini"] + absent = [n for n in must_stay if not (repo_root / n).exists()] + if absent: + record("FAIL", "layout: expected top-level items are missing", + ", ".join(absent)) + else: + record("PASS", "layout: top-level anchors intact", + ", ".join(must_stay)) + + # Both .gitignore files should exist (execute_reorg.py splits one into two; + # check_gitignore_split below verifies the split routed correctly). 
+ if (repo_root / ".gitignore").exists() and (hugo_dir / ".gitignore").exists(): + record("PASS", "layout: .gitignore present at root and in hugo/") + else: + record("FAIL", "layout: missing a .gitignore copy", + "expected both ./.gitignore and ./hugo/.gitignore") + + +def check_gitignore_split(top_level, moves_to_hugo): + """Verify execute_reorg.py routed .gitignore rules to the correct side. + + A .gitignore is relative to its own directory, so after the split no + surviving rule should point at a path that lives on the OTHER side: + - root .gitignore must not carry a rule whose first segment moved to hugo/ + - hugo/.gitignore must not carry a rule whose first segment stays at root + Generic globs (first segment in neither config list) legitimately live in + both files, so they are ignored here. + """ + def first_segment(line): + stripped = line.strip() + if not stripped or stripped.startswith("#"): + return None + body = stripped[1:] if stripped.startswith("!") else stripped + return body.lstrip("/").split("/", 1)[0] + + def leaks(path, wrong_side): + out = [] + for raw in path.read_text().splitlines(): + seg = first_segment(raw) + if seg in wrong_side: + out.append(f"{path.name}: {raw.strip()} (segment {seg!r})") + return out + + root_gi = repo_root / ".gitignore" + hugo_gi = hugo_dir / ".gitignore" + if not (root_gi.exists() and hugo_gi.exists()): + record("SKIP", "gitignore: split not verifiable (a .gitignore is missing)") + return + + problems = leaks(root_gi, moves_to_hugo) + leaks(hugo_gi, top_level - {"hugo"}) + if problems: + record("FAIL", "gitignore: rules survive on the wrong side of the split", + f"{len(problems)}:\n " + "\n ".join(problems[:20])) + else: + record("PASS", "gitignore: no moved-path rule left at root, " + "no root-path rule left in hugo/") + + +def check_workflows(moves_to_hugo): + """Flag references to moved paths (directories AND files) lacking hugo/. 
+ + Routes every path-like token by its first segment, mirroring execute_reorg.py: + a token whose first segment moved into hugo/ must be hugo/-prefixed (after a + correct reorg its first segment is 'hugo', so it no longer matches). A token + counts as a path reference when it contains a '/', or when the whole token is + the exact name of a moved file — so a bare word like 'content' in prose isn't + flagged, but a standalone 'babel.config.js' is. + + Moved files whose names also exist under astro/ (package.json, yarn.lock, + node_modules, .nvmrc) are excluded from bare-name matching: a standalone + 'package.json' is genuinely ambiguous. They are still validated when they + appear inside a slashed path routed by first segment. + """ + workflows_dir = repo_root / ".github" / "workflows" + if not workflows_dir.exists(): + record("SKIP", "workflows: .github/workflows/ not found") + return + + moved_files = { + n for n in moves_to_hugo + if (hugo_dir / n).is_file() and not (repo_root / "astro" / n).exists() + } + + # Lines that legitimately reference a moved name but are NOT paths to fix + # (illustrative prose, external URLs). Keyed by workflow filename; a line is + # exempt if it contains any of the listed markers. + allowlist = { + # Security-doc example of how untrusted paths are reported, not a real path. + "claude_review.yml": ["__untrusted/content/"], + } + + # A path-like token: a maximal run of path/glob characters. Whitespace, + # quotes, ':', '@' and '!' all act as boundaries, so a leading '!' negation + # or surrounding quotes fall away naturally. 
+ token_re = re.compile(r"[A-Za-z0-9_.\-*/]+") + + def first_segment(token): + """(first path segment, ./-stripped token) for routing.""" + t = token + while t.startswith("./"): + t = t[2:] + return t.split("/", 1)[0], t + + suspects = [] + for yml in sorted(workflows_dir.glob("*.yml")): + exempt = allowlist.get(yml.name, []) + for lineno, line in enumerate(yml.read_text().splitlines(), 1): + if any(marker in line for marker in exempt): + continue + for token in token_re.findall(line): + seg, normalized = first_segment(token) + # Only treat a token as a path reference if it's an actual path + # (has a '/') or is exactly the name of a moved file. + if "/" not in normalized and normalized not in moved_files: + continue + if seg in moves_to_hugo: + suspects.append(f"{yml.name}:{lineno}: {line.strip()}") + break + + # De-duplicate while keeping order. + seen = set() + unique = [s for s in suspects if not (s in seen or seen.add(s))] + + if unique: + record("FAIL", "workflows: unprefixed moved paths found", + f"{len(unique)} line(s):\n " + "\n ".join(unique[:20])) + else: + record("PASS", "workflows: all moved paths (dirs and files) are hugo/-prefixed") + + +def check_workflow_path_filters(moves_to_hugo): + """Parse each workflow's on.*.paths filters and assert moved paths are prefixed. + + Unlike paths embedded in `run:` shell (which are opaque string scalars to a + parser), `on..paths` / `paths-ignore` are structured YAML list values, + so we can validate them precisely. Each entry is routed by its first path + segment: if that segment moved into hugo/, the entry must be hugo/-prefixed. + + Footgun handled: under YAML 1.1, PyYAML loads the `on:` key as the boolean + True, not the string "on" — so we look it up under both keys. + """ + workflows_dir = repo_root / ".github" / "workflows" + if not workflows_dir.exists(): + record("SKIP", "workflows: .github/workflows/ not found (path filters)") + return + + def first_segment(entry): + # Strip a leading '!' 
negation and any root anchor, then take segment 0. + p = entry[1:] if entry.startswith("!") else entry + return p.lstrip("/").split("/", 1)[0] + + problems = [] + for yml in sorted(workflows_dir.glob("*.yml")): + try: + doc = yaml.safe_load(yml.read_text()) + except yaml.YAMLError as exc: + record("WARN", f"workflows: {yml.name} did not parse as YAML", + str(exc).splitlines()[0][:120]) + continue + if not isinstance(doc, dict): + continue + triggers = doc.get("on", doc.get(True)) # `on:` -> True under YAML 1.1 + if not isinstance(triggers, dict): + continue + for event, spec in triggers.items(): + if not isinstance(spec, dict): + continue + for key in ("paths", "paths-ignore"): + for entry in spec.get(key) or []: + if not isinstance(entry, str): + continue + seg = first_segment(entry) + anchored = entry.lstrip("!").lstrip("/") + if seg in moves_to_hugo and not anchored.startswith("hugo/"): + problems.append(f"{yml.name}: on.{event}.{key}: {entry}") + + if problems: + record("FAIL", "workflows: on.*.paths filters missing hugo/ prefix", + f"{len(problems)}:\n " + "\n ".join(problems[:20])) + else: + record("PASS", "workflows: on.*.paths filters are hugo/-prefixed") + + +def check_codeowners(): + """Flag concrete (non-glob) CODEOWNERS patterns that don't resolve correctly. + + Stale entries pointing at files absent from both hugo/ and the repo root are + reported as informational notes but never failed. The two cases for a + non-resolving concrete pattern: + + - hugo/-prefixed AND its de-prefixed path resolves at the repo root + -> REGRESSION: the pattern was prefixed but the file is still at + root (the move didn't happen / went to the wrong place). FAIL. + - anything else that doesn't resolve -> absent from both hugo/ and root. + Reported as an informational note, never a failure. 
+ """ + codeowners = repo_root / ".github" / "CODEOWNERS" + if not codeowners.exists(): + record("SKIP", "codeowners: .github/CODEOWNERS not found") + return + + regressions = [] + preexisting = [] + for line in codeowners.read_text().splitlines(): + stripped = line.strip() + if not stripped or stripped.startswith("#"): + continue + pattern = stripped.split()[0] + if pattern == "*": + continue + # Skip glob patterns; we can't resolve them to a single path. + if any(ch in pattern for ch in "*?[]"): + continue + # Leading slash in CODEOWNERS is repo-root-anchored. + rel = pattern.lstrip("/") + if (repo_root / rel).exists(): + continue + # Doesn't resolve. Is it something the reorg broke, or pre-existing rot? + if rel.startswith("hugo/") and (repo_root / rel[len("hugo/"):]).exists(): + regressions.append(pattern) + else: + preexisting.append(pattern) + + if regressions: + record("FAIL", "codeowners: patterns the reorg moved to hugo/ but whose " + "file is still at root", + f"{len(regressions)}:\n " + "\n ".join(regressions[:20])) + elif preexisting: + record("PASS", "codeowners: no reorg-introduced breakage", + f"{len(preexisting)} pre-existing dangling entry(ies) ignored " + f"(absent on master too): e.g. {', '.join(preexisting[:3])}") + else: + record("PASS", "codeowners: all concrete patterns resolve on disk") + + +def check_codeowners_prefixing(moves_to_hugo): + """Every CODEOWNERS pattern whose first segment moved into hugo/ must carry + the hugo/ prefix — globs included. + + check_codeowners() above only proves that *concrete* paths still RESOLVE on + disk; it skips every glob (layouts/shortcodes/**/*.md) and never confirms a + moved pattern was actually repathed. Here we route each pattern by its first + path segment exactly as execute_reorg.py's route_codeowners_pattern does and assert + that a moved segment is prefixed. After a correct reorg the pattern's first + segment is 'hugo', so a flagged pattern means a substitution was missed. 
+ """ + codeowners = repo_root / ".github" / "CODEOWNERS" + if not codeowners.exists(): + record("SKIP", "codeowners: .github/CODEOWNERS not found (prefixing)") + return + + def first_segment(pattern): + body = pattern[1:] if pattern.startswith("/") else pattern # un-anchor + seg = body.split("/", 1)[0] + # execute_reorg.py normalizes the '.local' typo to 'local' before routing. + return "local" if seg == ".local" else seg + + unprefixed = [] + for line in codeowners.read_text().splitlines(): + stripped = line.strip() + if not stripped or stripped.startswith("#"): + continue + pattern = stripped.split()[0] + if pattern == "*": + continue + if first_segment(pattern) not in moves_to_hugo: + continue + if not pattern.lstrip("/").startswith("hugo/"): + unprefixed.append(pattern) + + if unprefixed: + record("FAIL", "codeowners: moved patterns missing the hugo/ prefix", + f"{len(unprefixed)} (globs included):\n " + + "\n ".join(unprefixed[:20])) + else: + record("PASS", "codeowners: every moved pattern is hugo/-prefixed " + "(globs included)") + + +def run_hook(script_name): + """Run a .husky hook script from the repo root; return CompletedProcess.""" + return subprocess.run( + ["python3", str(repo_root / ".husky" / script_name)], + cwd=repo_root, + capture_output=True, + text=True, + ) + + +def check_husky_circular_aliases(): + """Plant a self-aliasing page and confirm the hook rejects it.""" + rel = f"hugo/content/en/{MARKER}_alias/selftest.md" + target = repo_root / rel + if target.exists(): + record("WARN", "husky: circular-aliases test skipped (temp path exists)", rel) + return + + # An alias equal to the file's own location is the circular case. 
+ body = ( + "---\n" + "title: Reorg Harness Self Test\n" + "aliases:\n" + f" - /{MARKER}_alias/selftest\n" + "---\n\n" + "Temporary file created by astro_reorg/validate_reorg.py.\n" + ) + try: + target.parent.mkdir(parents=True, exist_ok=True) + target.write_text(body) + git("add", "-f", rel) + proc = run_hook("check-circular-aliases.py") + if proc.returncode != 0 and "circular" in (proc.stdout + proc.stderr).lower(): + record("PASS", "husky: circular-aliases hook rejects bad input") + else: + record("FAIL", "husky: circular-aliases hook did NOT reject bad input", + "hook may not be referencing hugo/content/en/ " + f"(exit={proc.returncode})") + finally: + git("reset", "-q", "HEAD", rel) + if target.exists(): + target.unlink() + _rmdir_if_empty(target.parent) + + +def check_husky_section_index(): + """Plant a new top-level section with no _index.md and confirm rejection.""" + section = f"{MARKER}_section" + rel = f"hugo/content/en/{section}/page.md" + target = repo_root / rel + if target.exists() or (hugo_dir / "content" / "en" / section).exists(): + record("WARN", "husky: section-index test skipped (temp path exists)", rel) + return + + body = "---\ntitle: Reorg Harness Page\nprivate: true\n---\n\nTemp page.\n" + try: + target.parent.mkdir(parents=True, exist_ok=True) + target.write_text(body) + git("add", "-f", rel) + proc = run_hook("check-section-index.py") + if proc.returncode != 0 and "_index.md" in (proc.stdout + proc.stderr): + record("PASS", "husky: section-index hook rejects missing _index.md") + else: + record("FAIL", "husky: section-index hook did NOT reject bad input", + "hook may not be referencing hugo/content/en/ " + f"(exit={proc.returncode})") + finally: + git("reset", "-q", "HEAD", rel) + if target.exists(): + target.unlink() + _rmdir_if_empty(target.parent) + + +def check_husky_cdocs_gitignore(): + """ + Append a harness pattern to hugo/content/.gitignore, force-track a matching + compiled file (with a .mdoc.md sibling), and confirm the hook 
flags it. + """ + content_gitignore = hugo_dir / "content" / ".gitignore" + if not content_gitignore.exists(): + record("SKIP", "husky: cdocs-gitignore test skipped (no hugo/content/.gitignore)") + return + + pattern = f"/en/{MARKER}_cdocs.md" + compiled_rel = f"hugo/content/en/{MARKER}_cdocs.md" + source_rel = f"hugo/content/en/{MARKER}_cdocs.mdoc.md" + compiled = repo_root / compiled_rel + source = repo_root / source_rel + if compiled.exists() or source.exists(): + record("WARN", "husky: cdocs-gitignore test skipped (temp path exists)", + compiled_rel) + return + + original_gitignore = content_gitignore.read_text() + try: + content_gitignore.write_text( + original_gitignore.rstrip("\n") + f"\n{pattern}\n" + ) + source.write_text("Temp Cdocs source.\n") + compiled.write_text("Temp compiled Cdocs output.\n") + git("add", "-f", compiled_rel) # force past the gitignore to simulate the mistake + proc = run_hook("check-cdocs-gitignore.py") + if proc.returncode != 0 and MARKER in (proc.stdout + proc.stderr): + record("PASS", "husky: cdocs-gitignore hook flags tracked compiled file") + else: + record("FAIL", "husky: cdocs-gitignore hook did NOT flag tracked file", + "hook may not be referencing hugo/content/.gitignore " + f"(exit={proc.returncode})") + finally: + git("reset", "-q", "HEAD", compiled_rel) + for p in (compiled, source): + if p.exists(): + p.unlink() + content_gitignore.write_text(original_gitignore) + + +def _rmdir_if_empty(path): + """Remove a directory only if it is empty (safety guard).""" + try: + path.rmdir() + except OSError: + pass + + +def check_build_presence(): + """Static check; the real build is the manual `make start` in todo #5.""" + # Hugo's build entrypoints all need to be co-located under hugo/: the Makefile, + # the Node manifest, and go.mod (required by Hugo Modules at the project root). 
+ required = ["Makefile", "package.json", "go.mod"] + missing = [n for n in required if not (hugo_dir / n).exists()] + if not missing: + record("PASS", "build: hugo/{Makefile,package.json,go.mod} present", + "run `cd hugo && make start` (todo #5) to verify the full build") + else: + record("FAIL", "build: missing Hugo build entrypoint(s) under hugo/", + ", ".join(f"hugo/{n}" for n in missing)) + + +def check_rollback_roundtrip(): + """execute_reorg.py then rollback.py must restore the tree byte-for-byte. + + This is the only check that exercises rollback.py at all. Rather than + mutate the live repo (and risk leaving it half-reorganized if a step fails), + it builds a small throwaway git repo holding one representative item for + every code path the two scripts touch — a mixed .gitignore (rules that route + to hugo/, to root, and to both), a workflow with substitutable paths, a + CODEOWNERS with a moved rule, a husky hook with a substitutable path, plus a + few moved/stayed files — then: + + snapshot -> run execute_reorg.py (answering y to every prompt) + -> run rollback.py -> snapshot again + -> assert byte-identical. + + It specifically catches the easy-to-miss case where execute_reorg.py edits a file in + place (the root .gitignore split) that rollback must restore from git rather + than merely delete. 
+ """ + if shutil.which("python3") is None or shutil.which("git") is None: + record("SKIP", "rollback: python3/git not both available") + return + + workdir = tempfile.mkdtemp(prefix=f"{MARKER}_rollback_") + work = Path(workdir) + + def write(rel, text): + p = work / rel + p.parent.mkdir(parents=True, exist_ok=True) + p.write_text(text) + + def git_work(*args): + return subprocess.run(["git", *args], cwd=work, + capture_output=True, text=True) + + def snapshot(): + """relpath -> bytes for every file under work/, excluding .git/.""" + tree = {} + for path in sorted(work.rglob("*")): + if path.is_dir(): + continue + rel = path.relative_to(work) + if rel.parts and rel.parts[0] == ".git": + continue + tree[str(rel)] = path.read_bytes() + return tree + + try: + # Representative fixture. Every top-level name here must appear in + # astro_reorg/config.yaml or execute_reorg.py refuses to run; the mix below routes + # across moves_to_hugo, top_level, and generic-glob cases. + write(".gitignore", + "# build\n" + "public/*\n" + "data/generated\n" + "content/en/api/**/*.go\n" + "node_modules\n" + "\n" + "# generic (kept in both)\n" + "*.log\n" + "\n" + "# root-only tooling\n" + ".github/preview-links-template.md\n") + write(".github/workflows/sample.yml", + "name: sample\n" + "on:\n" + " pull_request:\n" + " paths:\n" + " - 'content/en/**/*.md'\n" + "jobs:\n" + " build:\n" + " runs-on: ubuntu-latest\n" + " steps:\n" + " - run: python local/bin/foo.py\n") + write(".github/CODEOWNERS", + "* @DataDog/documentation\n" + "/content/ @DataDog/team-a\n" + "data/ @DataDog/team-b\n" + "README.md @DataDog/team-c\n") + write(".husky/check-sample.py", + "from pathlib import Path\n" + 'repo_pattern = "content"\n' + "p = Path('content/en')\n") + write("README.md", "# Fixture\n") + write("astro/package.json", '{"name": "astro-fixture"}\n') + write("content/en/page.md", "---\ntitle: Page\n---\n\nBody.\n") + write("data/sample.yaml", "key: value\n") + write("Makefile", "start:\n\techo build\n") 
+ write("package.json", '{"name": "hugo-fixture"}\n') + write("go.mod", "module example.com/fixture\n\ngo 1.21\n") + write("babel.config.js", "module.exports = {};\n") + + # The scripts resolve repo_root as their parent's parent, so place them + # under an astro_reorg/ subfolder that mirrors the real repo layout. + (work / "astro_reorg").mkdir() + for tool in ("execute_reorg.py", "rollback.py", "config.yaml"): + shutil.copy2(repo_root / "astro_reorg" / tool, work / "astro_reorg" / tool) + + git_work("init", "-q") + git_work("add", "-A") + commit = git_work("-c", "user.email=harness@example.com", + "-c", "user.name=reorg harness", + "-c", "commit.gpgsign=false", + "commit", "-q", "-m", "fixture") + if commit.returncode != 0: + record("FAIL", "rollback: could not commit fixture repo", + (commit.stderr or commit.stdout).strip()[:200]) + return + + before = snapshot() + + # execute_reorg.py is interactive (single y/N per mutation section); answer y to + # all. Far more lines than prompts is fine — the extras are ignored. 
+ reorg = subprocess.run( + ["python3", str(work / "astro_reorg" / "execute_reorg.py")], + cwd=work, capture_output=True, text=True, input="y\n" * 100, + ) + if reorg.returncode != 0 or not (work / "hugo").exists(): + record("FAIL", "rollback: execute_reorg.py failed on the fixture", + (reorg.stderr or reorg.stdout).strip()[:200]) + return + + rollback = subprocess.run( + ["python3", str(work / "astro_reorg" / "rollback.py")], + cwd=work, capture_output=True, text=True, + ) + if rollback.returncode != 0 or (work / "hugo").exists(): + record("FAIL", "rollback: rollback.py failed on the fixture", + (rollback.stderr or rollback.stdout).strip()[:200]) + return + + after = snapshot() + + added = sorted(set(after) - set(before)) + removed = sorted(set(before) - set(after)) + changed = sorted(p for p in before.keys() & after.keys() + if before[p] != after[p]) + if added or removed or changed: + diffs = ([f"+ {p}" for p in added] + + [f"- {p}" for p in removed] + + [f"~ {p} (in-place edit not reverted)" for p in changed]) + record("FAIL", "rollback: tree not byte-identical after reorg + rollback", + f"{len(diffs)} diff(s):\n " + "\n ".join(diffs[:20])) + else: + record("PASS", "rollback: reorg + rollback restores the tree byte-for-byte", + f"{len(before)} file(s) round-tripped, incl. the .gitignore split") + finally: + shutil.rmtree(work, ignore_errors=True) diff --git a/astro_reorg/validate_reorg.py b/astro_reorg/validate_reorg.py index 8f1c00e7a00..68521c5ac57 100644 --- a/astro_reorg/validate_reorg.py +++ b/astro_reorg/validate_reorg.py @@ -3,787 +3,31 @@ Post-reorg validation harness. Run this AFTER execute_reorg.py, from the repo root, on a feature branch (not master). -It verifies that the things the reorg could silently break still work: - - A. Layout - hugo/ holds the moved dirs; nothing moved is left at root; - hugo/ and astro/ each self-own a package.json and no Hugo - Node/build file lingers at the root competing with Astro. - B. 
Workflows - no .github/workflows/ file references a moved path (dir OR - file) without the hugo/ prefix, in shell scalars and in - structured on.*.paths filters (catches gaps in execute_reorg.py). - C. CODEOWNERS - every concrete path pattern still resolves on disk, and - every moved pattern (globs included) carries the hugo/ prefix. - D. Husky hooks - each pre-commit check still REJECTS a known-bad input planted - at the new hugo/ path. If a hook wasn't repathed it inspects - the now-missing content/en/ and passes vacuously -> we fail it. - E. Vale - vale still flags a known violation using the Datadog style, - proving StylesPath resolves from the new content location. - F. Hugo build - static presence check only; run `make start` manually (todo #5). - G. Rollback - on a throwaway repo, execute_reorg.py then rollback.py must - restore the tree byte-for-byte (the only test of rollback). - -The harness is non-destructive. Every file it creates lives under a -'__reorg_harness__' prefix, and every change (staged paths, gitignore edits) is -reverted in a finally block. It never commits. +The harness is non-destructive: every file it creates lives under a '__reorg_harness__' +prefix, and every change (staged paths, gitignore edits) is reverted in a finally block. +It never commits. """ -import os -import re -import shutil -import subprocess import sys -import tempfile -from pathlib import Path - -try: - import yaml -except ImportError: - print("Error: PyYAML is required. Install with: pip install pyyaml", file=sys.stderr) - sys.exit(1) - -repo_root = Path(__file__).parent.parent -hugo_dir = repo_root / "hugo" - -# Distinctive marker so anything we create is obvious and easy to clean up. -MARKER = "__reorg_harness__" - -# (status, name, detail) tuples collected as checks run. -results = [] - - -def record(status, name, detail=""): - """Record a check outcome. 
status is one of PASS, FAIL, SKIP, WARN.""" - results.append((status, name, detail)) - symbol = {"PASS": "✅", "FAIL": "❌", "SKIP": "⏭ ", "WARN": "⚠ "}[status] - line = f"{symbol} {status:4} {name}" - if detail: - line += f"\n {detail}" - print(line) - - -def git(*args, check=False): - """Run a git command from the repo root and return the CompletedProcess.""" - return subprocess.run( - ["git", *args], - cwd=repo_root, - capture_output=True, - text=True, - check=check, - ) - - -def load_config(): - """Load astro_reorg/config.yaml and return (top_level, moves_to_hugo) name sets.""" - config_path = Path(__file__).parent / "config.yaml" - with config_path.open() as f: - config = yaml.safe_load(f) - return set(config.get("top_level", [])), set(config.get("moves_to_hugo", [])) - - -# -------------------------------------------------------------------------- -# A. Layout -# -------------------------------------------------------------------------- - -def check_layout(top_level, moves_to_hugo): - """hugo/ holds the moved dirs; nothing that moved is left at the root.""" - # Moved items that the reorg actually had to relocate (present before). - expected_in_hugo = sorted(n for n in moves_to_hugo if (hugo_dir / n).exists()) - missing_from_hugo = sorted( - n for n in moves_to_hugo - if (repo_root / n).exists() and not (hugo_dir / n).exists() - ) - - # A moved name still sitting at the root means the move didn't happen. - still_at_root = sorted( - n for n in moves_to_hugo if (repo_root / n).exists() - ) - if still_at_root: - record("FAIL", "layout: moved items remain at repo root", - ", ".join(still_at_root)) - else: - record("PASS", "layout: no moved items remain at repo root", - f"{len(expected_in_hugo)} item(s) confirmed under hugo/") - - if missing_from_hugo: - record("FAIL", "layout: moved items missing under hugo/", - ", ".join(missing_from_hugo)) - - # No top_level item should have leaked into hugo/. 
'hugo' itself is listed - # in top_level (it IS the move target) so it is excluded. - leaked = sorted( - n for n in top_level - if n != "hugo" and (hugo_dir / n).exists() - ) - if leaked: - record("FAIL", "layout: top-level items leaked into hugo/", - ", ".join(leaked)) - else: - record("PASS", "layout: no top-level items leaked into hugo/") - - # Critical anchors that must still exist at the root after the move. - # NB: package.json/node_modules/yarn.lock and go.mod/go.sum now move into - # hugo/ (each site owns its own Node setup; the Go module powers Hugo - # Modules), so they are deliberately NOT listed here. - must_stay = ["astro", ".husky", ".github", ".vale.ini"] - absent = [n for n in must_stay if not (repo_root / n).exists()] - if absent: - record("FAIL", "layout: expected top-level items are missing", - ", ".join(absent)) - else: - record("PASS", "layout: top-level anchors intact", - ", ".join(must_stay)) - - # Both .gitignore files should exist (execute_reorg.py splits one into two; - # check_gitignore_split below verifies the split routed correctly). - if (repo_root / ".gitignore").exists() and (hugo_dir / ".gitignore").exists(): - record("PASS", "layout: .gitignore present at root and in hugo/") - else: - record("FAIL", "layout: missing a .gitignore copy", - "expected both ./.gitignore and ./hugo/.gitignore") - - -def check_gitignore_split(top_level, moves_to_hugo): - """Verify execute_reorg.py routed .gitignore rules to the correct side. - - A .gitignore is relative to its own directory, so after the split no - surviving rule should point at a path that lives on the OTHER side: - - root .gitignore must not carry a rule whose first segment moved to hugo/ - - hugo/.gitignore must not carry a rule whose first segment stays at root - Generic globs (first segment in neither config list) legitimately live in - both files, so they are ignored here. 
- """ - def first_segment(line): - stripped = line.strip() - if not stripped or stripped.startswith("#"): - return None - body = stripped[1:] if stripped.startswith("!") else stripped - return body.lstrip("/").split("/", 1)[0] - - def leaks(path, wrong_side): - out = [] - for raw in path.read_text().splitlines(): - seg = first_segment(raw) - if seg in wrong_side: - out.append(f"{path.name}: {raw.strip()} (segment {seg!r})") - return out - - root_gi = repo_root / ".gitignore" - hugo_gi = hugo_dir / ".gitignore" - if not (root_gi.exists() and hugo_gi.exists()): - record("SKIP", "gitignore: split not verifiable (a .gitignore is missing)") - return - - problems = leaks(root_gi, moves_to_hugo) + leaks(hugo_gi, top_level - {"hugo"}) - if problems: - record("FAIL", "gitignore: rules survive on the wrong side of the split", - f"{len(problems)}:\n " + "\n ".join(problems[:20])) - else: - record("PASS", "gitignore: no moved-path rule left at root, " - "no root-path rule left in hugo/") - - -def check_site_separation(moves_to_hugo): - """The two sites must own non-overlapping Node/build setups. - - The point of the reorg is a clean hugo/ vs astro/ split: each site has its - OWN Node manifest, and any name the two sites share must not also linger at - the root where their copies would collide. check_layout already fails on ANY - moved item left at root (against the full config), so this check adds only - what that doesn't cover — and derives its clash set from the config rather - than a hand-maintained list: - - astro/package.json and hugo/package.json both exist (each self-owns) - - no name that moved into hugo/ AND is also owned by astro/ remains at root - """ - problems = [] - - # Each site self-owns its Node manifest. (package.json is in moves_to_hugo, - # so it belongs to Hugo; astro/ must carry its own copy.) 
- for site in ("hugo", "astro"): - if not (repo_root / site / "package.json").exists(): - problems.append(f"missing {site}/package.json") - - # A name that moved into hugo/ AND that astro/ also owns is a shared - # toolchain file (package.json, node_modules, yarn.lock, ...). If such a - # name is ALSO present at the root, the two sites' copies collide. The clash - # set is derived from moves_to_hugo intersected with astro/'s actual - # contents — no hand-maintained file list to drift from the config. - shared_with_astro = sorted( - n for n in moves_to_hugo if (repo_root / "astro" / n).exists() - ) - leaked = [n for n in shared_with_astro if (repo_root / n).exists()] - if leaked: - problems.append("left at root, colliding with astro/: " + ", ".join(leaked)) - - if problems: - record("FAIL", "separation: hugo/ and astro/ Node setups overlap or leak", - "; ".join(problems)) - else: - record("PASS", "separation: hugo/ and astro/ each self-own package.json; " - "no shared toolchain file left at root") - - -# -------------------------------------------------------------------------- -# B. Workflow paths -# -------------------------------------------------------------------------- - -def check_workflows(moves_to_hugo): - """Flag references to moved paths (directories AND files) lacking hugo/. - - This previously scanned only directory names ('.' not in n) using a - trailing-slash token, so a workflow that referenced a moved *file* — - babel.config.js, markdoc.config.json, the Makefile, go.mod — was never - validated. We now route every path-like token by its first segment exactly - as execute_reorg.py does: a token whose first segment moved into hugo/ must be - hugo/-prefixed (after a correct reorg its first segment is 'hugo', so it no - longer matches). A token counts as a path reference when it contains a '/', - or when the whole token is the exact name of a moved file — so a bare word - like 'content' in prose isn't flagged, but a standalone 'babel.config.js' is. 
- - A moved file whose name ALSO exists under astro/ (package.json, yarn.lock, - node_modules, .nvmrc) is left out of the bare-name match: a standalone - 'package.json' is genuinely ambiguous (it may be Astro's), which is exactly - why execute_reorg.py never blind-substitutes those names. They are still validated - when they appear inside a slashed path routed by its first segment. - """ - workflows_dir = repo_root / ".github" / "workflows" - if not workflows_dir.exists(): - record("SKIP", "workflows: .github/workflows/ not found") - return - - moved_files = { - n for n in moves_to_hugo - if (hugo_dir / n).is_file() and not (repo_root / "astro" / n).exists() - } - - # Lines that legitimately reference a moved name but are NOT paths to fix - # (illustrative prose, external URLs). Keyed by workflow filename; a line is - # exempt if it contains any of the listed markers. - allowlist = { - # Security-doc example of how untrusted paths are reported, not a real path. - "claude_review.yml": ["__untrusted/content/"], - } - - # A path-like token: a maximal run of path/glob characters. Whitespace, - # quotes, ':', '@' and '!' all act as boundaries, so a leading '!' negation - # or surrounding quotes fall away naturally. - token_re = re.compile(r"[A-Za-z0-9_.\-*/]+") - - def first_segment(token): - """(first path segment, ./-stripped token) for routing.""" - t = token - while t.startswith("./"): - t = t[2:] - return t.split("/", 1)[0], t - - suspects = [] - for yml in sorted(workflows_dir.glob("*.yml")): - exempt = allowlist.get(yml.name, []) - for lineno, line in enumerate(yml.read_text().splitlines(), 1): - if any(marker in line for marker in exempt): - continue - for token in token_re.findall(line): - seg, normalized = first_segment(token) - # Only treat a token as a path reference if it's an actual path - # (has a '/') or is exactly the name of a moved file. 
- if "/" not in normalized and normalized not in moved_files: - continue - if seg in moves_to_hugo: - suspects.append(f"{yml.name}:{lineno}: {line.strip()}") - break - - # De-duplicate while keeping order. - seen = set() - unique = [s for s in suspects if not (s in seen or seen.add(s))] - - if unique: - record("FAIL", "workflows: unprefixed moved paths found", - f"{len(unique)} line(s):\n " + "\n ".join(unique[:20])) - else: - record("PASS", "workflows: all moved paths (dirs and files) are hugo/-prefixed") - - -def check_workflow_path_filters(moves_to_hugo): - """Parse each workflow's on.*.paths filters and assert moved paths are prefixed. - - Unlike paths embedded in `run:` shell (which are opaque string scalars to a - parser), `on..paths` / `paths-ignore` are structured YAML list values, - so we can validate them precisely. Each entry is routed by its first path - segment: if that segment moved into hugo/, the entry must be hugo/-prefixed. - - Footgun handled: under YAML 1.1, PyYAML loads the `on:` key as the boolean - True, not the string "on" — so we look it up under both keys. - """ - workflows_dir = repo_root / ".github" / "workflows" - if not workflows_dir.exists(): - record("SKIP", "workflows: .github/workflows/ not found (path filters)") - return - - def first_segment(entry): - # Strip a leading '!' negation and any root anchor, then take segment 0. 
- p = entry[1:] if entry.startswith("!") else entry - return p.lstrip("/").split("/", 1)[0] - - problems = [] - for yml in sorted(workflows_dir.glob("*.yml")): - try: - doc = yaml.safe_load(yml.read_text()) - except yaml.YAMLError as exc: - record("WARN", f"workflows: {yml.name} did not parse as YAML", - str(exc).splitlines()[0][:120]) - continue - if not isinstance(doc, dict): - continue - triggers = doc.get("on", doc.get(True)) # `on:` -> True under YAML 1.1 - if not isinstance(triggers, dict): - continue - for event, spec in triggers.items(): - if not isinstance(spec, dict): - continue - for key in ("paths", "paths-ignore"): - for entry in spec.get(key) or []: - if not isinstance(entry, str): - continue - seg = first_segment(entry) - anchored = entry.lstrip("!").lstrip("/") - if seg in moves_to_hugo and not anchored.startswith("hugo/"): - problems.append(f"{yml.name}: on.{event}.{key}: {entry}") - - if problems: - record("FAIL", "workflows: on.*.paths filters missing hugo/ prefix", - f"{len(problems)}:\n " + "\n ".join(problems[:20])) - else: - record("PASS", "workflows: on.*.paths filters are hugo/-prefixed") - - -# -------------------------------------------------------------------------- -# C. 
CODEOWNERS -# -------------------------------------------------------------------------- -def check_codeowners(): - """Every concrete (non-glob) path pattern should resolve on disk.""" - codeowners = repo_root / ".github" / "CODEOWNERS" - if not codeowners.exists(): - record("SKIP", "codeowners: .github/CODEOWNERS not found") - return +from helpers import ( + hugo_dir, + git, + load_config, + results, + check_layout, + check_gitignore_split, + check_workflows, + check_workflow_path_filters, + check_codeowners, + check_codeowners_prefixing, + check_husky_circular_aliases, + check_husky_section_index, + check_husky_cdocs_gitignore, + check_build_presence, + check_rollback_roundtrip, +) - missing = [] - for line in codeowners.read_text().splitlines(): - stripped = line.strip() - if not stripped or stripped.startswith("#"): - continue - pattern = stripped.split()[0] - if pattern == "*": - continue - # Skip glob patterns; we can't resolve them to a single path. - if any(ch in pattern for ch in "*?[]"): - continue - # Leading slash in CODEOWNERS is repo-root-anchored. - rel = pattern.lstrip("/") - if not (repo_root / rel).exists(): - missing.append(pattern) - - if missing: - record("FAIL", "codeowners: patterns that no longer resolve", - f"{len(missing)}:\n " + "\n ".join(missing[:20])) - else: - record("PASS", "codeowners: all concrete patterns resolve on disk") - - -def check_codeowners_prefixing(moves_to_hugo): - """Every CODEOWNERS pattern whose first segment moved into hugo/ must carry - the hugo/ prefix — globs included. - - check_codeowners() above only proves that *concrete* paths still RESOLVE on - disk; it skips every glob (layouts/shortcodes/**/*.md) and never confirms a - moved pattern was actually repathed. Here we route each pattern by its first - path segment exactly as execute_reorg.py's route_codeowners_pattern does and assert - that a moved segment is prefixed. 
After a correct reorg the pattern's first - segment is 'hugo', so a flagged pattern means a substitution was missed. - """ - codeowners = repo_root / ".github" / "CODEOWNERS" - if not codeowners.exists(): - record("SKIP", "codeowners: .github/CODEOWNERS not found (prefixing)") - return - - def first_segment(pattern): - body = pattern[1:] if pattern.startswith("/") else pattern # un-anchor - seg = body.split("/", 1)[0] - # execute_reorg.py normalizes the '.local' typo to 'local' before routing. - return "local" if seg == ".local" else seg - - unprefixed = [] - for line in codeowners.read_text().splitlines(): - stripped = line.strip() - if not stripped or stripped.startswith("#"): - continue - pattern = stripped.split()[0] - if pattern == "*": - continue - if first_segment(pattern) not in moves_to_hugo: - continue - if not pattern.lstrip("/").startswith("hugo/"): - unprefixed.append(pattern) - - if unprefixed: - record("FAIL", "codeowners: moved patterns missing the hugo/ prefix", - f"{len(unprefixed)} (globs included):\n " - + "\n ".join(unprefixed[:20])) - else: - record("PASS", "codeowners: every moved pattern is hugo/-prefixed " - "(globs included)") - - -# -------------------------------------------------------------------------- -# D. Husky behavioral checks -# -------------------------------------------------------------------------- - -def run_hook(script_name): - """Run a .husky hook script from the repo root; return CompletedProcess.""" - return subprocess.run( - ["python3", str(repo_root / ".husky" / script_name)], - cwd=repo_root, - capture_output=True, - text=True, - ) - - -def check_husky_circular_aliases(): - """Plant a self-aliasing page and confirm the hook rejects it.""" - rel = f"hugo/content/en/{MARKER}_alias/selftest.md" - target = repo_root / rel - if target.exists(): - record("WARN", "husky: circular-aliases test skipped (temp path exists)", rel) - return - - # An alias equal to the file's own location is the circular case. 
- body = ( - "---\n" - "title: Reorg Harness Self Test\n" - "aliases:\n" - f" - /{MARKER}_alias/selftest\n" - "---\n\n" - "Temporary file created by astro_reorg/validate_reorg.py.\n" - ) - try: - target.parent.mkdir(parents=True, exist_ok=True) - target.write_text(body) - git("add", "-f", rel) - proc = run_hook("check-circular-aliases.py") - if proc.returncode != 0 and "circular" in (proc.stdout + proc.stderr).lower(): - record("PASS", "husky: circular-aliases hook rejects bad input") - else: - record("FAIL", "husky: circular-aliases hook did NOT reject bad input", - "hook may still point at the old content/en/ path " - f"(exit={proc.returncode})") - finally: - git("reset", "-q", "HEAD", rel) - if target.exists(): - target.unlink() - _rmdir_if_empty(target.parent) - - -def check_husky_section_index(): - """Plant a new top-level section with no _index.md and confirm rejection.""" - section = f"{MARKER}_section" - rel = f"hugo/content/en/{section}/page.md" - target = repo_root / rel - if target.exists() or (hugo_dir / "content" / "en" / section).exists(): - record("WARN", "husky: section-index test skipped (temp path exists)", rel) - return - - body = "---\ntitle: Reorg Harness Page\nprivate: true\n---\n\nTemp page.\n" - try: - target.parent.mkdir(parents=True, exist_ok=True) - target.write_text(body) - git("add", "-f", rel) - proc = run_hook("check-section-index.py") - if proc.returncode != 0 and "_index.md" in (proc.stdout + proc.stderr): - record("PASS", "husky: section-index hook rejects missing _index.md") - else: - record("FAIL", "husky: section-index hook did NOT reject bad input", - "hook may still point at the old content/en/ path " - f"(exit={proc.returncode})") - finally: - git("reset", "-q", "HEAD", rel) - if target.exists(): - target.unlink() - _rmdir_if_empty(target.parent) - - -def check_husky_cdocs_gitignore(): - """ - Append a harness pattern to hugo/content/.gitignore, force-track a matching - compiled file (with a .mdoc.md sibling), and confirm the 
hook flags it. - """ - content_gitignore = hugo_dir / "content" / ".gitignore" - if not content_gitignore.exists(): - record("SKIP", "husky: cdocs-gitignore test skipped (no hugo/content/.gitignore)") - return - - pattern = f"/en/{MARKER}_cdocs.md" - compiled_rel = f"hugo/content/en/{MARKER}_cdocs.md" - source_rel = f"hugo/content/en/{MARKER}_cdocs.mdoc.md" - compiled = repo_root / compiled_rel - source = repo_root / source_rel - if compiled.exists() or source.exists(): - record("WARN", "husky: cdocs-gitignore test skipped (temp path exists)", - compiled_rel) - return - - original_gitignore = content_gitignore.read_text() - try: - content_gitignore.write_text( - original_gitignore.rstrip("\n") + f"\n{pattern}\n" - ) - source.write_text("Temp Cdocs source.\n") - compiled.write_text("Temp compiled Cdocs output.\n") - git("add", "-f", compiled_rel) # force past the gitignore to simulate the mistake - proc = run_hook("check-cdocs-gitignore.py") - if proc.returncode != 0 and MARKER in (proc.stdout + proc.stderr): - record("PASS", "husky: cdocs-gitignore hook flags tracked compiled file") - else: - record("FAIL", "husky: cdocs-gitignore hook did NOT flag tracked file", - "hook may still read the old content/.gitignore path " - f"(exit={proc.returncode})") - finally: - git("reset", "-q", "HEAD", compiled_rel) - for p in (compiled, source): - if p.exists(): - p.unlink() - content_gitignore.write_text(original_gitignore) - - -def _rmdir_if_empty(path): - """Remove a directory only if it is empty (safety guard).""" - try: - path.rmdir() - except OSError: - pass - - -# -------------------------------------------------------------------------- -# E. 
Vale -# -------------------------------------------------------------------------- - -def check_vale(): - """Confirm vale still flags a known violation using the Datadog style.""" - if subprocess.run(["which", "vale"], capture_output=True).returncode != 0: - record("SKIP", "vale: vale not installed") - return - - styles = (repo_root / ".vale.ini") - if not styles.exists(): - record("SKIP", "vale: .vale.ini not found") - return - - rel = f"hugo/content/en/{MARKER}_vale.md" - target = repo_root / rel - if target.exists(): - record("WARN", "vale: test skipped (temp path exists)", rel) - return - - # Each of these trips a Datadog substitution rule. - body = ( - "---\ntitle: Reorg Harness Vale Test\n---\n\n" - "Simply leverage this feature to ensure it works.\n" - ) - try: - target.write_text(body) - proc = subprocess.run( - ["vale", rel], - cwd=repo_root, - capture_output=True, - text=True, - ) - output = proc.stdout + proc.stderr - if "Datadog." in output: - record("PASS", "vale: Datadog style flags violations from new path") - elif "StylesPath" in output or "ExecError" in output or not output.strip(): - record("FAIL", "vale: ran but produced no Datadog findings", - "StylesPath may not resolve from the new content location") - else: - record("WARN", "vale: ran but no Datadog.* rule fired", - output.strip()[:200]) - finally: - if target.exists(): - target.unlink() - - -# -------------------------------------------------------------------------- -# F. Hugo build (static presence only) -# -------------------------------------------------------------------------- - -def check_build_presence(): - """Static check; the real build is the manual `make start` in todo #5.""" - # Hugo's build entrypoints all need to be co-located under hugo/: the Makefile, - # the Node manifest, and go.mod (required by Hugo Modules at the project root). 
- required = ["Makefile", "package.json", "go.mod"] - missing = [n for n in required if not (hugo_dir / n).exists()] - if not missing: - record("PASS", "build: hugo/{Makefile,package.json,go.mod} present", - "run `cd hugo && make start` (todo #5) to verify the full build") - else: - record("FAIL", "build: missing Hugo build entrypoint(s) under hugo/", - ", ".join(f"hugo/{n}" for n in missing)) - - -# -------------------------------------------------------------------------- -# G. Rollback round-trip -# -------------------------------------------------------------------------- - -def check_rollback_roundtrip(): - """execute_reorg.py then rollback.py must restore the tree byte-for-byte. - - This is the only check that exercises rollback.py at all. Rather than - mutate the live repo (and risk leaving it half-reorganized if a step fails), - it builds a small throwaway git repo holding one representative item for - every code path the two scripts touch — a mixed .gitignore (rules that route - to hugo/, to root, and to both), a workflow with substitutable paths, a - CODEOWNERS with a moved rule, a husky hook with a substitutable path, plus a - few moved/stayed files — then: - - snapshot -> run execute_reorg.py (answering y to every prompt) - -> run rollback.py -> snapshot again - -> assert byte-identical. - - It specifically catches the easy-to-miss case where execute_reorg.py edits a file in - place (the root .gitignore split) that rollback must restore from git rather - than merely delete. 
- """ - if shutil.which("python3") is None or shutil.which("git") is None: - record("SKIP", "rollback: python3/git not both available") - return - - workdir = tempfile.mkdtemp(prefix=f"{MARKER}_rollback_") - work = Path(workdir) - - def write(rel, text): - p = work / rel - p.parent.mkdir(parents=True, exist_ok=True) - p.write_text(text) - - def git_work(*args): - return subprocess.run(["git", *args], cwd=work, - capture_output=True, text=True) - - def snapshot(): - """relpath -> bytes for every file under work/, excluding .git/.""" - tree = {} - for path in sorted(work.rglob("*")): - if path.is_dir(): - continue - rel = path.relative_to(work) - if rel.parts and rel.parts[0] == ".git": - continue - tree[str(rel)] = path.read_bytes() - return tree - - try: - # Representative fixture. Every top-level name here must appear in - # astro_reorg/config.yaml or execute_reorg.py refuses to run; the mix below routes - # across moves_to_hugo, top_level, and generic-glob cases. - write(".gitignore", - "# build\n" - "public/*\n" - "data/generated\n" - "content/en/api/**/*.go\n" - "node_modules\n" - "\n" - "# generic (kept in both)\n" - "*.log\n" - "\n" - "# root-only tooling\n" - ".github/preview-links-template.md\n") - write(".github/workflows/sample.yml", - "name: sample\n" - "on:\n" - " pull_request:\n" - " paths:\n" - " - 'content/en/**/*.md'\n" - "jobs:\n" - " build:\n" - " runs-on: ubuntu-latest\n" - " steps:\n" - " - run: python local/bin/foo.py\n") - write(".github/CODEOWNERS", - "* @DataDog/documentation\n" - "/content/ @DataDog/team-a\n" - "data/ @DataDog/team-b\n" - "README.md @DataDog/team-c\n") - write(".husky/check-sample.py", - "from pathlib import Path\n" - 'repo_pattern = "content"\n' - "p = Path('content/en')\n") - write("README.md", "# Fixture\n") - write("astro/package.json", '{"name": "astro-fixture"}\n') - write("content/en/page.md", "---\ntitle: Page\n---\n\nBody.\n") - write("data/sample.yaml", "key: value\n") - write("Makefile", "start:\n\techo build\n") 
- write("package.json", '{"name": "hugo-fixture"}\n') - write("go.mod", "module example.com/fixture\n\ngo 1.21\n") - write("babel.config.js", "module.exports = {};\n") - - # The scripts resolve repo_root as their parent's parent, so place them - # under an astro_reorg/ subfolder that mirrors the real repo layout. - (work / "astro_reorg").mkdir() - for tool in ("execute_reorg.py", "rollback.py", "config.yaml"): - shutil.copy2(repo_root / "astro_reorg" / tool, work / "astro_reorg" / tool) - - git_work("init", "-q") - git_work("add", "-A") - commit = git_work("-c", "user.email=harness@example.com", - "-c", "user.name=reorg harness", - "-c", "commit.gpgsign=false", - "commit", "-q", "-m", "fixture") - if commit.returncode != 0: - record("FAIL", "rollback: could not commit fixture repo", - (commit.stderr or commit.stdout).strip()[:200]) - return - - before = snapshot() - - # execute_reorg.py is interactive (single y/N per mutation section); answer y to - # all. Far more lines than prompts is fine — the extras are ignored. 
- reorg = subprocess.run( - ["python3", str(work / "astro_reorg" / "execute_reorg.py")], - cwd=work, capture_output=True, text=True, input="y\n" * 100, - ) - if reorg.returncode != 0 or not (work / "hugo").exists(): - record("FAIL", "rollback: execute_reorg.py failed on the fixture", - (reorg.stderr or reorg.stdout).strip()[:200]) - return - - rollback = subprocess.run( - ["python3", str(work / "astro_reorg" / "rollback.py")], - cwd=work, capture_output=True, text=True, - ) - if rollback.returncode != 0 or (work / "hugo").exists(): - record("FAIL", "rollback: rollback.py failed on the fixture", - (rollback.stderr or rollback.stdout).strip()[:200]) - return - - after = snapshot() - - added = sorted(set(after) - set(before)) - removed = sorted(set(before) - set(after)) - changed = sorted(p for p in before.keys() & after.keys() - if before[p] != after[p]) - if added or removed or changed: - diffs = ([f"+ {p}" for p in added] - + [f"- {p}" for p in removed] - + [f"~ {p} (in-place edit not reverted)" for p in changed]) - record("FAIL", "rollback: tree not byte-identical after reorg + rollback", - f"{len(diffs)} diff(s):\n " + "\n ".join(diffs[:20])) - else: - record("PASS", "rollback: reorg + rollback restores the tree byte-for-byte", - f"{len(before)} file(s) round-tripped, incl. the .gitignore split") - finally: - shutil.rmtree(work, ignore_errors=True) - - -# -------------------------------------------------------------------------- -# Main -# -------------------------------------------------------------------------- def main(): if not hugo_dir.exists(): @@ -797,34 +41,42 @@ def main(): top_level, moves_to_hugo = load_config() - print("== A. Layout ==") + # hugo/ holds the moved dirs; nothing moved is left at root; + # the root .gitignore split routed each rule to the right side. + print("== Layout ==") check_layout(top_level, moves_to_hugo) check_gitignore_split(top_level, moves_to_hugo) - check_site_separation(moves_to_hugo) - print("\n== B. 
Workflow paths ==") + # No .github/workflows/ file references a moved path without the hugo/ prefix, + # in shell scalars and in structured on.*.paths filters. + print("\n== Workflow paths ==") check_workflows(moves_to_hugo) check_workflow_path_filters(moves_to_hugo) - print("\n== C. CODEOWNERS ==") + # No pattern was moved to hugo/ while its file stayed at root, and every + # moved pattern (globs included) carries the hugo/ prefix. Pre-existing + # dangling entries are reported but not failed. + print("\n== CODEOWNERS ==") check_codeowners() check_codeowners_prefixing(moves_to_hugo) - print("\n== D. Husky hooks ==") + # Each pre-commit check still REJECTS a known-bad input planted at the + # hugo/ path. A hook that still points at the old path passes vacuously -> we fail it. + print("\n== Husky hooks ==") check_husky_circular_aliases() check_husky_section_index() check_husky_cdocs_gitignore() - print("\n== E. Vale ==") - check_vale() - - print("\n== F. Hugo build ==") + # Static presence check only; run `make start` manually. + print("\n== Hugo build ==") check_build_presence() - print("\n== G. Rollback round-trip ==") + # On a throwaway repo, execute_reorg.py then rollback.py must restore the + # tree byte-for-byte (the only test of rollback). + print("\n== Rollback round-trip ==") check_rollback_roundtrip() - # Summary. 
+ # Summary counts = {"PASS": 0, "FAIL": 0, "SKIP": 0, "WARN": 0} for status, _, _ in results: counts[status] += 1 From 883496eef0389c68ba008384fa52f82836708989 Mon Sep 17 00:00:00 2001 From: Jen Gilbert Date: Wed, 10 Jun 2026 17:42:28 -0500 Subject: [PATCH 11/18] Handle Python env --- astro_reorg/execute_reorg.py | 16 +++++++++++++++- astro_reorg/rollback.py | 26 ++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/astro_reorg/execute_reorg.py b/astro_reorg/execute_reorg.py index 146792b032b..29dea2b3c6d 100644 --- a/astro_reorg/execute_reorg.py +++ b/astro_reorg/execute_reorg.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 import os +import shutil import sys from pathlib import Path @@ -49,16 +50,29 @@ hugo_dir.mkdir(exist_ok=True) moved = 0 +deleted = 0 for name in sorted(os.listdir(repo_root)): if name in ignore or name in top_level or name == "hugo": continue src = repo_root / name dst = hugo_dir / name + + # A Python virtualenv bakes its own ABSOLUTE path into bin/activate, the + # bin/python* symlinks, and pyvenv.cfg, so relocating the directory leaves + # those pointing at the old path — activation then falls back to the system + # Python (no project deps). A venv is a regenerable artifact, so delete it + # rather than move it; `make` rebuilds it in place with correct paths. + if (src / "pyvenv.cfg").is_file(): + print(f"Deleting venv {name} (regenerated by make in hugo/{name})") + shutil.rmtree(src) + deleted += 1 + continue + print(f"Moving {name} -> hugo/{name}") src.rename(dst) moved += 1 -print(f"Done. {moved} item(s) moved into hugo/.") +print(f"Done. {moved} item(s) moved into hugo/, {deleted} venv(s) deleted for regeneration.") # Split .gitignore between the root and hugo/ instead of copying it wholesale. 
# diff --git a/astro_reorg/rollback.py b/astro_reorg/rollback.py index b2bda7fd1c2..4404265d5ba 100644 --- a/astro_reorg/rollback.py +++ b/astro_reorg/rollback.py @@ -12,6 +12,7 @@ sys.exit(1) moved = 0 +moved_names = [] for name in sorted(os.listdir(hugo_dir)): # reorg.py SPLITS .gitignore in place (it prunes the root copy and writes a # routed subset to hugo/.gitignore). Skip it here so this rename can't clobber @@ -24,6 +25,7 @@ print(f"Moving hugo/{name} -> {name}") src.rename(dst) moved += 1 + moved_names.append(name) # Discard the hugo/.gitignore the split created before emptying the directory. gitignore_copy = hugo_dir / ".gitignore" @@ -46,3 +48,27 @@ check=True, ) print("Restored .gitignore, .github/workflows/, .github/CODEOWNERS, and .husky/ from git.") + +# The move-back above restores whatever was in hugo/ verbatim — but a build run +# between execute_reorg.py and this rollback (e.g. `make start`) can clean +# committed-but-generated files (API code examples, service_checks JSON, +# integration pages) and, if interrupted, never regenerate them. That leaves the +# moved tree INCOMPLETE, so rollback alone wouldn't restore a clean working tree. +# Restore the moved, git-tracked paths to their committed state to absorb that. +# +# Scope strictly to the names we moved: astro_reorg/ and other untouched root +# paths are never passed to checkout. Filter to paths git actually tracks so +# untracked/ignored moves (node_modules, public, _vendor, resources) are skipped +# — passing one of those to `git checkout` would match no tracked files and +# abort the whole restore. 
+tracked = subprocess.run( + ["git", "ls-files", "--", *moved_names], + cwd=repo_root, capture_output=True, text=True, check=True, +).stdout.split() +tracked_top = sorted({Path(p).parts[0] for p in tracked}) +if tracked_top: + subprocess.run( + ["git", "checkout", "--", *tracked_top], + cwd=repo_root, check=True, + ) + print(f"Restored moved tracked paths to HEAD: {', '.join(tracked_top)}") From 52473320d5429575beb60ca429f09433c4aeaa12 Mon Sep 17 00:00:00 2001 From: Jen Gilbert Date: Thu, 11 Jun 2026 09:28:33 -0500 Subject: [PATCH 12/18] Simplify rollback script --- astro_reorg/rollback.py | 57 +++-------------------------------------- 1 file changed, 3 insertions(+), 54 deletions(-) diff --git a/astro_reorg/rollback.py b/astro_reorg/rollback.py index 4404265d5ba..6ad6da5ab7a 100644 --- a/astro_reorg/rollback.py +++ b/astro_reorg/rollback.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -import os +import shutil import subprocess import sys from pathlib import Path @@ -11,36 +11,9 @@ print("Nothing to roll back: hugo/ does not exist.", file=sys.stderr) sys.exit(1) -moved = 0 -moved_names = [] -for name in sorted(os.listdir(hugo_dir)): - # reorg.py SPLITS .gitignore in place (it prunes the root copy and writes a - # routed subset to hugo/.gitignore). Skip it here so this rename can't clobber - # the pruned root copy with the hugo subset; the root copy is restored from - # git below, and hugo/.gitignore is discarded with the now-empty directory. - if name == ".gitignore": - continue - src = hugo_dir / name - dst = repo_root / name - print(f"Moving hugo/{name} -> {name}") - src.rename(dst) - moved += 1 - moved_names.append(name) +shutil.rmtree(hugo_dir) +print("Removed hugo/") -# Discard the hugo/.gitignore the split created before emptying the directory. 
-gitignore_copy = hugo_dir / ".gitignore" -if gitignore_copy.exists(): - gitignore_copy.unlink() - print("Removed hugo/.gitignore (created by the split)") - -# Only succeeds if hugo/ is now empty, preventing accidental data loss. -hugo_dir.rmdir() -print(f"Done. {moved} item(s) restored to repo root.") - -# Restore the files reorg.py edited in place to their committed state. The root -# .gitignore was pruned by the split, and reorg.py may have applied partial -# workflow/CODEOWNERS/husky substitutions interactively, so reversing them -# individually isn't reliable — let git restore them wholesale. subprocess.run( ["git", "checkout", "--", ".gitignore", ".github/workflows/", ".github/CODEOWNERS", ".husky/"], @@ -48,27 +21,3 @@ check=True, ) print("Restored .gitignore, .github/workflows/, .github/CODEOWNERS, and .husky/ from git.") - -# The move-back above restores whatever was in hugo/ verbatim — but a build run -# between execute_reorg.py and this rollback (e.g. `make start`) can clean -# committed-but-generated files (API code examples, service_checks JSON, -# integration pages) and, if interrupted, never regenerate them. That leaves the -# moved tree INCOMPLETE, so rollback alone wouldn't restore a clean working tree. -# Restore the moved, git-tracked paths to their committed state to absorb that. -# -# Scope strictly to the names we moved: astro_reorg/ and other untouched root -# paths are never passed to checkout. Filter to paths git actually tracks so -# untracked/ignored moves (node_modules, public, _vendor, resources) are skipped -# — passing one of those to `git checkout` would match no tracked files and -# abort the whole restore. 
-tracked = subprocess.run( - ["git", "ls-files", "--", *moved_names], - cwd=repo_root, capture_output=True, text=True, check=True, -).stdout.split() -tracked_top = sorted({Path(p).parts[0] for p in tracked}) -if tracked_top: - subprocess.run( - ["git", "checkout", "--", *tracked_top], - cwd=repo_root, check=True, - ) - print(f"Restored moved tracked paths to HEAD: {', '.join(tracked_top)}") From 902a46b44cd21743c0cfc2fdb03b48d493463880 Mon Sep 17 00:00:00 2001 From: Jen Gilbert Date: Thu, 11 Jun 2026 09:33:53 -0500 Subject: [PATCH 13/18] Tweak script name --- astro_reorg/{rollback.py => local_rollback.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename astro_reorg/{rollback.py => local_rollback.py} (100%) diff --git a/astro_reorg/rollback.py b/astro_reorg/local_rollback.py similarity index 100% rename from astro_reorg/rollback.py rename to astro_reorg/local_rollback.py From fc68cb10e92cd4fbcfe4334072b83bebee9c9158 Mon Sep 17 00:00:00 2001 From: Jen Gilbert Date: Thu, 11 Jun 2026 10:59:19 -0500 Subject: [PATCH 14/18] Add test plan --- astro_reorg/{context.md => CLAUDE.md} | 6 +- astro_reorg/pr_conflicts_test_plan.md | 484 ++++++++++++++++++ astro_reorg/resolve_pr_conflicts.py | 704 ++++++++++++++++++++++++++ 3 files changed, 1193 insertions(+), 1 deletion(-) rename astro_reorg/{context.md => CLAUDE.md} (56%) create mode 100644 astro_reorg/pr_conflicts_test_plan.md create mode 100644 astro_reorg/resolve_pr_conflicts.py diff --git a/astro_reorg/context.md b/astro_reorg/CLAUDE.md similarity index 56% rename from astro_reorg/context.md rename to astro_reorg/CLAUDE.md index f92cc1951ac..0f67727fffd 100644 --- a/astro_reorg/context.md +++ b/astro_reorg/CLAUDE.md @@ -6,8 +6,12 @@ This repo is currently a Hugo site. We instead want it to contain a `hugo` and ` `astro_reorg/execute_reorg.py` implements the file and folder path changes, and updates any dependencies on those paths, such as GitHub actions, CODEOWNERS, and Husky workflows. 
-`astro_reorg/rollback.py` functions as an "undo" action for the reorg. +`astro_reorg/helpers.py` contains shared utilities used by the other scripts (path manipulation, git/shell helpers, YAML config loading). + +`astro_reorg/local_rollback.py` functions as an "undo" action for the reorg: removes `hugo/` and restores `.gitignore`, `.github/`, and `.husky/` from git. `astro_reorg/validate_reorg.py` verifies the functionality of affected entities where possible. +`astro_reorg/resolve_pr_conflicts.py` finds open PRs with merge conflicts caused by the reorg, and either auto-fixes them (by replaying commits at post-reorg paths) or labels them for manual review. Defaults to dry-run mode; use `--no-dry-run` to apply changes. + You can ignore the `astro` folder, it's a remnant from another branch where an Astro site is being developed. It is completely out of scope. \ No newline at end of file diff --git a/astro_reorg/pr_conflicts_test_plan.md b/astro_reorg/pr_conflicts_test_plan.md new file mode 100644 index 00000000000..0c66f7c5fc9 --- /dev/null +++ b/astro_reorg/pr_conflicts_test_plan.md @@ -0,0 +1,484 @@ +# Manual test plan: resolve_pr_conflicts.py + +Uses `astro-reorg-master` as a fake post-reorg base branch throughout, so no +real master is touched and all test PRs can be cleaned up afterward. + +Script invocation shorthand used below: +```bash +python3 astro_reorg/resolve_pr_conflicts.py --base-branch astro-reorg-master +``` + +--- + +## Part 0: One-time setup + +These steps create the fake base branch and the labels the script manages. +Do them once before running any test case. + +### 0.1 Create `astro-reorg-master` + +This simulates post-reorg master: it contains files at their new `hugo/` +paths. The script treats this branch as the merge target for all PRs under +test. 
+ +The key point is that the file must be **moved**, not just copied: the old +path (`content/en/getting_started/_index.md`) must no longer exist on this +branch, and the new path (`hugo/content/en/getting_started/_index.md`) must. +A PR that edits the old path then conflicts (modify/delete) exactly the way a +real pre-reorg PR does. A simple copy would leave the old path in place and +the merge would succeed cleanly — no conflict to test. + +```bash +# Start from current master +git fetch origin +git checkout -b astro-reorg-master origin/master + +# Simulate the reorg: MOVE the test file into hugo/ (git mv = delete old + +# add new), so PRs touching the old path produce a real reorg conflict. +mkdir -p hugo/content/en/getting_started +git mv content/en/getting_started/_index.md hugo/content/en/getting_started/_index.md +git commit -m "test: simulate reorg — move getting_started/_index.md into hugo/" + +git push origin astro-reorg-master +``` + +- [ ] Branch `astro-reorg-master` exists on origin +- [ ] `hugo/content/en/getting_started/_index.md` is present on that branch +- [ ] `content/en/getting_started/_index.md` no longer exists on that branch + +### 0.2 Verify labels are created on first run + +```bash +python3 astro_reorg/resolve_pr_conflicts.py \ + --base-branch astro-reorg-master \ + --dry-run \ + --pr 1 # any real PR number; it won't be modified in dry-run +``` + +- [ ] Script prints `[dry-run] would create label: 'astro-reorg-manual-review'` +- [ ] Script prints `[dry-run] would create label: 'astro-reorg-stale'` + +Run once without `--dry-run` (still with `--pr 1`) to actually create them: +```bash +python3 astro_reorg/resolve_pr_conflicts.py --base-branch astro-reorg-master --pr 1 +``` + +- [ ] Both labels now exist in the repo (check via GitHub Labels page or + `gh label list --repo DataDog/documentation`) +- [ ] Labels are not re-created on a second run (script is idempotent) + +--- + +## Part 1: MERGEABLE PR — skipped + +**Behavior being verified:** 
PRs that have no conflicts are detected and +skipped immediately without creating any branches, labels, or comments. + +### 1.1 Create a PR with no conflicts + +```bash +git checkout -b test/no-conflict origin/master +# Edit a file that does NOT exist at a reorg-moved path, e.g. README.md +echo "" >> README.md +git add README.md +git commit -m "test: no-conflict PR" +git push origin test/no-conflict +gh pr create --repo DataDog/documentation \ + --head test/no-conflict \ + --base astro-reorg-master \ + --title "TEST no-conflict PR" \ + --body "Test PR for resolve_pr_conflicts.py — delete after testing." +``` + +Note the PR number (referred to as `` below). + +### 1.2 Run the script against it + +```bash +python3 astro_reorg/resolve_pr_conflicts.py \ + --base-branch astro-reorg-master \ + --pr +``` + +- [ ] Output says `mergeable: MERGEABLE` and `No conflicts — skipping.` +- [ ] No `reorg-fix/` branch was pushed +- [ ] No labels added to the PR +- [ ] No comment posted on the PR + +### 1.3 Cleanup + +```bash +gh pr close --repo DataDog/documentation +git push origin --delete test/no-conflict +``` + +--- + +## Part 2: Reorg-only conflicts — auto-fix, single commit + +**Behavior being verified:** A PR that edits a file at a pre-reorg path +(e.g. `content/en/`) conflicts only because the reorg moved that file to +`hugo/content/en/`. The script classifies all conflicts as reorg-caused, +runs `format-patch` + `git am`, opens a fix PR, labels the original PR +`astro-reorg-stale`, and posts a comment pointing to the fix PR. 
+ +### 2.1 Create the PR + +```bash +# Branch off a commit BEFORE the reorg file was added to astro-reorg-master +git checkout -b test/reorg-only-conflict origin/master +# Edit the same file that the "reorg" touched, but at its old path +echo "" >> content/en/getting_started/_index.md +git add content/en/getting_started/_index.md +git commit -m "test: edit getting_started at old path" +git push origin test/reorg-only-conflict +gh pr create --repo DataDog/documentation \ + --head test/reorg-only-conflict \ + --base astro-reorg-master \ + --title "TEST reorg-only conflict PR" \ + --body "Test PR — single commit, reorg-only conflict." +``` + +Note the PR number as ``. + +### 2.2 Dry-run first + +```bash +python3 astro_reorg/resolve_pr_conflicts.py \ + --base-branch astro-reorg-master \ + --dry-run \ + --pr +``` + +- [ ] Output classifies the conflict as reorg-caused (not in "Unrelated conflicts") +- [ ] Output prints `[dry-run] would apply 1 commit(s) to reorg-fix/pr-` +- [ ] Output prints the commit subject line +- [ ] Output prints `[dry-run] would open PR: '[reorg fix] TEST reorg-only conflict PR'` +- [ ] No branch was pushed, no PR opened, no labels added (dry-run) + +### 2.3 Run for real + +```bash +python3 astro_reorg/resolve_pr_conflicts.py \ + --base-branch astro-reorg-master \ + --pr +``` + +- [ ] Script prints `Pushed fix to branch 'reorg-fix/pr-'` +- [ ] Script prints `Opened fix PR: https://github.com/DataDog/documentation/pull/` +- [ ] Branch `reorg-fix/pr-` exists on origin + +On the fix PR: +- [ ] Title is `[reorg fix] TEST reorg-only conflict PR` +- [ ] Body references the original PR number and contains the original PR description +- [ ] Base branch is `astro-reorg-master` +- [ ] Commits on the fix PR have the **original author name/email** (not `reorg-fix-script`) +- [ ] Commit messages match the original PR's commits exactly +- [ ] The file path in the fix PR is `hugo/content/en/getting_started/_index.md` + (not `content/en/...`) +- [ ] Fix PR is 
mergeable (no conflicts) + +On the original PR ``: +- [ ] Label `astro-reorg-stale` has been added +- [ ] A comment was posted referencing the fix PR number +- [ ] No `astro-reorg-manual-review` label was added + +### 2.4 Verify re-run is idempotent + +Run the script against the same PR a second time. Because the original PR was +labeled `astro-reorg-stale` on the first run, it is now skipped before any +merge test, fix branch, or comment. + +```bash +python3 astro_reorg/resolve_pr_conflicts.py \ + --base-branch astro-reorg-master \ + --pr +``` + +- [ ] Output says `Already has a fix PR (astro-reorg-stale) — skipping.` +- [ ] Script exits without error (no `gh pr create` failure) +- [ ] No second fix PR is opened +- [ ] No duplicate comment is posted on the original PR +- [ ] The fix branch is NOT re-pushed (no force-push) + +### 2.5 Cleanup + +```bash +gh pr close --repo DataDog/documentation +gh pr close --repo DataDog/documentation +git push origin --delete test/reorg-only-conflict +git push origin --delete reorg-fix/pr- +``` + +--- + +## Part 3: Reorg-only conflicts — auto-fix, multiple commits + +**Behavior being verified:** A PR with multiple commits all touching +reorg-moved paths produces a fix PR with the same number of commits, each +with the original message and authorship. This verifies `format-patch`/`am` +replay rather than squashing. + +### 3.1 Create the PR + +```bash +git checkout -b test/reorg-multi-commit origin/master +echo "" >> content/en/getting_started/_index.md +git add content/en/getting_started/_index.md +git commit -m "test: first edit at old path" + +echo "" >> content/en/getting_started/_index.md +git add content/en/getting_started/_index.md +git commit -m "test: second edit at old path" + +git push origin test/reorg-multi-commit +gh pr create --repo DataDog/documentation \ + --head test/reorg-multi-commit \ + --base astro-reorg-master \ + --title "TEST multi-commit reorg PR" \ + --body "Two commits, both at pre-reorg paths." 
+``` + +Note the PR number as ``. + +### 3.2 Run for real + +```bash +python3 astro_reorg/resolve_pr_conflicts.py \ + --base-branch astro-reorg-master \ + --pr +``` + +- [ ] Output says `would apply 2 commit(s)` (or applies 2 commits when not dry-run) +- [ ] Fix PR has exactly 2 commits +- [ ] Commit 1 message: `test: first edit at old path` +- [ ] Commit 2 message: `test: second edit at old path` +- [ ] Both commits have the original author, not `reorg-fix-script` +- [ ] Original PR labeled `astro-reorg-stale` + +### 3.3 Cleanup + +```bash +gh pr close --repo DataDog/documentation +gh pr close --repo DataDog/documentation +git push origin --delete test/reorg-multi-commit +git push origin --delete reorg-fix/pr- +``` + +--- + +## Part 4: Mixed conflicts — reorg + unrelated + +**Behavior being verified:** When a PR has at least one conflict that is NOT +at a reorg-moved path, the script must not touch the PR content. It labels +the PR `astro-reorg-manual-review` only, opens no fix PR, and adds no +`astro-reorg-stale` label. + +### 4.1 Create the PR + +```bash +git checkout -b test/mixed-conflict origin/master +# Reorg-path edit (will conflict with astro-reorg-master) +echo "" >> content/en/getting_started/_index.md +git add content/en/getting_started/_index.md +# Non-reorg-path edit: edit README.md too, and make astro-reorg-master conflict it +echo "" >> README.md +git add README.md +git commit -m "test: mixed reorg + non-reorg edits" +git push origin test/mixed-conflict +``` + +Now create a conflicting edit to README.md on `astro-reorg-master`: +```bash +git checkout astro-reorg-master +echo "" >> README.md +git add README.md +git commit -m "test: conflicting README edit on base branch" +git push origin astro-reorg-master +``` + +```bash +gh pr create --repo DataDog/documentation \ + --head test/mixed-conflict \ + --base astro-reorg-master \ + --title "TEST mixed conflict PR" \ + --body "Has both reorg and non-reorg conflicts." +``` + +Note the PR number as ``. 
+ +### 4.2 Run the script + +```bash +python3 astro_reorg/resolve_pr_conflicts.py \ + --base-branch astro-reorg-master \ + --pr +``` + +- [ ] Output lists both a reorg conflict (`hugo/content/en/...` or `content/en/...`) + and an unrelated conflict (`README.md`) +- [ ] Output says `Non-reorg conflicts present — labeling for manual review.` +- [ ] Label `astro-reorg-manual-review` added to `` +- [ ] Label `astro-reorg-stale` NOT added +- [ ] No fix PR opened +- [ ] No comment posted on the PR + +### 4.3 Cleanup + +```bash +gh pr close --repo DataDog/documentation +git push origin --delete test/mixed-conflict +# Revert the README edit on astro-reorg-master if desired +``` + +--- + +## Part 5: Wrong-path addition (no conflict marker) + +**Behavior being verified:** When a PR adds a brand-new file at a pre-reorg +path (e.g. `content/en/new_file.md`), git merges it silently at the wrong +path with no conflict marker. The script detects this via +`get_wrong_path_additions()` and treats it as a reorg conflict, triggering +the auto-fix. + +### 5.1 Create the PR + +```bash +git checkout -b test/wrong-path-addition origin/master +# Add a brand-new file that doesn't exist anywhere yet +echo "# New page" > content/en/brand_new_test_page.md +git add content/en/brand_new_test_page.md +git commit -m "test: add new page at pre-reorg path" +git push origin test/wrong-path-addition +gh pr create --repo DataDog/documentation \ + --head test/wrong-path-addition \ + --base astro-reorg-master \ + --title "TEST wrong-path addition PR" \ + --body "Adds a new file at a pre-reorg path — no conflict marker expected." +``` + +Note the PR number as ``. 
+ +### 5.2 Run the script + +```bash +python3 astro_reorg/resolve_pr_conflicts.py \ + --base-branch astro-reorg-master \ + --pr +``` + +- [ ] Output shows `Wrong-path additions: ['content/en/brand_new_test_page.md']` +- [ ] Output shows this under reorg-caused conflicts (not unrelated) +- [ ] Fix PR is opened +- [ ] On the fix PR, the new file appears at `hugo/content/en/brand_new_test_page.md` + (not `content/en/...`) +- [ ] Original PR labeled `astro-reorg-stale` + +### 5.3 Cleanup + +```bash +gh pr close --repo DataDog/documentation +gh pr close --repo DataDog/documentation +git push origin --delete test/wrong-path-addition +git push origin --delete reorg-fix/pr- +``` + +--- + +## Part 6: UNKNOWN mergeability — skipped + +**Behavior being verified:** GitHub computes mergeability lazily. PRs that +return `UNKNOWN` are skipped without error so they can be re-checked later. + +This state is hard to manufacture reliably, but can be observed naturally on +a freshly pushed PR before GitHub has computed its mergeability. Watch the +output of a run against a PR you just created: + +```bash +# Run the script immediately after opening a PR, before GitHub has processed it +python3 astro_reorg/resolve_pr_conflicts.py \ + --base-branch astro-reorg-master \ + --pr +``` + +- [ ] If mergeability is `UNKNOWN`, output says + `Mergeability not yet computed by GitHub — skipping.` +- [ ] No changes made to the PR + +--- + +## Part 7: `--dry-run` flag + +**Behavior being verified:** `--dry-run` reports all intended actions without +making any changes — no branches pushed, no PRs opened, no labels applied, +no comments posted. + +Use `` from Part 2 (re-create it if already cleaned up). 
+ +```bash +python3 astro_reorg/resolve_pr_conflicts.py \ + --base-branch astro-reorg-master \ + --dry-run \ + --pr <PR_NUMBER> +``` + +- [ ] Output starts with `DRY-RUN mode — no branches or PRs will be modified.` +- [ ] Output shows `[dry-run] would apply N commit(s)` +- [ ] Output shows `[dry-run] would open PR: '[reorg fix] ...'` +- [ ] No branch `reorg-fix/pr-<PR_NUMBER>` exists on origin after the run +- [ ] No labels added to the PR +- [ ] No comment posted on the PR + +--- + +## Part 8: `--pr` flag (targeted run) + +**Behavior being verified:** Without `--pr`, the script queries all open PRs. +With `--pr`, it checks only the specified PR(s). This is the main way to +avoid processing hundreds of PRs when testing. + +```bash +# Run against two specific PRs +python3 astro_reorg/resolve_pr_conflicts.py \ + --base-branch astro-reorg-master \ + --dry-run \ + --pr <PR_NUMBER_1> --pr <PR_NUMBER_2> +``` + +- [ ] Output says `Found 2 open PR(s) to check.` +- [ ] Only `<PR_NUMBER_1>` and `<PR_NUMBER_2>` appear in the output + +--- + +## Part 9: Full scan (no `--pr` filter) + +**Behavior being verified:** Without `--pr`, the script lists all open PRs +from the repo and processes each one. Keep `--dry-run` on until ready: with `--no-dry-run` it will +make real changes to conflicting PRs. 
+ +```bash +python3 astro_reorg/resolve_pr_conflicts.py \ + --base-branch astro-reorg-master \ + --dry-run +``` + +- [ ] Output says `Found N open PR(s) to check.` for the real open PR count +- [ ] Each PR appears in the output with its mergeability status +- [ ] No changes made (dry-run) + +--- + +## Part 10: Teardown + +After all tests, clean up the fake base branch: + +```bash +git push origin --delete astro-reorg-master +git branch -D astro-reorg-master +``` + +If you created the labels in Part 0 and want to remove them: +```bash +gh label delete astro-reorg-manual-review --repo DataDog/documentation --yes +gh label delete astro-reorg-stale --repo DataDog/documentation --yes +``` diff --git a/astro_reorg/resolve_pr_conflicts.py b/astro_reorg/resolve_pr_conflicts.py new file mode 100644 index 00000000000..6b68456e1f7 --- /dev/null +++ b/astro_reorg/resolve_pr_conflicts.py @@ -0,0 +1,704 @@ +#!/usr/bin/env python3 +""" +Find open PRs with merge conflicts caused by the reorg. +Defaults to a dry run where it just reports the changes +that would be made. + +Usage: + python3 astro_reorg/resolve_pr_conflicts.py [--dry-run] [--pr NUMBER ...] + +Flags: + --no-dry-run Actually apply fixes and labels instead of just reporting what would be done. + --base-branch BRANCH Branch to treat as the post-reorg base (default: master). + +Background: + The reorg moves every entry in `moves_to_hugo` (astro_reorg/config.yaml) + from the repo root into hugo/. For example, content/ → hugo/content/, + layouts/ → hugo/layouts/, etc. PRs opened before the reorg was merged to + master will have their branches pointing at the old paths. When github + tries to compute mergeability, those PRs show as CONFLICTING. + +How we decide whether a conflict came from the reorg: + When git merges a PR branch into post-reorg master it uses rename detection + to pair the PR's pre-reorg file path (e.g. content/en/foo.md) with the + corresponding post-reorg path (hugo/content/en/foo.md) in master. 
If both + sides modified the file, git reports a conflict. Rename detection usually + places the conflict at the POST-reorg path (hugo/content/en/foo.md), because + that is where the file lives in master. Occasionally, when rename detection + fails (the file was heavily edited or the threshold wasn't met), git instead + reports a "deleted by them" conflict at the PRE-reorg path. + + A conflict is "from the reorg" if its path maps to a reorg-moved location: + a. hugo/<dir>/... where <dir> is in moves_to_hugo (post-reorg path, + rename detected — the common case) + b. <dir>/... where <dir> is in moves_to_hugo (pre-reorg path, rename + NOT detected — git sees it as "they deleted the file") + + Any conflict at a path NOT matching either pattern is unrelated to the + reorg and must be resolved manually. + + We also detect a subtler case: files ADDED by the PR at a pre-reorg path + (e.g. content/en/brand_new.md). These produce no conflict marker — git + happily merges them at the wrong path. We catch them by scanning all + paths staged in the test merge and flagging any whose first segment is in + moves_to_hugo. + +Auto-fix strategy (reorg-only PRs): + For PRs where every conflict is a reorg conflict, the fix is to replay the + PR's commits at the post-reorg paths: + + 1. Find the merge base between the PR branch and the base branch (where + the PR diverged from master, before the reorg landed). + 2. Export each PR commit as its own patch with `git format-patch`, + preserving the original author and commit message. + 3. Rewrite every file path in the patches whose first segment is in + moves_to_hugo to be prefixed with hugo/ (content/en/ → hugo/content/en/). + 4. Replay the series onto a fresh branch off the base branch with + `git am --3way`. --3way falls back to a per-patch 3-way merge when + context lines have drifted because master made unrelated edits between + the PR's base and today. + 5. 
Push as `reorg-fix/pr-`, open a new PR for it, comment on the + original PR pointing to the fix PR, and label the original + astro-reorg-stale. + + A PR that already carries the astro-reorg-stale label (a fix PR was opened + on a prior run) is skipped, so re-running the script is safe. + + PRs from forks cannot be auto-fixed (we don't have push access to the fork). + They receive the astro-reorg-manual-review label. +""" +from __future__ import annotations + +import argparse +import json +import os +import shutil +import subprocess +import sys +import tempfile +from pathlib import Path + +try: + import yaml +except ImportError: + print("Error: PyYAML is required. Install with: pip install pyyaml", file=sys.stderr) + sys.exit(1) + +# --------------------------------------------------------------------------- +# Configuration +# --------------------------------------------------------------------------- + +SCRIPT_DIR = Path(__file__).parent +REPO_ROOT = SCRIPT_DIR.parent +CONFIG_PATH = SCRIPT_DIR / "config.yaml" + +with CONFIG_PATH.open() as f: + _config = yaml.safe_load(f) + +MOVES_TO_HUGO: set[str] = set(_config.get("moves_to_hugo", [])) +TOP_LEVEL: set[str] = set(_config.get("top_level", [])) + +REPO = "DataDog/documentation" +LABEL_MANUAL_REVIEW = "astro-reorg-manual-review" +LABEL_STALE = "astro-reorg-stale" +LABEL_COLOR = "e4e669" +LABEL_DESCRIPTION = "Needs manual conflict resolution after replatforming reorg" + +# Set in main() from --base-branch; everything else reads this. 
+BASE_BRANCH = "master" + +# --------------------------------------------------------------------------- +# Shell helpers +# --------------------------------------------------------------------------- + +def run(cmd: list[str], *, cwd: Path | None = None, input: str | None = None) -> subprocess.CompletedProcess: + return subprocess.run(cmd, capture_output=True, text=True, cwd=cwd, input=input) + + +def run_bytes(cmd: list[str], *, cwd: Path | None = None) -> subprocess.CompletedProcess: + """Run a command and capture output as bytes (needed for binary file content).""" + return subprocess.run(cmd, capture_output=True, cwd=cwd) + + +def gh_json(*args: str) -> object: + result = run(["gh", *args]) + if result.returncode != 0: + raise RuntimeError(f"gh {' '.join(args[:3])}: {result.stderr.strip()}") + return json.loads(result.stdout) + + +def gh_run(*args: str) -> str: + result = run(["gh", *args]) + if result.returncode != 0: + raise RuntimeError(f"gh {' '.join(args[:3])}: {result.stderr.strip()}") + return result.stdout + + +def git(*args: str, cwd: Path | None = None) -> subprocess.CompletedProcess: + return run(["git", *args], cwd=cwd or REPO_ROOT) + + +# --------------------------------------------------------------------------- +# Reorg path helpers +# --------------------------------------------------------------------------- + +def path_first_segment(file_path: str) -> str: + """Return the first directory segment of a path string.""" + return file_path.lstrip("/").split("/", 1)[0] + + +def is_reorg_path(file_path: str) -> bool: + """ + Return True if this path maps to a location moved by the reorg. 
+ + Covers both forms that git can report: + - Pre-reorg path: content/en/foo.md (first segment in moves_to_hugo) + - Post-reorg path: hugo/content/en/foo.md (second segment in moves_to_hugo) + """ + parts = Path(file_path).parts + if not parts: + return False + if parts[0] in MOVES_TO_HUGO: + return True + if parts[0] == "hugo" and len(parts) > 1 and parts[1] in MOVES_TO_HUGO: + return True + return False + + +def to_post_reorg_path(file_path: str) -> str: + """ + Convert a file path to its post-reorg location. + + content/en/foo.md → hugo/content/en/foo.md + hugo/content/en/foo.md → hugo/content/en/foo.md (already correct) + README.md → README.md (not a reorg-moved path) + """ + parts = Path(file_path).parts + if not parts: + return file_path + if parts[0] == "hugo": + return file_path + if parts[0] in MOVES_TO_HUGO: + return "hugo/" + file_path + return file_path + + +def to_pre_reorg_path(file_path: str) -> str | None: + """ + Convert a post-reorg path back to its pre-reorg location, or None if not + applicable. + + hugo/content/en/foo.md → content/en/foo.md + """ + parts = Path(file_path).parts + if len(parts) > 1 and parts[0] == "hugo" and parts[1] in MOVES_TO_HUGO: + return "/".join(parts[1:]) + return None + + +# --------------------------------------------------------------------------- +# Merge conflict analysis (run inside a temp worktree) +# --------------------------------------------------------------------------- + +def get_conflict_classification(worktree: Path) -> tuple[list[str], list[str]]: + """ + Parse git status inside a worktree after a --no-commit merge attempt. + + Returns (reorg_conflicts, other_conflicts) — lists of conflicted file paths. 
+ + git status --porcelain conflict codes (XY): + UU both modified + AA both added + DD both deleted + AU added by us, not staged on their side + UA added by them + DU deleted by us + UD deleted by them (common for rename/delete: the reorg "deleted" the + file from the old path by renaming it; the PR still has the old path) + + For rename conflicts git shows "old -> new" in the path field. We use + the FINAL path (after the arrow) for classification, because that is the + path that needs to be resolved in the working tree. + """ + result = run(["git", "status", "--porcelain"], cwd=worktree) + reorg: list[str] = [] + other: list[str] = [] + for line in result.stdout.splitlines(): + if len(line) < 4: + continue + xy = line[:2] + path = line[3:] + # Only unmerged (conflict) entries have U in XY or are AA/DD. + if "U" not in xy and xy not in ("AA", "DD"): + continue + # Rename entries show "old -> new". Treat a rename conflict as + # reorg-caused only when BOTH endpoints map to reorg-moved paths; if + # either side is unrelated, classify it as "other" so the PR goes to + # manual review rather than getting an auto-fix we can't be sure about. + if " -> " in path: + src, dst = (p.strip() for p in path.split(" -> ", 1)) + both_reorg = is_reorg_path(src) and is_reorg_path(dst) + (reorg if both_reorg else other).append(dst) + else: + path = path.strip() + (reorg if is_reorg_path(path) else other).append(path) + return reorg, other + + +def get_wrong_path_additions(worktree: Path) -> list[str]: + """ + Find files staged in the test merge at PRE-REORG paths that should instead + live under hugo/. + + These do not cause merge conflict markers — git happily adds the file at + the old path with no complaint. We catch them here so the caller can + include them in the "reorg conflict" count. 
+ + Specifically: any file with status 'A' (added) in the staged diff against + HEAD whose first path segment is in moves_to_hugo is a "wrong path + addition" caused by the PR adding a brand-new file at a pre-reorg path. + """ + result = run( + ["git", "diff", "--cached", "--name-status", "--diff-filter=A", "HEAD"], + cwd=worktree, + ) + wrong: list[str] = [] + for line in result.stdout.splitlines(): + parts = line.split("\t", 1) + if len(parts) != 2: + continue + path = parts[1].strip() + if path_first_segment(path) in MOVES_TO_HUGO: + wrong.append(path) + return wrong + + +# --------------------------------------------------------------------------- +# Diff path transformation +# --------------------------------------------------------------------------- + +def transform_diff_paths(diff_text: str) -> str: + """ + Rewrite file paths in a unified diff to use post-reorg (hugo/-prefixed) paths. + + Only paths whose first segment is in moves_to_hugo are changed; all other + paths, and all diff hunk content lines, are left untouched. + + Handles the following diff header forms: + diff --git a/ b/ + --- a/ + +++ b/ + rename from + rename to + + The hunk bodies (+/- content lines) are never touched because they contain + file content, not file names. + """ + out: list[str] = [] + for line in diff_text.splitlines(keepends=True): + if line.startswith("diff --git "): + # "diff --git a/content/en/foo.md b/content/en/foo.md" + # Rewrite both the a/ and b/ path tokens. 
+ tokens = line.rstrip("\n").split(" ") + new_tokens = [] + for tok in tokens: + if tok.startswith("a/") or tok.startswith("b/"): + side, rest = tok[:2], tok[2:] + new_tokens.append(side + to_post_reorg_path(rest)) + else: + new_tokens.append(tok) + out.append(" ".join(new_tokens) + "\n") + elif line.startswith("--- a/") or line.startswith("+++ b/"): + side = line[:6] # "--- a/" or "+++ b/" + rest = line[6:].rstrip("\n") + out.append(side + to_post_reorg_path(rest) + "\n") + elif line.startswith("rename from ") or line.startswith("rename to "): + keyword_end = line.index(" ", line.index(" ") + 1) + 1 + keyword = line[:keyword_end] + path = line[keyword_end:].rstrip("\n") + out.append(keyword + to_post_reorg_path(path) + "\n") + else: + out.append(line) + return "".join(out) + + +# --------------------------------------------------------------------------- +# GitHub label helpers +# --------------------------------------------------------------------------- + +def ensure_label_exists(label: str, dry_run: bool) -> None: + """Create the GitHub label if it doesn't already exist.""" + existing = gh_json("label", "list", "--repo", REPO, "--json", "name") + if any(l["name"] == label for l in existing): # type: ignore[index] + return + if dry_run: + print(f" [dry-run] would create label: {label!r}") + return + gh_run("label", "create", label, "--repo", REPO, + "--color", LABEL_COLOR, "--description", LABEL_DESCRIPTION) + print(f" Created label: {label!r}") + + +def add_label(pr_number: int, label: str, dry_run: bool) -> None: + if dry_run: + print(f" [dry-run] would add label {label!r} to PR #{pr_number}") + return + gh_run("pr", "edit", str(pr_number), "--repo", REPO, "--add-label", label) + print(f" Added label {label!r} to PR #{pr_number}") + + +def post_comment(pr_number: int, body: str, dry_run: bool) -> None: + if dry_run: + print(f" [dry-run] would comment on PR #{pr_number}:\n {body[:120]}...") + return + gh_run("pr", "comment", str(pr_number), "--repo", REPO, 
"--body", body) + print(f" Posted comment on PR #{pr_number}") + + +# --------------------------------------------------------------------------- +# Auto-fix +# --------------------------------------------------------------------------- + +def attempt_fix(pr: dict, dry_run: bool) -> bool: + """ + Attempt to auto-fix a reorg-conflict PR by re-applying its commits at the + post-reorg paths. + + Strategy: format-patch + am, preserving the PR's individual commits. + + 1. Find the merge base between the PR branch and BASE_BRANCH (the commit + where the PR diverged from master before the reorg landed). + 2. Use `git format-patch` to export each PR commit as its own patch, + including the original author name, email, and commit message. + 3. Rewrite file paths in every patch: anything whose first segment is in + moves_to_hugo gets a hugo/ prefix (content/ → hugo/content/, etc.). + 4. Apply the series with `git am --3way` onto a fresh branch off + BASE_BRANCH. --3way falls back to a 3-way merge per patch when + context lines have drifted due to unrelated master changes, so the + PR's individual commits land cleanly even if master moved on. + 5. Push as `reorg-fix/pr-` and post a comment on the PR. + + Using format-patch/am rather than a single squashed diff means the fix + branch has the same commit history as the original PR — same messages, + same authorship, same granularity — making it easy to review and to revert + individual commits if needed. + + Returns True if the fix was applied (or would be in dry-run mode), False + if we gave up and the caller should fall back to labeling. + + PRs from forks are not auto-fixed: we cannot push to a fork branch, so we + return False immediately and let the caller add the manual-review label. 
+ """ + pr_number = pr["number"] + head_ref = pr["headRefName"] + is_fork = pr.get("isCrossRepository", False) + + if is_fork: + print(f" PR #{pr_number} is from a fork — cannot push; will label instead.") + return False + + # Fetch the PR branch so we can reference it locally. + pr_remote_ref = f"refs/remotes/origin/{head_ref}" + fetch = git("fetch", "origin", f"{head_ref}:{pr_remote_ref.replace('refs/remotes/', '')}") + if fetch.returncode != 0: + print(f" fetch failed: {fetch.stderr.strip()[:120]}", file=sys.stderr) + return False + + # The merge base is the last common ancestor of the PR branch and + # BASE_BRANCH — the point where the PR diverged from master before the + # reorg commit landed. + merge_base = git("merge-base", f"origin/{BASE_BRANCH}", pr_remote_ref) + if merge_base.returncode != 0: + print(f" could not find merge base: {merge_base.stderr.strip()[:80]}", file=sys.stderr) + return False + base_sha = merge_base.stdout.strip() + + # Export each PR commit as a separate mbox-format patch. stdout gives us + # all patches concatenated; git am can consume this directly. + format_patch = git("format-patch", "--stdout", f"{base_sha}..{pr_remote_ref}") + if format_patch.returncode != 0: + print(f" format-patch failed: {format_patch.stderr.strip()[:80]}", file=sys.stderr) + return False + + patches = format_patch.stdout + if not patches.strip(): + print(" no patches between merge base and PR HEAD — nothing to apply.") + return False + + transformed = transform_diff_paths(patches) + + if dry_run: + # Count patches and show per-patch file summaries. + subjects = [l[len("Subject: "):] for l in transformed.splitlines() + if l.startswith("Subject: ")] + print(f" [dry-run] would apply {len(subjects)} commit(s) to reorg-fix/pr-{pr_number}:") + for s in subjects[:10]: + print(f" {s}") + if len(subjects) > 10: + print(f" ... 
and {len(subjects) - 10} more") + would_be_title = f"[reorg fix] {pr['title']}" + print(f" [dry-run] would open PR: {would_be_title!r}") + return True + + fix_branch = f"reorg-fix/pr-{pr_number}" + tmpdir = tempfile.mkdtemp(prefix=f"reorg_fix_{pr_number}_") + try: + add_wt = git("worktree", "add", "-b", fix_branch, tmpdir, f"origin/{BASE_BRANCH}") + if add_wt.returncode != 0: + # Creating the branch failed — most likely a reorg-fix/pr- branch + # from a prior run already exists. We do NOT reset and reuse it: + # that could clobber a fix that's already in review. Bail and let + # the PR fall back to manual review. (A fully-applied fix carries + # astro-reorg-stale, so that PR would have been skipped earlier.) + print(f" could not create fix branch {fix_branch!r} " + f"(may already exist) — leaving for manual review: " + f"{add_wt.stderr.strip()[:120]}", file=sys.stderr) + return False + worktree = Path(tmpdir) + + # git am replays each patch as its own commit, preserving the original + # author and message. --3way enables per-patch 3-way merging so that + # patches whose context drifted (because master made unrelated edits + # between the PR base and now) still apply cleanly. + am = run(["git", "am", "--3way"], cwd=worktree, input=transformed) + if am.returncode != 0: + print(f" git am failed:\n{am.stderr[:400]}", file=sys.stderr) + run(["git", "am", "--abort"], cwd=worktree) + return False + + push = run(["git", "push", "origin", fix_branch], cwd=worktree) + if push.returncode != 0: + # A reorg-fix/pr- branch from a prior run already exists on + # origin. We don't force-push (repo policy), and a fix PR for it + # most likely already exists, so bail to manual review rather than + # clobber it. (PRs that were fully fixed carry astro-reorg-stale + # and are skipped before we ever get here.) 
+ print(f" push failed (fix branch may already exist): " + f"{push.stderr.strip()[:120]}", file=sys.stderr) + return False + + print(f" Pushed fix to branch {fix_branch!r}") + + # Open a new PR for the fix branch so the author can preview, review, + # and merge it directly — then close the original conflicting PR. + original_body = pr.get("body") or "" + new_pr_body = ( + f"🤖 Auto-generated fix for #{pr_number}.\n\n" + f"This PR replays the commits from #{pr_number} with file paths " + f"translated to the post-reorg `hugo/` layout. The original commits " + f"are preserved — same messages and authorship.\n\n" + f"If this looks correct, merge this PR and close #{pr_number}.\n\n" + f"---\n\n" + f"**Original PR description:**\n\n{original_body}" + ) + # We only reach this point on a real run; dry-run returned earlier. + new_pr_title = f"[reorg fix] {pr['title']}" + pr_create = gh_run( + "pr", "create", + "--repo", REPO, + "--head", fix_branch, + "--base", BASE_BRANCH, + "--title", new_pr_title, + "--body", new_pr_body, + ) + new_pr_url = pr_create.strip() + new_pr_number = new_pr_url.rstrip("/").split("/")[-1] + print(f" Opened fix PR: {new_pr_url}") + + post_comment( + pr_number, + f"🤖 **Reorg conflict auto-fix: #{new_pr_number}**\n\n" + f"This PR has merge conflicts caused by the astro reorg " + f"(files moved from the repo root into `hugo/`). 
" + f"A new PR with your commits translated to the correct paths " + f"has been opened: #{new_pr_number}\n\n" + f"If #{new_pr_number} looks correct, merge it and close this PR.", + dry_run=False, + ) + add_label(pr_number, LABEL_STALE, dry_run) + return True + + finally: + git("worktree", "remove", "--force", tmpdir) + shutil.rmtree(tmpdir, ignore_errors=True) + + +# --------------------------------------------------------------------------- +# Per-PR analysis +# --------------------------------------------------------------------------- + +def analyze_pr(pr: dict, dry_run: bool) -> None: + pr_number = pr["number"] + title = pr["title"] + mergeable = pr.get("mergeable", "UNKNOWN") + + print(f"\nPR #{pr_number}: {title}") + print(f" mergeable: {mergeable}") + + # A fix PR was already opened for this one on a prior run — skip so we + # don't re-push the fix branch or post a duplicate comment. This makes + # the whole script safe to re-run. + if any(l["name"] == LABEL_STALE for l in pr.get("labels", [])): + print(f" Already has a fix PR ({LABEL_STALE}) — skipping.") + return + + if mergeable == "MERGEABLE": + print(" No conflicts — skipping.") + return + + if mergeable == "UNKNOWN": + # GitHub computes mergeability lazily; try again later if needed. + print(" Mergeability not yet computed by GitHub — skipping.") + return + + # CONFLICTING: fetch the branch and do a local merge test to classify + # conflicts as reorg-caused vs unrelated. 
+ pr_ref = f"refs/remotes/origin/{pr['headRefName']}" + fetch = git("fetch", "origin", + f"refs/pull/{pr_number}/head:{pr_ref.replace('refs/remotes/', '')}") + if fetch.returncode != 0: + print(f" fetch failed: {fetch.stderr.strip()[:120]}", file=sys.stderr) + return + + tmpdir = tempfile.mkdtemp(prefix=f"reorg_check_{pr_number}_") + try: + add_wt = git("worktree", "add", "--detach", tmpdir, f"origin/{BASE_BRANCH}") + if add_wt.returncode != 0: + print(f" worktree add failed: {add_wt.stderr.strip()[:120]}", file=sys.stderr) + return + worktree = Path(tmpdir) + + # Attempt the merge without committing so we can inspect the conflicts. + # We do NOT use --no-ff here; the default is fine since we're only + # inspecting, not keeping the result. + merge = run(["git", "merge", "--no-commit", pr_ref], cwd=worktree) + + # Classify any files that have conflict markers. + reorg_conflicts, other_conflicts = get_conflict_classification(worktree) + + # Even if the merge succeeded cleanly (returncode 0), the PR may have + # added files at pre-reorg paths with no conflict marker. Check for + # these "wrong path additions" in the staged result. + wrong_additions: list[str] = [] + if merge.returncode == 0: + wrong_additions = get_wrong_path_additions(worktree) + # Treat wrong-path additions as reorg conflicts: the PR added a + # file at a pre-reorg path that should be under hugo/. + reorg_conflicts.extend(wrong_additions) + + # Always abort the test merge before leaving the worktree. + run(["git", "merge", "--abort"], cwd=worktree) + + print(f" Reorg-caused conflicts : {reorg_conflicts or 'none'}") + print(f" Unrelated conflicts : {other_conflicts or 'none'}") + if wrong_additions: + print(f" Wrong-path additions : {wrong_additions}") + + if not reorg_conflicts and not other_conflicts: + if merge.returncode != 0: + # The merge failed but we couldn't classify any conflicted path + # (an unusual conflict type we don't parse). 
Don't guess that + # it's reorg-caused — flag it for a human rather than skip a + # real conflict. + print(" Merge failed but no conflicts could be classified " + "— labeling for manual review.") + add_label(pr_number, LABEL_MANUAL_REVIEW, dry_run) + else: + print(" No conflicts found locally (GitHub mergeability may be stale).") + return + + if other_conflicts: + # The PR has conflicts that are NOT from the reorg. We must not + # touch it — just label it so a human can resolve it manually. + print(" Non-reorg conflicts present — labeling for manual review.") + add_label(pr_number, LABEL_MANUAL_REVIEW, dry_run) + else: + # All conflicts are reorg-caused. Attempt the auto-fix. + print(" All conflicts are reorg-caused — attempting auto-fix.") + success = attempt_fix(pr, dry_run) + if not success: + # Auto-fix failed (fork, apply error, etc.) — fall back to labeling. + print(" Auto-fix failed or not applicable — labeling for manual review.") + add_label(pr_number, LABEL_MANUAL_REVIEW, dry_run) + + finally: + run(["git", "worktree", "remove", "--force", tmpdir]) + shutil.rmtree(tmpdir, ignore_errors=True) + + +# --------------------------------------------------------------------------- +# Entry point +# --------------------------------------------------------------------------- + +def get_open_prs(only: list[int] | None = None) -> list[dict]: + """Return open PRs, optionally filtered to specific numbers.""" + fields = ("number,title,body,labels,headRefName,headRepositoryOwner," + "baseRefName,headRefOid,baseRefOid,isCrossRepository,mergeable") + if only: + prs = [] + for n in only: + pr = gh_json("pr", "view", str(n), "--repo", REPO, "--json", fields) + prs.append(pr) + return prs + return gh_json( # type: ignore[return-value] + "pr", "list", "--repo", REPO, "--state", "open", + "--json", fields, "--limit", "300", + ) + + +def main() -> None: + parser = argparse.ArgumentParser( + description="Check open PRs for reorg-caused merge conflicts.", + 
formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=__doc__, + ) + parser.add_argument( + "--dry-run", action=argparse.BooleanOptionalAction, default=True, + help="Report what would be done without making any changes (default: on). Use --no-dry-run to apply changes.", + ) + parser.add_argument( + "--pr", type=int, action="append", dest="prs", metavar="NUMBER", + help="Only check this PR number (may be repeated).", + ) + parser.add_argument( + "--base-branch", default="master", metavar="BRANCH", + help="Branch to treat as the post-reorg base (default: master). " + "Set to a test branch to run against a fake main.", + ) + args = parser.parse_args() + + global BASE_BRANCH + BASE_BRANCH = args.base_branch + + if args.dry_run: + print("DRY-RUN mode — no branches or PRs will be modified.\n") + + print(f"Fetching origin/{BASE_BRANCH}...") + fetch_master = git("fetch", "origin", BASE_BRANCH) + if fetch_master.returncode != 0: + print(f"Warning: could not update {BASE_BRANCH}: {fetch_master.stderr.strip()[:80]}", + file=sys.stderr) + + ensure_label_exists(LABEL_MANUAL_REVIEW, args.dry_run) + ensure_label_exists(LABEL_STALE, args.dry_run) + + prs = get_open_prs(args.prs) + print(f"Found {len(prs)} open PR(s) to check.") + + for pr in prs: + # Isolate failures: one PR raising (e.g. a transient gh/network error) + # shouldn't abort the whole batch. A half-finished fix is safe to + # retry — a re-run either skips it (astro-reorg-stale) or, if the fix + # branch already exists, falls back to manual review. 
+ try: + analyze_pr(pr, args.dry_run) + except Exception as exc: + print(f"\nERROR processing PR #{pr.get('number', '?')}: {exc}", + file=sys.stderr) + print(" Skipping to the next PR.", file=sys.stderr) + + print("\nDone.") + + +if __name__ == "__main__": + main() From 87a3b97a648aa1b100a10fd295e6dd9e8895e365 Mon Sep 17 00:00:00 2001 From: Jen Gilbert Date: Mon, 15 Jun 2026 12:24:25 -0500 Subject: [PATCH 15/18] Omit the images folder from the reorg --- astro_reorg/config.yaml | 4 +++- astro_reorg/execute_reorg.py | 3 --- config/_default/config.yaml | 4 ++-- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/astro_reorg/config.yaml b/astro_reorg/config.yaml index 47bc16e6276..4d9885a62dc 100644 --- a/astro_reorg/config.yaml +++ b/astro_reorg/config.yaml @@ -48,6 +48,9 @@ top_level: # Docker - docker-compose-docs.yml + # Static assets (shared; Hugo mounts via ../static) + - static + moves_to_hugo: # Hugo core - archetypes @@ -57,7 +60,6 @@ moves_to_hugo: - data - i18n - layouts - - static - resources - public diff --git a/astro_reorg/execute_reorg.py b/astro_reorg/execute_reorg.py index 29dea2b3c6d..92f4d406165 100644 --- a/astro_reorg/execute_reorg.py +++ b/astro_reorg/execute_reorg.py @@ -133,12 +133,9 @@ def route_gitignore_segment(line): WORKFLOW_SUBSTITUTIONS = [ ("- 'content/en/", "- 'hugo/content/en/"), ("- 'layouts/shortcodes/", "- 'hugo/layouts/shortcodes/"), - ("- 'static/images/", "- 'hugo/static/images/"), ("python local/bin/", "python hugo/local/bin/"), # vale_linter.yml passes the template path to vale via --output= ("--output=local/bin/", "--output=hugo/local/bin/"), - (" static |", " hugo/static |"), - ("^static/images/", "^hugo/static/images/"), ("-- 'content/en/**/*.md'", "-- 'hugo/content/en/**/*.md'"), # bump_* and version_getter_shared workflows cp/commit data files by ./ path ("./data/", "./hugo/data/"), diff --git a/config/_default/config.yaml b/config/_default/config.yaml index d769844efce..ba77a4faf3b 100644 --- 
a/config/_default/config.yaml +++ b/config/_default/config.yaml @@ -133,7 +133,7 @@ module: - source: content/es target: content lang: es - - source: static + - source: ../static target: static - source: layouts target: layouts @@ -142,7 +142,7 @@ module: - 'shortcodes/mdoc/*' - source: data target: data - - source: static + - source: ../static target: assets - source: assets target: assets From 69393a65649c88c5484b785ff9075604cfda9bb9 Mon Sep 17 00:00:00 2001 From: Jen Gilbert Date: Mon, 15 Jun 2026 13:39:10 -0500 Subject: [PATCH 16/18] Address build issues --- astro_reorg/execute_reorg.py | 44 ++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/astro_reorg/execute_reorg.py b/astro_reorg/execute_reorg.py index 92f4d406165..52e66ade41b 100644 --- a/astro_reorg/execute_reorg.py +++ b/astro_reorg/execute_reorg.py @@ -175,6 +175,40 @@ def route_gitignore_segment(line): py_file.write_text(updated) print(f" Written: {py_file.name}") +# Update Makefile: static/ stays at repo root, so paths that were ./static/ +# must become ../static/ relative to the hugo/ project root. +MAKEFILE_SUBSTITUTIONS = [ + ("@git clean -Xf ./static/", "@git clean -Xf ../static/"), +] + +print("\nUpdating Makefile...") +makefile = hugo_dir / "Makefile" +if makefile.exists(): + original = makefile.read_text() + updated = original + for old, new in MAKEFILE_SUBSTITUTIONS: + updated = updated.replace(old, new) + if updated != original: + makefile.write_text(updated) + print(" Written: Makefile") + +# Update assets/scripts/ JS build scripts: static/ stays at repo root, so +# ./static/ references must become ../static/ relative to hugo/. 
+ASSETS_SCRIPTS_SUBSTITUTIONS = [ + ("./static/resources/json/", "../static/resources/json/"), +] + +print("\nUpdating assets/scripts/...") +scripts_dir = hugo_dir / "assets" / "scripts" +for js_file in sorted(scripts_dir.glob("*.js")): + original = js_file.read_text() + updated = original + for old, new in ASSETS_SCRIPTS_SUBSTITUTIONS: + updated = updated.replace(old, new) + if updated != original: + js_file.write_text(updated) + print(f" Written: assets/scripts/{js_file.name}") + # Update .github/CODEOWNERS to reference paths under hugo/. # # CODEOWNERS is not YAML; it is a line-based format where each rule is @@ -244,3 +278,13 @@ def route_codeowners_pattern(pattern): if changed: codeowners.write_text("".join(lines)) print(" Written: CODEOWNERS") + +# TODO: Update Cdocs so the reorg can support *.mdoc.md files. Until then, +# delete all *.mdoc.md files from hugo/ to avoid Hugo build errors — they are +# excluded from Hugo's ignoreFiles list (config/_default/config.yaml) but the +# source .mdoc.md files still need to be absent until Cdocs is reorg-aware. 
+print("\nDeleting *.mdoc.md files from hugo/...") +mdoc_files = list(hugo_dir.rglob("*.mdoc.md")) +for f in sorted(mdoc_files): + f.unlink() +print(f" Deleted {len(mdoc_files)} *.mdoc.md file(s).") From 930722795c89c79c20512b90aaa7ced0f2c8aab7 Mon Sep 17 00:00:00 2001 From: Jen Gilbert Date: Mon, 15 Jun 2026 14:57:21 -0500 Subject: [PATCH 17/18] Make husky hooks and github workflows flexible --- .../bump_private_action_runner_version.yml | 10 +- .../bump_synthetics_worker_version.yml | 10 +- .github/workflows/bump_versions.yml | 8 +- .github/workflows/preview_link.yml | 5 +- .github/workflows/site_region_check.yml | 5 +- .github/workflows/vale_linter.yml | 8 +- .github/workflows/version_getter_shared.yml | 10 +- .husky/check-cdocs-gitignore.py | 5 +- .husky/check-circular-aliases.py | 12 +- .husky/check-section-index.py | 16 +-- astro_reorg/config.yaml | 5 +- astro_reorg/execute_reorg.py | 117 ++++++++---------- config/_default/config.yaml | 8 +- 13 files changed, 113 insertions(+), 106 deletions(-) diff --git a/.github/workflows/bump_private_action_runner_version.yml b/.github/workflows/bump_private_action_runner_version.yml index 49b65395dec..05772a564b8 100644 --- a/.github/workflows/bump_private_action_runner_version.yml +++ b/.github/workflows/bump_private_action_runner_version.yml @@ -12,6 +12,8 @@ jobs: pull-requests: write # Action creates a PR. 
runs-on: ubuntu-latest name: Find latest private action runner version + env: + HUGO_ROOT: ${{ vars.HUGO_DIR && format('{0}/', vars.HUGO_DIR) || '' }} steps: - uses: DataDog/dd-octo-sts-action@acaa02eee7e3bb0839e4272dacb37b8f3b58ba80 # v1.0.3 id: octo-sts @@ -31,14 +33,14 @@ jobs: - name: Find and write latest version id: write-version run: | - python local/bin/py/version_getter.py \ + python ${HUGO_ROOT}local/bin/py/version_getter.py \ --url "https://api.datadoghq.com/api/v2/on-prem-management-service/runner/latest-image" \ --file-name "private_action_runner_version.json" - name: Save modified file run: | mkdir -p $RUNNER_TEMP/temp - cp ./data/private_action_runner_version.json $RUNNER_TEMP/temp/ + cp ./${HUGO_ROOT}data/private_action_runner_version.json $RUNNER_TEMP/temp/ - name: echo new version run: echo ${{ steps.write-version.outputs.new_version }} @@ -49,14 +51,14 @@ jobs: - name: Restore modified file run: | - cp $RUNNER_TEMP/temp/private_action_runner_version.json ./data/ + cp $RUNNER_TEMP/temp/private_action_runner_version.json ./${HUGO_ROOT}data/ - name: Write version if: steps.write-version.outputs.new_version == 'true' run: |- git config user.name "github-actions[bot]" git config user.email "41898282+github-actions[bot]@users.noreply.github.com" - git add ./data/private_action_runner_version.json + git add ./${HUGO_ROOT}data/private_action_runner_version.json git commit -m "(Automated) Bump private action runner version" git push -f origin HEAD:refs/heads/automatic-version-update/private-action-runner diff --git a/.github/workflows/bump_synthetics_worker_version.yml b/.github/workflows/bump_synthetics_worker_version.yml index f4be2f5a565..a8fbf465007 100644 --- a/.github/workflows/bump_synthetics_worker_version.yml +++ b/.github/workflows/bump_synthetics_worker_version.yml @@ -12,6 +12,8 @@ jobs: pull-requests: write # Action creates a PR. 
runs-on: ubuntu-latest name: Find latest synthetics-worker version + env: + HUGO_ROOT: ${{ vars.HUGO_DIR && format('{0}/', vars.HUGO_DIR) || '' }} steps: - uses: DataDog/dd-octo-sts-action@acaa02eee7e3bb0839e4272dacb37b8f3b58ba80 # v1.0.3 id: octo-sts @@ -31,14 +33,14 @@ jobs: - name: Find and write latest version id: write-version run: | - python local/bin/py/version_getter.py \ + python ${HUGO_ROOT}local/bin/py/version_getter.py \ --url "https://ddsynthetics-windows.s3.amazonaws.com/installers.json" \ --file-name "synthetics_worker_versions.json" - name: Save modified file run: | mkdir -p $RUNNER_TEMP/temp - cp ./data/synthetics_worker_versions.json $RUNNER_TEMP/temp/ + cp ./${HUGO_ROOT}data/synthetics_worker_versions.json $RUNNER_TEMP/temp/ - name: echo new version run: echo ${{ steps.write-version.outputs.new_version }} @@ -49,14 +51,14 @@ jobs: - name: Restore modified file run: | - cp $RUNNER_TEMP/temp/synthetics_worker_versions.json ./data/ + cp $RUNNER_TEMP/temp/synthetics_worker_versions.json ./${HUGO_ROOT}data/ - name: Write version if: steps.write-version.outputs.new_version == 'true' run: |- git config user.name "github-actions[bot]" git config user.email "41898282+github-actions[bot]@users.noreply.github.com" - git add ./data/synthetics_worker_versions.json + git add ./${HUGO_ROOT}data/synthetics_worker_versions.json git commit -m "(Automated) Bump synthetic worker version" git push -f origin HEAD:refs/heads/automatic-version-update/synthetics-worker diff --git a/.github/workflows/bump_versions.yml b/.github/workflows/bump_versions.yml index bc800ca13ee..fb3530eca67 100644 --- a/.github/workflows/bump_versions.yml +++ b/.github/workflows/bump_versions.yml @@ -11,6 +11,8 @@ jobs: id-token: write # Needed to federate tokens. 
runs-on: ubuntu-latest + env: + HUGO_ROOT: ${{ vars.HUGO_DIR && format('{0}/', vars.HUGO_DIR) || '' }} steps: - uses: DataDog/dd-octo-sts-action@acaa02eee7e3bb0839e4272dacb37b8f3b58ba80 # v1.0.3 id: octo-sts @@ -49,9 +51,9 @@ jobs: - name: Write version id: write-version run: |- - mkdir -p ./data - echo '${{steps.set-versions.outputs.result}}' > ./data/sdk_versions.json - git add ./data/sdk_versions.json + mkdir -p ./${HUGO_ROOT}data + echo '${{steps.set-versions.outputs.result}}' > ./${HUGO_ROOT}data/sdk_versions.json + git add ./${HUGO_ROOT}data/sdk_versions.json git config user.name "github-actions[bot]" git config user.email "41898282+github-actions[bot]@users.noreply.github.com" git add . diff --git a/.github/workflows/preview_link.yml b/.github/workflows/preview_link.yml index e1351b40099..6755f0677e9 100644 --- a/.github/workflows/preview_link.yml +++ b/.github/workflows/preview_link.yml @@ -3,6 +3,7 @@ on: pull_request: paths: - 'content/en/**.md' + - 'hugo/content/en/**.md' permissions: contents: read @@ -17,6 +18,8 @@ jobs: preview-link: if: contains(github.head_ref, '/') runs-on: ubuntu-latest + env: + HUGO_ROOT: ${{ vars.HUGO_DIR && format('{0}/', vars.HUGO_DIR) || '' }} steps: - name: Checkout uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0 @@ -64,7 +67,7 @@ jobs: ADDED_FILES: ${{ steps.changed_files.outputs.added_files }} id: comment_body run: | - python local/bin/py/preview_links.py --deleted="${DELETED_FILES}" \ + python ${HUGO_ROOT}local/bin/py/preview_links.py --deleted="${DELETED_FILES}" \ --renamed="${RENAMED_FILES}" \ --modified="${MODIFIED_FILES}" \ --added="${ADDED_FILES}" diff --git a/.github/workflows/site_region_check.yml b/.github/workflows/site_region_check.yml index e160729e7cb..68012e7dda0 100644 --- a/.github/workflows/site_region_check.yml +++ b/.github/workflows/site_region_check.yml @@ -4,6 +4,7 @@ on: pull_request: paths: - 'content/en/**/*.md' + - 'hugo/content/en/**/*.md' concurrency: group: ${{ 
github.workflow }}-${{ github.head_ref }} @@ -17,6 +18,8 @@ jobs: check-site-region: if: github.head_ref != 'guacbot/translation-pipeline' runs-on: ubuntu-latest + env: + HUGO_ROOT: ${{ vars.HUGO_DIR && format('{0}/', vars.HUGO_DIR) || '' }} steps: - name: Checkout code uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0 @@ -27,7 +30,7 @@ jobs: - name: Get changed markdown files id: changed_files run: | - FILES=$(git diff --diff-filter=AMD --name-only ${{ github.event.pull_request.base.sha }}...${{ github.event.pull_request.head.sha }} -- 'content/en/**/*.md' | xargs) + FILES=$(git diff --diff-filter=AMD --name-only ${{ github.event.pull_request.base.sha }}...${{ github.event.pull_request.head.sha }} -- "${HUGO_ROOT}content/en/**/*.md" | xargs) echo "files=$FILES" >> $GITHUB_OUTPUT - name: Check for site-region at top of page with support language diff --git a/.github/workflows/vale_linter.yml b/.github/workflows/vale_linter.yml index aa1d7bf9101..bb2b5368ec3 100644 --- a/.github/workflows/vale_linter.yml +++ b/.github/workflows/vale_linter.yml @@ -3,7 +3,9 @@ on: pull_request: paths: - 'content/en/**/*' + - 'hugo/content/en/**/*' - 'layouts/shortcodes/**/*.md' + - 'hugo/layouts/shortcodes/**/*.md' - '!**/*.json' permissions: @@ -18,6 +20,8 @@ jobs: vale: runs-on: ubuntu-latest timeout-minutes: 5 + env: + HUGO_ROOT: ${{ vars.HUGO_DIR && format('{0}/', vars.HUGO_DIR) || '' }} steps: - uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0 with: @@ -50,12 +54,12 @@ jobs: CHANGED_FILES: ${{ steps.changed_lines.outputs.changed_files }} run: | vale "${CHANGED_FILES}" \ - --output=local/bin/py/vale/vale_template.tmpl --no-exit > vale_output.log + --output=${HUGO_ROOT}local/bin/py/vale/vale_template.tmpl --no-exit > vale_output.log - name: Parse Vale output if: steps.changed_lines.outputs.changed_files env: CHANGED_LINES: ${{ steps.changed_lines.outputs.changed_lines }} run: | - python local/bin/py/vale/vale_annotations.py \ + python 
${HUGO_ROOT}local/bin/py/vale/vale_annotations.py \ --git_data="${CHANGED_LINES}" \ No newline at end of file diff --git a/.github/workflows/version_getter_shared.yml b/.github/workflows/version_getter_shared.yml index 4652c585e50..031eed0842d 100644 --- a/.github/workflows/version_getter_shared.yml +++ b/.github/workflows/version_getter_shared.yml @@ -20,6 +20,8 @@ jobs: id-token: write # Needed to federate tokens. runs-on: ubuntu-latest name: Find latest version + env: + HUGO_ROOT: ${{ vars.HUGO_DIR && format('{0}/', vars.HUGO_DIR) || '' }} steps: - uses: DataDog/dd-octo-sts-action@acaa02eee7e3bb0839e4272dacb37b8f3b58ba80 # v1.0.3 id: octo-sts @@ -39,12 +41,12 @@ jobs: - name: Find and write latest version id: write-version run: | - python local/bin/py/version_getter.py --url ${{ inputs.url }} --file-name ${{ inputs.file_name }} + python ${HUGO_ROOT}local/bin/py/version_getter.py --url ${{ inputs.url }} --file-name ${{ inputs.file_name }} - name: Save modified file run: | mkdir -p $RUNNER_TEMP/temp - cp ./data/{{ inputs.file_name }} $RUNNER_TEMP/temp/ + cp ./${HUGO_ROOT}data/{{ inputs.file_name }} $RUNNER_TEMP/temp/ - name: echo new version run: echo ${{ steps.write-version.outputs.new_version }} @@ -55,14 +57,14 @@ jobs: - name: Restore modified file run: | - cp $RUNNER_TEMP/temp/{{ inputs.file_name }} ./data/ + cp $RUNNER_TEMP/temp/{{ inputs.file_name }} ./${HUGO_ROOT}data/ - name: Write version if: steps.write-version.outputs.new_version == 'true' run: |- git config user.name "github-actions[bot]" git config user.email "41898282+github-actions[bot]@users.noreply.github.com" - git add ./data/{{ inputs.file_name }} + git add ./${HUGO_ROOT}data/{{ inputs.file_name }} git commit -m "(Automated) Bump version" git push -f origin HEAD:refs/heads/automatic-version-update/versions diff --git a/.husky/check-cdocs-gitignore.py b/.husky/check-cdocs-gitignore.py index 2a51589c784..911b6950564 100644 --- a/.husky/check-cdocs-gitignore.py +++ 
b/.husky/check-cdocs-gitignore.py @@ -10,7 +10,8 @@ import sys from pathlib import Path -GITIGNORE_PATH = Path("content/.gitignore") +_hugo_prefix = "hugo/" if Path("hugo/content").exists() else "" +GITIGNORE_PATH = Path(f"{_hugo_prefix}content/.gitignore") def get_gitignore_patterns(): @@ -26,7 +27,7 @@ def get_gitignore_patterns(): if stripped.endswith(".md"): # Paths in the gitignore are relative to content/, e.g. /en/foo/bar.md # Convert to repo-relative patterns: content/en/foo/bar.md - repo_pattern = "content" + stripped + repo_pattern = f"{_hugo_prefix}content" + stripped patterns.append(repo_pattern) return patterns diff --git a/.husky/check-circular-aliases.py b/.husky/check-circular-aliases.py index 2d2c70ed66b..ddb8f5bc618 100755 --- a/.husky/check-circular-aliases.py +++ b/.husky/check-circular-aliases.py @@ -6,6 +6,8 @@ import re from pathlib import Path +_hugo_prefix = "hugo/" if Path("hugo/content").exists() else "" + def parse_frontmatter(content): """Parse YAML frontmatter from markdown content.""" # Match frontmatter between --- delimiters @@ -62,12 +64,12 @@ def get_staged_files(): text=True, check=True ) - staged_files = [f for f in result.stdout.strip().split('\n') - if f.endswith('.md') and f.startswith('content/en/')] + staged_files = [f for f in result.stdout.strip().split('\n') + if f.endswith('.md') and f.startswith(f'{_hugo_prefix}content/en/')] return staged_files if staged_files != [''] else [] except subprocess.CalledProcessError: # Fallback to all markdown files for testing - content_dir = Path('content/en') + content_dir = Path(f'{_hugo_prefix}content/en') if content_dir.exists(): return [str(f) for f in content_dir.rglob('*.md')] return [] @@ -100,10 +102,10 @@ def check_circular_aliases(): # Calculate expected location path if file_path.endswith('/_index.md'): # For _index.md files: content/en/foo/bar/_index.md -> foo/bar - expected_location = file_path.replace('content/en/', '').replace('/_index.md', '') + expected_location = 
file_path.replace(f'{_hugo_prefix}content/en/', '').replace('/_index.md', '') else: # For regular .md files: content/en/foo/bar.md -> foo/bar - expected_location = file_path.replace('content/en/', '').replace('.md', '') + expected_location = file_path.replace(f'{_hugo_prefix}content/en/', '').replace('.md', '') # Check each alias for circular reference for alias in aliases: diff --git a/.husky/check-section-index.py b/.husky/check-section-index.py index c15e8181dd0..22bc6cb591f 100644 --- a/.husky/check-section-index.py +++ b/.husky/check-section-index.py @@ -4,6 +4,8 @@ import subprocess from pathlib import Path +_hugo_prefix = "hugo/" if Path("hugo/content").exists() else "" + def get_repo_root(): """Get the git repository root directory.""" @@ -26,7 +28,7 @@ def get_staged_files(): check=True ) staged_files = [f for f in result.stdout.strip().split('\n') - if f.endswith('.md') and f.startswith('content/en/')] + if f.endswith('.md') and f.startswith(f'{_hugo_prefix}content/en/')] return staged_files if staged_files != [''] else [] except subprocess.CalledProcessError: return [] @@ -56,7 +58,7 @@ def dir_exists_on_base_branch(dir_name): merge_base = result.stdout.strip() # Check if the directory existed at the merge base result = subprocess.run( - ['git', 'ls-tree', '--name-only', merge_base, f'content/en/{dir_name}/'], + ['git', 'ls-tree', '--name-only', merge_base, f'{_hugo_prefix}content/en/{dir_name}/'], capture_output=True, text=True, check=True @@ -66,7 +68,7 @@ def dir_exists_on_base_branch(dir_name): # If there's no merge base (e.g. 
first commit), check if dir exists in HEAD try: result = subprocess.run( - ['git', 'ls-tree', '--name-only', 'HEAD', f'content/en/{dir_name}/'], + ['git', 'ls-tree', '--name-only', 'HEAD', f'{_hugo_prefix}content/en/{dir_name}/'], capture_output=True, text=True, check=True @@ -78,12 +80,12 @@ def dir_exists_on_base_branch(dir_name): def has_index_file(repo_root, dir_name): """Check if a top-level directory has an _index.md or _index.mdoc.md.""" - dir_path = repo_root / 'content' / 'en' / dir_name + dir_path = repo_root / f'{_hugo_prefix}content' / 'en' / dir_name if (dir_path / '_index.md').exists() or (dir_path / '_index.mdoc.md').exists(): return True # Also check if either variant is staged (new but not yet on disk) for name in ('_index.md', '_index.mdoc.md'): - relative = f'content/en/{dir_name}/{name}' + relative = f'{_hugo_prefix}content/en/{dir_name}/{name}' try: subprocess.run( ['git', 'show', f':{relative}'], @@ -131,9 +133,9 @@ def main(): print('=====================================', file=sys.stderr) for dir_name in missing: - print(f'\n Directory: content/en/{dir_name}/', file=sys.stderr) + print(f'\n Directory: {_hugo_prefix}content/en/{dir_name}/', file=sys.stderr) print(f' URL path: /{dir_name}/', file=sys.stderr) - print(f' Fix: Create content/en/{dir_name}/_index.md', file=sys.stderr) + print(f' Fix: Create {_hugo_prefix}content/en/{dir_name}/_index.md', file=sys.stderr) print('\n=====================================', file=sys.stderr) print(f'Found {len(missing)} directory(ies) missing _index.md.', file=sys.stderr) diff --git a/astro_reorg/config.yaml b/astro_reorg/config.yaml index 4d9885a62dc..a3fefa99e9d 100644 --- a/astro_reorg/config.yaml +++ b/astro_reorg/config.yaml @@ -48,9 +48,6 @@ top_level: # Docker - docker-compose-docs.yml - # Static assets (shared; Hugo mounts via ../static) - - static - moves_to_hugo: # Hugo core - archetypes @@ -61,6 +58,8 @@ moves_to_hugo: - i18n - layouts - resources + - static: + excludes: [images] # 
static/images stays at repo root; everything else moves - public # Hugo build tooling diff --git a/astro_reorg/execute_reorg.py b/astro_reorg/execute_reorg.py index 52e66ade41b..04748c380fe 100644 --- a/astro_reorg/execute_reorg.py +++ b/astro_reorg/execute_reorg.py @@ -17,9 +17,22 @@ config = yaml.safe_load(f) top_level = set(config.get("top_level", [])) -moves_to_hugo = set(config.get("moves_to_hugo", [])) ignore = set(config.get("ignore", [])) +# moves_to_hugo can contain plain strings or {name: {excludes: [...]}} dicts. +# Build a flat set of names for routing checks, and a separate excludes_map for +# entries that need a partial move (some children stay at the repo root). +moves_to_hugo = set() +excludes_map = {} # name -> set of child names that stay at repo root +for _item in config.get("moves_to_hugo", []): + if isinstance(_item, str): + moves_to_hugo.add(_item) + else: + for _name, _opts in _item.items(): + moves_to_hugo.add(_name) + if _opts and "excludes" in _opts: + excludes_map[_name] = set(_opts["excludes"]) + # Sanity-check the config itself for conflicts. conflicts = top_level & moves_to_hugo if conflicts: @@ -62,6 +75,22 @@ # those pointing at the old path — activation then falls back to the system # Python (no project deps). A venv is a regenerable artifact, so delete it # rather than move it; `make` rebuilds it in place with correct paths. + if name in excludes_map: + # Partial move: relocate all children except the excluded ones, which + # stay at the repo root inside the (now mostly-empty) source directory. 
+ excluded = excludes_map[name] + dst.mkdir(exist_ok=True) + children_moved = 0 + for child in sorted(src.iterdir()): + if child.name in excluded: + continue + child.rename(dst / child.name) + children_moved += 1 + print(f"Moving {name}/ -> hugo/{name}/ " + f"({children_moved} item(s), excluding: {', '.join(sorted(excluded))})") + moved += 1 + continue + if (src / "pyvenv.cfg").is_file(): print(f"Deleting venv {name} (regenerated by make in hugo/{name})") shutil.rmtree(src) @@ -112,8 +141,15 @@ def route_gitignore_segment(line): root_lines.append(raw) hugo_lines.append(raw) elif segment in moves_to_hugo: - hugo_lines.append(raw) - hugo_only_segments.add(segment) + # If the line's second path segment is excluded, it stays at the root. + stripped_body = raw.strip().lstrip("!").lstrip("/") + parts = stripped_body.split("/", 2) + subsegment = parts[1] if len(parts) > 1 else None + if subsegment and subsegment in excludes_map.get(segment, set()): + root_lines.append(raw) + else: + hugo_lines.append(raw) + hugo_only_segments.add(segment) elif segment in top_level: root_lines.append(raw) else: @@ -129,54 +165,9 @@ def route_gitignore_segment(line): print(" NOTE: kept in both (first path segment not in astro_reorg/config.yaml): " + ", ".join(sorted(both_segments))) -# Update .github/workflows/ files to reference paths under hugo/. 
-WORKFLOW_SUBSTITUTIONS = [ - ("- 'content/en/", "- 'hugo/content/en/"), - ("- 'layouts/shortcodes/", "- 'hugo/layouts/shortcodes/"), - ("python local/bin/", "python hugo/local/bin/"), - # vale_linter.yml passes the template path to vale via --output= - ("--output=local/bin/", "--output=hugo/local/bin/"), - ("-- 'content/en/**/*.md'", "-- 'hugo/content/en/**/*.md'"), - # bump_* and version_getter_shared workflows cp/commit data files by ./ path - ("./data/", "./hugo/data/"), -] - -print("\nUpdating .github/workflows/...") -workflows_dir = repo_root / ".github" / "workflows" -for yml_file in sorted(workflows_dir.glob("*.yml")): - original = yml_file.read_text() - updated = original - for old, new in WORKFLOW_SUBSTITUTIONS: - updated = updated.replace(old, new) - if updated != original: - yml_file.write_text(updated) - print(f" Written: {yml_file.name}") - -# Update .husky/ hook scripts to reference paths under hugo/content/. -HUSKY_SUBSTITUTIONS = [ - ("content/en/{dir_name}/{name}", "hugo/content/en/{dir_name}/{name}"), - ("content/en/{dir_name}/", "hugo/content/en/{dir_name}/"), - ("Path('content/en')", "Path('hugo/content/en')"), - ('Path("content/.gitignore")', 'Path("hugo/content/.gitignore")'), - ("f.startswith('content/en/')", "f.startswith('hugo/content/en/')"), - (".replace('content/en/', '')", ".replace('hugo/content/en/', '')"), - ("repo_root / 'content' / 'en'", "repo_root / 'hugo' / 'content' / 'en'"), - ('repo_pattern = "content"', 'repo_pattern = "hugo/content"'), -] - -print("\nUpdating .husky/...") -husky_dir = repo_root / ".husky" -for py_file in sorted(husky_dir.glob("*.py")): - original = py_file.read_text() - updated = original - for old, new in HUSKY_SUBSTITUTIONS: - updated = updated.replace(old, new) - if updated != original: - py_file.write_text(updated) - print(f" Written: {py_file.name}") - -# Update Makefile: static/ stays at repo root, so paths that were ./static/ -# must become ../static/ relative to the hugo/ project root. 
+# Update Makefile: static/images stays at the repo root (excluded from the +# static/ partial move), so ./static/images/ must become ../static/images/ +# relative to the hugo/ project root. MAKEFILE_SUBSTITUTIONS = [ ("@git clean -Xf ./static/", "@git clean -Xf ../static/"), ] @@ -192,23 +183,6 @@ def route_gitignore_segment(line): makefile.write_text(updated) print(" Written: Makefile") -# Update assets/scripts/ JS build scripts: static/ stays at repo root, so -# ./static/ references must become ../static/ relative to hugo/. -ASSETS_SCRIPTS_SUBSTITUTIONS = [ - ("./static/resources/json/", "../static/resources/json/"), -] - -print("\nUpdating assets/scripts/...") -scripts_dir = hugo_dir / "assets" / "scripts" -for js_file in sorted(scripts_dir.glob("*.js")): - original = js_file.read_text() - updated = original - for old, new in ASSETS_SCRIPTS_SUBSTITUTIONS: - updated = updated.replace(old, new) - if updated != original: - js_file.write_text(updated) - print(f" Written: assets/scripts/{js_file.name}") - # Update .github/CODEOWNERS to reference paths under hugo/. # # CODEOWNERS is not YAML; it is a line-based format where each rule is @@ -244,6 +218,13 @@ def route_codeowners_pattern(pattern): if segment not in moves_to_hugo: return segment, None + # If the pattern's second segment is excluded, it stays at the root. 
+ rest = body[len(segment):] + if rest.startswith("/"): + subsegment = rest[1:].split("/", 1)[0] + if subsegment in excludes_map.get(segment, set()): + return segment, None + new_body = "hugo/" + body return segment, (("/" + new_body) if anchored else new_body) diff --git a/config/_default/config.yaml b/config/_default/config.yaml index ba77a4faf3b..6900dddd191 100644 --- a/config/_default/config.yaml +++ b/config/_default/config.yaml @@ -133,8 +133,10 @@ module: - source: content/es target: content lang: es - - source: ../static + - source: static target: static + - source: ../static/images + target: static/images - source: layouts target: layouts excludeFiles: @@ -142,8 +144,10 @@ module: - 'shortcodes/mdoc/*' - source: data target: data - - source: ../static + - source: static target: assets + - source: ../static/images + target: assets/images - source: assets target: assets - source: i18n From e88e62c6cba5da0132bcad1b6c1ac067caf2ff26 Mon Sep 17 00:00:00 2001 From: Jen Gilbert Date: Mon, 15 Jun 2026 15:04:35 -0500 Subject: [PATCH 18/18] Use manual var override for hugo root --- .github/workflows/bump_private_action_runner_version.yml | 2 +- .github/workflows/bump_synthetics_worker_version.yml | 2 +- .github/workflows/bump_versions.yml | 2 +- .github/workflows/preview_link.yml | 2 +- .github/workflows/site_region_check.yml | 2 +- .github/workflows/vale_linter.yml | 2 +- .github/workflows/version_getter_shared.yml | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/bump_private_action_runner_version.yml b/.github/workflows/bump_private_action_runner_version.yml index 05772a564b8..a6b26360b1f 100644 --- a/.github/workflows/bump_private_action_runner_version.yml +++ b/.github/workflows/bump_private_action_runner_version.yml @@ -13,7 +13,7 @@ jobs: runs-on: ubuntu-latest name: Find latest private action runner version env: - HUGO_ROOT: ${{ vars.HUGO_DIR && format('{0}/', vars.HUGO_DIR) || '' }} + HUGO_ROOT: 'hugo/' # TODO: Remove 
once HUGO_DIR repo var is set; replace with ${{ vars.HUGO_DIR && format('{0}/', vars.HUGO_DIR) || '' }} steps: - uses: DataDog/dd-octo-sts-action@acaa02eee7e3bb0839e4272dacb37b8f3b58ba80 # v1.0.3 id: octo-sts diff --git a/.github/workflows/bump_synthetics_worker_version.yml b/.github/workflows/bump_synthetics_worker_version.yml index a8fbf465007..848926b2821 100644 --- a/.github/workflows/bump_synthetics_worker_version.yml +++ b/.github/workflows/bump_synthetics_worker_version.yml @@ -13,7 +13,7 @@ jobs: runs-on: ubuntu-latest name: Find latest synthetics-worker version env: - HUGO_ROOT: ${{ vars.HUGO_DIR && format('{0}/', vars.HUGO_DIR) || '' }} + HUGO_ROOT: 'hugo/' # TODO: Remove once HUGO_DIR repo var is set; replace with ${{ vars.HUGO_DIR && format('{0}/', vars.HUGO_DIR) || '' }} steps: - uses: DataDog/dd-octo-sts-action@acaa02eee7e3bb0839e4272dacb37b8f3b58ba80 # v1.0.3 id: octo-sts diff --git a/.github/workflows/bump_versions.yml b/.github/workflows/bump_versions.yml index fb3530eca67..2e04d8a662d 100644 --- a/.github/workflows/bump_versions.yml +++ b/.github/workflows/bump_versions.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest env: - HUGO_ROOT: ${{ vars.HUGO_DIR && format('{0}/', vars.HUGO_DIR) || '' }} + HUGO_ROOT: 'hugo/' # TODO: Remove once HUGO_DIR repo var is set; replace with ${{ vars.HUGO_DIR && format('{0}/', vars.HUGO_DIR) || '' }} steps: - uses: DataDog/dd-octo-sts-action@acaa02eee7e3bb0839e4272dacb37b8f3b58ba80 # v1.0.3 id: octo-sts diff --git a/.github/workflows/preview_link.yml b/.github/workflows/preview_link.yml index 6755f0677e9..17f31f4999c 100644 --- a/.github/workflows/preview_link.yml +++ b/.github/workflows/preview_link.yml @@ -19,7 +19,7 @@ jobs: if: contains(github.head_ref, '/') runs-on: ubuntu-latest env: - HUGO_ROOT: ${{ vars.HUGO_DIR && format('{0}/', vars.HUGO_DIR) || '' }} + HUGO_ROOT: 'hugo/' # TODO: Remove once HUGO_DIR repo var is set; replace with ${{ vars.HUGO_DIR && format('{0}/', vars.HUGO_DIR) || '' }} steps: - name: 
Checkout uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0 diff --git a/.github/workflows/site_region_check.yml b/.github/workflows/site_region_check.yml index 68012e7dda0..285a4423dfb 100644 --- a/.github/workflows/site_region_check.yml +++ b/.github/workflows/site_region_check.yml @@ -19,7 +19,7 @@ jobs: if: github.head_ref != 'guacbot/translation-pipeline' runs-on: ubuntu-latest env: - HUGO_ROOT: ${{ vars.HUGO_DIR && format('{0}/', vars.HUGO_DIR) || '' }} + HUGO_ROOT: 'hugo/' # TODO: Remove once HUGO_DIR repo var is set; replace with ${{ vars.HUGO_DIR && format('{0}/', vars.HUGO_DIR) || '' }} steps: - name: Checkout code uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0 diff --git a/.github/workflows/vale_linter.yml b/.github/workflows/vale_linter.yml index bb2b5368ec3..4183d403982 100644 --- a/.github/workflows/vale_linter.yml +++ b/.github/workflows/vale_linter.yml @@ -21,7 +21,7 @@ jobs: runs-on: ubuntu-latest timeout-minutes: 5 env: - HUGO_ROOT: ${{ vars.HUGO_DIR && format('{0}/', vars.HUGO_DIR) || '' }} + HUGO_ROOT: 'hugo/' # TODO: Remove once HUGO_DIR repo var is set; replace with ${{ vars.HUGO_DIR && format('{0}/', vars.HUGO_DIR) || '' }} steps: - uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0 with: diff --git a/.github/workflows/version_getter_shared.yml b/.github/workflows/version_getter_shared.yml index 031eed0842d..907b6735d59 100644 --- a/.github/workflows/version_getter_shared.yml +++ b/.github/workflows/version_getter_shared.yml @@ -21,7 +21,7 @@ jobs: runs-on: ubuntu-latest name: Find latest version env: - HUGO_ROOT: ${{ vars.HUGO_DIR && format('{0}/', vars.HUGO_DIR) || '' }} + HUGO_ROOT: 'hugo/' # TODO: Remove once HUGO_DIR repo var is set; replace with ${{ vars.HUGO_DIR && format('{0}/', vars.HUGO_DIR) || '' }} steps: - uses: DataDog/dd-octo-sts-action@acaa02eee7e3bb0839e4272dacb37b8f3b58ba80 # v1.0.3 id: octo-sts