diff --git a/.github/workflows/app-gitops-guardrails.yml b/.github/workflows/app-gitops-guardrails.yml index c079b09..c7ed6fc 100644 --- a/.github/workflows/app-gitops-guardrails.yml +++ b/.github/workflows/app-gitops-guardrails.yml @@ -17,14 +17,18 @@ jobs: - name: Checkout uses: actions/checkout@v4 + - name: Setup Go for policy toolchain + uses: actions/setup-go@v5 + with: + go-version: '1.22' + - name: Validate Kubernetes manifests with kubeconform shell: bash run: | set -euo pipefail - curl -sSL -o kubeconform.tar.gz \ - https://github.com/yannh/kubeconform/releases/latest/download/kubeconform-linux-amd64.tar.gz - tar -xzf kubeconform.tar.gz kubeconform + go install github.com/yannh/kubeconform/cmd/kubeconform@v0.6.7 + export PATH="$PATH:$(go env GOPATH)/bin" mapfile -t manifest_files < <(find applications/gitops/base -type f \( -name '*.yaml' -o -name '*.yml' \) | sort) @@ -33,8 +37,21 @@ jobs: exit 1 fi - ./kubeconform -strict -summary "${manifest_files[@]}" + kubeconform -strict -summary "${manifest_files[@]}" - - name: Policy test placeholder (OPA/Kyverno) + - name: Policy checks with Conftest + shell: bash run: | - echo "Run conftest / kyverno CLI checks here" + set -euo pipefail + + go install github.com/open-policy-agent/conftest@v0.57.0 + export PATH="$PATH:$(go env GOPATH)/bin" + + mapfile -t manifest_files < <(find applications/gitops/base -type f \( -name '*.yaml' -o -name '*.yml' \) | sort) + + if [ "${#manifest_files[@]}" -eq 0 ]; then + echo "No Kubernetes manifests found in applications/gitops/base" + exit 1 + fi + + conftest test "${manifest_files[@]}" -p applications/policy diff --git a/Makefile b/Makefile index d25651f..f3d5e11 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,7 @@ ENV ?= dev SERVICE ?= sample-service TAG ?= latest -.PHONY: help build test synth platform-check platform-plan platform-apply app-bootstrap app-deploy platform-progress +.PHONY: help build test synth platform-check platform-plan platform-apply app-bootstrap app-deploy 
app-policy-test platform-progress help: @echo "make build # Build TypeScript" @@ -14,6 +14,7 @@ help: @echo "make platform-apply ENV=dev # Apply platform changes" @echo "make app-bootstrap SERVICE=name # Bootstrap app from template" @echo "make app-deploy ENV=dev SERVICE=name TAG=v1.0.0" + @echo "make app-policy-test # Run local policy bundle checks" @echo "make platform-progress # Show platform-as-product progress tracker" build: @@ -44,5 +45,9 @@ app-deploy: @echo "[app-deploy] ENV=$(ENV) SERVICE=$(SERVICE) TAG=$(TAG)" @echo "Update GitOps manifest tag and let Argo CD reconcile" +app-policy-test: + @echo "[app-policy-test] run conftest against applications/gitops/base with applications/policy" + conftest test applications/gitops/base -p applications/policy + platform-progress: @cat docs/platform-product-progress.md diff --git a/README.md b/README.md index 721be44..ab2fe6d 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,7 @@ It now provides opinionated architecture, repository layout, templates, and deli - Backstage software template example for self-service service creation - CI pipeline for platform IaC quality gates (fmt/validate/lint/security) - GitOps-oriented app delivery guardrails +- OPA/Conftest policy bundle for Kubernetes deployment security checks - Day-2 DX helpers via `Makefile` ## Repository structure @@ -79,6 +80,7 @@ Review feedback and implemented fixes are tracked in: Track implementation maturity and next milestones in: - `docs/platform-product-progress.md` +- `docs/platform-product-operating-model.md` ## Quick commands @@ -89,6 +91,7 @@ make platform-plan ENV=dev make platform-apply ENV=dev make app-bootstrap SERVICE=my-api make app-deploy ENV=dev SERVICE=my-api TAG=v1.2.3 +make app-policy-test ``` ## Notes diff --git a/applications/gitops/base/sample-service.yaml b/applications/gitops/base/sample-service.yaml index 5e2c97d..dc45598 100644 --- a/applications/gitops/base/sample-service.yaml +++ 
b/applications/gitops/base/sample-service.yaml @@ -24,7 +24,7 @@ spec: spec: containers: - name: app - image: nginx:1.27 + image: nginx:1.27.0 ports: - containerPort: 80 resources: @@ -37,6 +37,7 @@ spec: securityContext: allowPrivilegeEscalation: false readOnlyRootFilesystem: true + runAsNonRoot: true --- apiVersion: v1 kind: Service diff --git a/applications/policy/README.md b/applications/policy/README.md new file mode 100644 index 0000000..7fef53d --- /dev/null +++ b/applications/policy/README.md @@ -0,0 +1,17 @@ +# Application GitOps Policy Bundle + +This directory contains OPA/Rego policies evaluated in CI with `conftest`. + +## Scope + +Policies currently validate Kubernetes manifests in `applications/gitops/base` and enforce: + +- explicit, non-`latest` image tags (or digests) +- CPU/memory requests and limits +- secure container defaults (`runAsNonRoot`, `allowPrivilegeEscalation: false`) + +## Local validation + +```bash +conftest test applications/gitops/base/*.yaml -p applications/policy +``` diff --git a/applications/policy/deployment-security.rego b/applications/policy/deployment-security.rego new file mode 100644 index 0000000..6457604 --- /dev/null +++ b/applications/policy/deployment-security.rego @@ -0,0 +1,68 @@ +package main + +import rego.v1 + +deny contains msg if { + input.kind == "Deployment" + container := input.spec.template.spec.containers[_] + not container.securityContext.runAsNonRoot + msg := sprintf("deployment %q container %q must set securityContext.runAsNonRoot=true", [input.metadata.name, container.name]) +} + +deny contains msg if { + input.kind == "Deployment" + container := input.spec.template.spec.containers[_] + not container.securityContext.allowPrivilegeEscalation == false + msg := sprintf("deployment %q container %q must set securityContext.allowPrivilegeEscalation=false", [input.metadata.name, container.name]) +} + +deny contains msg if { + input.kind == "Deployment" + container := input.spec.template.spec.containers[_] + not 
container.resources.requests.cpu + msg := sprintf("deployment %q container %q must define resources.requests.cpu", [input.metadata.name, container.name]) +} + +deny contains msg if { + input.kind == "Deployment" + container := input.spec.template.spec.containers[_] + not container.resources.requests.memory + msg := sprintf("deployment %q container %q must define resources.requests.memory", [input.metadata.name, container.name]) +} + +deny contains msg if { + input.kind == "Deployment" + container := input.spec.template.spec.containers[_] + not container.resources.limits.cpu + msg := sprintf("deployment %q container %q must define resources.limits.cpu", [input.metadata.name, container.name]) +} + +deny contains msg if { + input.kind == "Deployment" + container := input.spec.template.spec.containers[_] + not container.resources.limits.memory + msg := sprintf("deployment %q container %q must define resources.limits.memory", [input.metadata.name, container.name]) +} + +deny contains msg if { + input.kind == "Deployment" + container := input.spec.template.spec.containers[_] + endswith(container.image, ":latest") + msg := sprintf("deployment %q container %q must not use mutable image tags like :latest", [input.metadata.name, container.name]) +} + +# Untagged references such as "nginx" resolve to :latest at pull time, so require an explicit tag or digest. +deny contains msg if { + input.kind == "Deployment" + container := input.spec.template.spec.containers[_] + not image_has_tag_or_digest(container.image) + msg := sprintf("deployment %q container %q must pin an explicit image tag or digest", [input.metadata.name, container.name]) +} + +# A digest reference contains "@"; a tag shows up as ":" in the last path segment, so a registry port ("localhost:5000/app") does not count. +image_has_tag_or_digest(image) if contains(image, "@") + +image_has_tag_or_digest(image) if { + segments := split(image, "/") + contains(segments[count(segments) - 1], ":") +} diff --git a/docs/platform-product-operating-model.md b/docs/platform-product-operating-model.md new file mode 100644 index 0000000..fc3a41d --- /dev/null +++ b/docs/platform-product-operating-model.md @@ -0,0 +1,95 @@ +# Platform as a Product Operating Model + +_Last updated: 2026-03-26_ + +This guide defines how the platform is managed as an internal product, not only as infrastructure. + +## Product mission + +Enable product teams to ship secure, observable services to production quickly by providing paved roads with self-service workflows and built-in guardrails. 
+ +## Product users and ownership + +| Area | Owner | Responsibility | +|---|---|---| +| Platform product strategy | Platform Product Manager | Roadmap, prioritization, adoption, stakeholder communication | +| Runtime and infrastructure | Platform Engineering | EKS, networking, compute, shared services | +| Security guardrails | Security + Platform | Policy bundles, IAM patterns, vulnerability controls | +| Developer portal and templates | Platform Developer Experience | Backstage catalog, templates, golden paths | +| Service onboarding consumers | Product Engineering Teams | Build services using templates and follow platform contracts | + +## Product capabilities + +The platform product is organized into capabilities with explicit contracts: + +1. **Service scaffolding** + - Backstage template-driven repository bootstrap + - Standardized service metadata and ownership tags +2. **Delivery orchestration** + - CI checks for manifest validation and policy enforcement + - GitOps reconciliation through Argo CD +3. **Runtime baseline** + - EKS runtime and namespace conventions + - Network, compute, and secret management patterns +4. **Security and policy** + - OPA/Conftest policy checks in PR workflow + - Secure defaults for workload manifests +5. **Observability and reliability** + - Metrics, logs, traces and SLO conventions + - Alerting integration and runbook expectations + +## Platform contracts (golden path) + +Every onboarded service is expected to provide: + +- a catalog entry with service owner and tier +- deployable GitOps manifests for `dev`, `stage`, and `prod` +- CPU/memory requests and limits on workload containers +- non-root runtime and no privilege escalation +- immutable image references (no `:latest`) +- minimum observability signals (health, metrics, logs) + +## Intake and prioritization workflow + +1. Teams submit platform requests through backlog intake. +2. Requests are triaged weekly by Platform PM + lead engineer. +3. 
Prioritization uses impact, adoption, risk reduction, and effort. +4. Decisions are published in roadmap updates. +5. Completed features include migration docs and rollout notes. + +## Release and change management + +- **Cadence**: bi-weekly platform release train. +- **Change types**: + - additive (non-breaking): immediate release + - behavioral (potentially breaking): release note + deprecation window +- **Versioning approach**: + - templates and policy bundles use semantic tags + - breaking policy changes require staged enforcement (warn -> block) + +## Adoption metrics + +Track platform outcomes as product KPIs: + +- lead time to first deployment +- percentage of services onboarded via template +- PR policy compliance pass rate +- failed deployment rollback rate +- developer satisfaction (quarterly pulse) + +## Operating rituals + +- Weekly platform triage and incident review +- Bi-weekly roadmap/demo for stakeholders +- Monthly policy and compliance review with security +- Quarterly platform maturity review against success metrics + +## Documentation standards + +For every new platform capability, include: + +- capability description and user story +- onboarding instructions +- operational runbook and escalation path +- rollback/deprecation guidance +- success metric and owner diff --git a/docs/platform-product-progress.md b/docs/platform-product-progress.md index 5d8c304..5e3ea34 100644 --- a/docs/platform-product-progress.md +++ b/docs/platform-product-progress.md @@ -1,6 +1,6 @@ # Platform as a Product Progress Tracker -_Last updated: 2026-03-24_ +_Last updated: 2026-03-26_ ## Delivery status snapshot @@ -12,7 +12,7 @@ _Last updated: 2026-03-24_ | App GitOps guardrails | ✅ Complete | 100% | kubeconform validation enabled and fail-fast behavior enforced. | | Secure-by-default CDK sample hardening | ✅ Complete | 100% | KMS, VPC, DLQ, IAM auth, caching, encrypted logs implemented. 
| | Environment overlays (dev/stage/prod) | 🟡 In Progress | 40% | Structure exists; env-specific manifests and policy sets pending. | -| Policy-as-code enforcement (OPA/Kyverno) | 🟡 In Progress | 30% | Placeholder step exists; enforceable policy bundles pending. | +| Policy-as-code enforcement (OPA/Kyverno) | 🟡 In Progress | 60% | Conftest policy bundle and CI enforcement added for deployment security/image/resource guardrails. | | Observability productization | 🟡 In Progress | 35% | Architecture defined; Prometheus/Grafana/Loki/OTel deployments pending. | | EKS + Argo CD platform runtime | ⏳ Planned | 20% | Target model documented; implementation modules still to be added. | | Backstage portal deployment | ⏳ Planned | 15% | Template exists; portal deployment and catalog automation pending. | @@ -29,7 +29,7 @@ _Last updated: 2026-03-24_ 1. Implement EKS runtime module under `platform/modules/eks` and bootstrap cluster add-ons. 2. Stand up Argo CD in `platform/services/argocd` with app-of-apps model. -3. Add policy bundles and CI checks (`conftest` and/or `kyverno apply`) in `app-gitops-guardrails`. +3. Expand policy bundle coverage beyond Deployment controls (Ingress, NetworkPolicy, PodDisruptionBudget). 4. Add observability baseline (Prometheus, Grafana, Loki, OpenTelemetry Collector). 5. Expand service repo structure with CI, Dockerfile, Helm chart, and SLO/runbook assets.