diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 5449837ae..684cd3a2c 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -60,3 +60,20 @@ jobs: --platform linux/amd64,linux/arm64 \ --provenance=false \ . + + e2e-kind: + name: e2e-kind + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + # kubectl and docker are preinstalled on ubuntu-latest; pin kind and helm via + # direct downloads to keep the toolchain reproducible without extra actions. + - name: Install kind and helm + run: | + curl -fsSLo ./kind https://kind.sigs.k8s.io/dl/v0.27.0/kind-linux-amd64 + chmod +x ./kind && sudo mv ./kind /usr/local/bin/kind + curl -fsSL https://get.helm.sh/helm-v3.17.1-linux-amd64.tar.gz | tar -xz + sudo mv linux-amd64/helm /usr/local/bin/helm + # Builds the image, loads it into a kind consumer cluster, helm-installs the + # konnector, and asserts the full bind/sync flow against a kind provider. + - run: make test-e2e-kind diff --git a/Makefile b/Makefile index db11856aa..5dd03417a 100644 --- a/Makefile +++ b/Makefile @@ -71,6 +71,13 @@ test-e2e: KUBEBUILDER_ASSETS="$$($(SETUP_ENVTEST) use $(ENVTEST_K8S_VERSION) -p path)" \ go test ./test/e2e/... -count=1 -timeout 600s +# Full kind-based e2e: build the image, load it into kind, helm-install the +# konnector, and assert the bind/sync flow end to end. Pass KEEP=1 to leave the +# clusters running, NO_BUILD=1 to reuse an already-loaded image. +.PHONY: test-e2e-kind +test-e2e-kind: + IMAGE=$(IMAGE) ./hack/e2e.sh + .PHONY: vet vet: go vet ./... diff --git a/hack/demo.sh b/hack/demo.sh index 15b8ad9a4..354af5a25 100755 --- a/hack/demo.sh +++ b/hack/demo.sh @@ -23,7 +23,7 @@ PROVIDER=kbind-provider CONSUMER=kbind-consumer HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" V2="$(cd "${HERE}/.." 
&& pwd)" -SAMPLES="${V2}/konnector/config/samples" +SAMPLES="${V2}/config/samples" PROVIDER_KC=/tmp/${PROVIDER}.kubeconfig CONSUMER_KC=/tmp/${CONSUMER}.kubeconfig @@ -59,7 +59,7 @@ cat <${RESET} $*"; } +pass() { echo "${GREEN} ✓${RESET} $*"; } +fail() { echo "${RED} ✗ $*${RESET}" >&2; exit 1; } + +# retry — succeeds as soon as does. +retry() { + local attempts=$1 delay=$2; shift 2 + local i + for ((i = 1; i <= attempts; i++)); do + if "$@"; then return 0; fi + sleep "${delay}" + done + return 1 +} + +cleanup() { + rm -f "${PROVIDER_KC}" "${CONSUMER_KC}" "${PROVIDER_INTERNAL_KC}" + if [[ "${KEEP}" == "1" ]]; then + cat </dev/null 2>&1 || true + kind delete cluster --name "${CONSUMER}" >/dev/null 2>&1 || true +} +trap cleanup EXIT + +# ---------------------------------------------------------------------------- +info "Creating kind clusters (${PROVIDER}, ${CONSUMER})" +kind create cluster --name "${PROVIDER}" >/dev/null 2>&1 || true +kind create cluster --name "${CONSUMER}" >/dev/null 2>&1 || true +kind get kubeconfig --name "${PROVIDER}" > "${PROVIDER_KC}" +kind get kubeconfig --name "${CONSUMER}" > "${CONSUMER_KC}" + +# A kind cluster can report "created" before its API server is reachable, and a +# memory-starved docker host can leave a node NotReady. Gate on real readiness +# so later steps fail loudly here instead of with a cryptic "connection refused". 
+info "Waiting for both clusters' API servers and nodes to be Ready"
+# retry's first argument is an attempt count, not seconds: with a 2s delay the
+# budget for each API-server probe below is roughly 2 x TIMEOUT seconds.
+retry "${TIMEOUT}" 2 kp get --raw=/readyz >/dev/null 2>&1 \
+  || fail "provider API server never became ready (check 'docker ps' — the kind node may have been OOM-killed)"
+retry "${TIMEOUT}" 2 kc get --raw=/readyz >/dev/null 2>&1 \
+  || fail "consumer API server never became ready (check 'docker ps' — the kind node may have been OOM-killed)"
+# Guard the node waits explicitly, like every other assertion in this script,
+# so a NotReady node can never fall through to the "pass" line below.
+kp wait --for=condition=Ready nodes --all --timeout="${TIMEOUT}s" >/dev/null \
+  || fail "provider node(s) did not become Ready"
+kc wait --for=condition=Ready nodes --all --timeout="${TIMEOUT}s" >/dev/null \
+  || fail "consumer node(s) did not become Ready"
+pass "both clusters are reachable and Ready"
+
+# The konnector runs as a pod in the consumer cluster, so it cannot reach the
+# provider via the host-mapped 127.0.0.1 port. Rewrite the server to the
+# provider node's IP on the shared docker "kind" network; the kind API server
+# cert includes that internal IP as a SAN, so TLS still verifies.
+# A failing "docker inspect" is folded into an empty value so the dedicated
+# error message below is always the one the user sees.
+PROVIDER_IP="$(docker inspect -f '{{(index .NetworkSettings.Networks "kind").IPAddress}}' "${PROVIDER}-control-plane")" \
+  || PROVIDER_IP=""
+[[ -n "${PROVIDER_IP}" ]] || fail "could not determine provider node IP on the kind network"
+sed -E "s#server: https://127\.0\.0\.1:[0-9]+#server: https://${PROVIDER_IP}:6443#" \
+  "${PROVIDER_KC}" > "${PROVIDER_INTERNAL_KC}"
+# sed exits 0 even when nothing matched; verify the rewrite really happened so
+# the konnector is never handed a kubeconfig that still points at the host.
+grep -qF -- "server: https://${PROVIDER_IP}:6443" "${PROVIDER_INTERNAL_KC}" \
+  || fail "could not rewrite the provider kubeconfig server to ${PROVIDER_IP}:6443"
+
+# ----------------------------------------------------------------------------
+if [[ "${NO_BUILD}" == "1" ]]; then
+  info "Skipping image build (NO_BUILD=1); reusing local image ${IMAGE}"
+else
+  info "Building konnector image ${IMAGE}"
+  docker build -t "${IMAGE}" "${ROOT}"
+fi
+info "Loading ${IMAGE} into the consumer cluster"
+kind load docker-image "${IMAGE}" --name "${CONSUMER}"
+
+# ----------------------------------------------------------------------------
+info "Provider: installing the exported Widget CRD"
+kp apply -f "${SAMPLES}/provider-widget-crd.yaml"
+
+# ----------------------------------------------------------------------------
+info "Consumer: helm install the konnector (chart bundles the core CRDs)"
+# create --dry-run | apply keeps the namespace step idempotent across re-runs.
+kc create namespace "${NAMESPACE}" --dry-run=client -o yaml | kc apply -f -
+helm --kubeconfig "${CONSUMER_KC}" upgrade --install "${RELEASE}" "${CHART}" \
+  --namespace "${NAMESPACE}" \
+  --set image.repository="${IMAGE_REPO}" \
+  --set image.tag="${IMAGE_TAG}" \
+  --set image.pullPolicy=IfNotPresent \
+  --wait --timeout "${TIMEOUT}s"
+
+DEPLOY="$(kc -n "${NAMESPACE}" get deploy -l app.kubernetes.io/instance="${RELEASE}" -o name | head -n1)"
+[[ -n "${DEPLOY}" ]] || fail "konnector deployment not found"
+kc -n "${NAMESPACE}" rollout status "${DEPLOY}" --timeout="${TIMEOUT}s" \
+  || fail "konnector deployment did not become available"
+pass "konnector deployment is available"
+
+# ----------------------------------------------------------------------------
+info "Consumer: store the provider kubeconfig and apply the bundle"
+# Delete-then-create so a re-run always stores the freshly rewritten kubeconfig.
+kc -n "${NAMESPACE}" delete secret demo-provider-kubeconfig --ignore-not-found >/dev/null
+kc -n "${NAMESPACE}" create secret generic demo-provider-kubeconfig \
+  --from-file=kubeconfig="${PROVIDER_INTERNAL_KC}"
+kc apply -f "${SAMPLES}/binding.yaml"
+
+# ----------------------------------------------------------------------------
+info "Asserting the bind flow"
+kc wait --for=condition=Ready connection/demo-provider --timeout="${TIMEOUT}s" \
+  || fail "Connection did not become Ready"
+pass "Connection demo-provider is Ready"
+
+# "kubectl wait" errors out immediately on a resource that does not exist yet,
+# so poll for the CRD to appear before waiting on its Established condition.
+retry "${TIMEOUT}" 1 kc get crd widgets.example.org >/dev/null 2>&1 \
+  || fail "Widget CRD was not pulled onto the consumer"
+kc wait --for=condition=Established crd/widgets.example.org --timeout="${TIMEOUT}s" >/dev/null \
+  || fail "Widget CRD did not become Established on the consumer"
+pass "Widget CRD pulled onto the consumer and Established"
+
+kc wait --for=condition=Synced clusterbinding/widgets --timeout="${TIMEOUT}s" \
+  || fail "ClusterBinding did not become Synced"
+pass "ClusterBinding widgets is Synced"
+
+# ----------------------------------------------------------------------------
+info "Asserting spec sync UP (consumer -> provider)"
+kc apply -f "${SAMPLES}/widget.yaml"
+retry "${TIMEOUT}" 1 kp -n default get widget my-widget >/dev/null 2>&1 \
+  || fail "Widget did not sync up to the provider"
+SIZE="$(kp -n default get widget my-widget -o jsonpath='{.spec.size}')"
+[[ "${SIZE}" == "large" ]] || fail "Widget synced up but spec.size=${SIZE:-<empty>} (want large)"
+pass "Widget synced up to the provider (spec.size=large)"
+
+# ----------------------------------------------------------------------------
+info "Asserting status sync DOWN (provider -> consumer)"
+kp -n default patch widget my-widget --subresource=status --type=merge \
+  -p '{"status":{"phase":"Running"}}'
+# Succeeds once the consumer's copy of the Widget reports status.phase=Running;
+# kubectl errors are silenced because this is polled through retry.
+phase_is_running() {
+  [[ "$(kc -n default get widget my-widget -o jsonpath='{.status.phase}' 2>/dev/null)" == "Running" ]]
+}
+retry "${TIMEOUT}" 1 phase_is_running \
+  || fail "provider status did not flow down to the consumer"
+pass "status flowed down to the consumer (status.phase=Running)"
+
+echo
+echo "${GREEN}${BOLD}E2E PASSED${RESET}"