From 7ca71740bb65ba02d3fe94f1ed26cc0275bff0e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20St=C3=A4bler?= Date: Wed, 15 Apr 2026 15:37:32 +0200 Subject: [PATCH 01/24] Run e2e tests in parallel --- Makefile | 2 +- test/e2e/func_middleware_update_test.go | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 1c987eb..da1f3d0 100644 --- a/Makefile +++ b/Makefile @@ -120,7 +120,7 @@ test: manifests generate fmt vet setup-envtest ## Run tests. .PHONY: test-e2e ## Run e2e tests. test-e2e: - go test -timeout 1h ./test/e2e/ -v -ginkgo.v -ginkgo.timeout=1h -ginkgo.label-filter="!bundle" + go test -timeout 1h ./test/e2e/ -v -ginkgo.v -ginkgo.timeout=1h -ginkgo.label-filter="!bundle" -ginkgo.p .PHONY: test-e2e-bundle ## Run bundle e2e tests. test-e2e-bundle: operator-sdk docker-build docker-push bundle bundle-build bundle-push install-olm-in-cluster diff --git a/test/e2e/func_middleware_update_test.go b/test/e2e/func_middleware_update_test.go index ce89e9d..4d07fa3 100644 --- a/test/e2e/func_middleware_update_test.go +++ b/test/e2e/func_middleware_update_test.go @@ -248,7 +248,8 @@ var _ = Describe("Middleware Update", func() { }) }) - Context("when ConfigMap autoUpdateMiddleware setting changes", func() { + // this context should not run in parallel (--> Serial), as this would interfere other tests + Context("when ConfigMap autoUpdateMiddleware setting changes", Serial /*don't run in parallel*/, func() { const ( operatorNamespace = "func-operator-system" controllerConfigName = "func-operator-controller-config" From ff17aa411c5aece5ab5d976c60cd91eb8a2db2b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20St=C3=A4bler?= Date: Wed, 15 Apr 2026 15:41:32 +0200 Subject: [PATCH 02/24] Cleanup only on success --- test/e2e/func_deploy_test.go | 42 ++++++++++++------------- test/e2e/func_middleware_update_test.go | 24 +++++++------- test/utils/utils.go | 14 +++++++++ 3 files changed, 47 insertions(+), 33 deletions(-) diff --git a/test/e2e/func_deploy_test.go b/test/e2e/func_deploy_test.go index 95ab51d..7c6ef39 100644 --- a/test/e2e/func_deploy_test.go +++ b/test/e2e/func_deploy_test.go @@ -138,20 +138,20 @@ var _ = Describe("Operator", func() { // Create repository provider resources with automatic cleanup username, password, _, cleanup, err := repoProvider.CreateRandomUser() Expect(err).NotTo(HaveOccurred()) - DeferCleanup(cleanup) + utils.DeferCleanupOnSuccess(cleanup) _, repoURL, cleanup, err = repoProvider.CreateRandomRepo(username, false) Expect(err).NotTo(HaveOccurred()) - DeferCleanup(cleanup) + utils.DeferCleanupOnSuccess(cleanup) // Initialize repository with function code repoDir, err = utils.InitializeRepoWithFunction(repoURL, username, password, "go") Expect(err).NotTo(HaveOccurred()) - DeferCleanup(os.RemoveAll, repoDir) + utils.DeferCleanupOnSuccess(os.RemoveAll, repoDir) functionNamespace, err = utils.GetTestNamespace() Expect(err).NotTo(HaveOccurred()) - DeferCleanup(cleanupNamespaces, functionNamespace) + utils.DeferCleanupOnSuccess(cleanupNamespaces, functionNamespace) // Deploy function using func CLI out, err := utils.RunFuncDeploy(repoDir, utils.WithNamespace(functionNamespace)) @@ -159,7 +159,7 @@ var _ = Describe("Operator", func() { _, _ = fmt.Fprint(GinkgoWriter, out) // Cleanup func deployment - DeferCleanup(func() { + utils.DeferCleanupOnSuccess(func() { _, _ = utils.RunFunc("delete", "--path", repoDir, "--namespace", functionNamespace) }) @@ -212,11 +212,11 @@ var _ = Describe("Operator", func() { // Create repository provider resources with automatic cleanup username, password, _, cleanup, err := repoProvider.CreateRandomUser() Expect(err).NotTo(HaveOccurred()) - DeferCleanup(cleanup) + utils.DeferCleanupOnSuccess(cleanup) _, repoURL, cleanup, err = repoProvider.CreateRandomRepo(username, false) Expect(err).NotTo(HaveOccurred()) - DeferCleanup(cleanup) + utils.DeferCleanupOnSuccess(cleanup) // Initialize repository with function code repoDir, err = utils.InitializeRepoWithFunction( @@ -226,11 +226,11 @@ var _ = Describe("Operator", func() { "go", utils.WithSubDir(subPath)) Expect(err).NotTo(HaveOccurred()) - DeferCleanup(os.RemoveAll, repoDir) + utils.DeferCleanupOnSuccess(os.RemoveAll, repoDir) functionNamespace, err = utils.GetTestNamespace() Expect(err).NotTo(HaveOccurred()) - DeferCleanup(cleanupNamespaces, functionNamespace) + utils.DeferCleanupOnSuccess(cleanupNamespaces, functionNamespace) functionDir := filepath.Join(repoDir, subPath) @@ -240,7 +240,7 @@ var _ = Describe("Operator", func() { _, _ = fmt.Fprint(GinkgoWriter, out) // Cleanup func deployment - DeferCleanup(func() { + utils.DeferCleanupOnSuccess(func() { _, _ = utils.RunFunc("delete", "--path", functionDir, "--namespace", functionNamespace) }) @@ -295,20 +295,20 @@ var _ = Describe("Operator", func() { // Create repository with function code but don't deploy username, password, _, cleanup, err := repoProvider.CreateRandomUser() Expect(err).NotTo(HaveOccurred()) - DeferCleanup(cleanup) + utils.DeferCleanupOnSuccess(cleanup) _, repoURL, cleanup, err = repoProvider.CreateRandomRepo(username, false) Expect(err).NotTo(HaveOccurred()) - DeferCleanup(cleanup) + utils.DeferCleanupOnSuccess(cleanup) // Initialize repository with function code repoDir, err = utils.InitializeRepoWithFunction(repoURL, username, password, "go") Expect(err).NotTo(HaveOccurred()) - DeferCleanup(os.RemoveAll, repoDir) + utils.DeferCleanupOnSuccess(os.RemoveAll, repoDir) functionNamespace, err = utils.GetTestNamespace() Expect(err).NotTo(HaveOccurred()) - DeferCleanup(cleanupNamespaces, functionNamespace) + utils.DeferCleanupOnSuccess(cleanupNamespaces, functionNamespace) }) AfterEach(func() { @@ -355,11 +355,11 @@ var _ = Describe("Operator", func() { username, password, _, cleanup, err = repoProvider.CreateRandomUser() Expect(err).NotTo(HaveOccurred()) - DeferCleanup(cleanup) + utils.DeferCleanupOnSuccess(cleanup) _, repoURL, cleanup, err = repoProvider.CreateRandomRepo(username, true) // private repo Expect(err).NotTo(HaveOccurred()) - DeferCleanup(cleanup) + utils.DeferCleanupOnSuccess(cleanup) // Create access token for the user token, err = repoProvider.CreateAccessToken(username, password, "e2e-token") @@ -368,11 +368,11 @@ var _ = Describe("Operator", func() { // Initialize repository with function code repoDir, err = utils.InitializeRepoWithFunction(repoURL, username, password, "go") Expect(err).NotTo(HaveOccurred()) - DeferCleanup(os.RemoveAll, repoDir) + utils.DeferCleanupOnSuccess(os.RemoveAll, repoDir) functionNamespace, err = utils.GetTestNamespace() Expect(err).NotTo(HaveOccurred()) - DeferCleanup(cleanupNamespaces, functionNamespace) + utils.DeferCleanupOnSuccess(cleanupNamespaces, functionNamespace) // Deploy function using func CLI out, err := utils.RunFuncDeploy(repoDir, utils.WithNamespace(functionNamespace)) @@ -380,7 +380,7 @@ var _ = Describe("Operator", func() { _, _ = fmt.Fprint(GinkgoWriter, out) // Cleanup func deployment - DeferCleanup(func() { + utils.DeferCleanupOnSuccess(func() { _, _ = utils.RunFunc("delete", "--path", repoDir, "--namespace", functionNamespace) }) @@ -414,7 +414,7 @@ var _ = Describe("Operator", func() { } err := k8sClient.Create(ctx, secret) Expect(err).NotTo(HaveOccurred()) - DeferCleanup(func() { + utils.DeferCleanupOnSuccess(func() { _ = k8sClient.Delete(ctx, secret) }) @@ -481,7 +481,7 @@ var _ = Describe("Operator", func() { } err := k8sClient.Create(ctx, secret) Expect(err).NotTo(HaveOccurred()) - DeferCleanup(func() { + utils.DeferCleanupOnSuccess(func() { _ = k8sClient.Delete(ctx, secret) }) diff --git a/test/e2e/func_middleware_update_test.go b/test/e2e/func_middleware_update_test.go index 4d07fa3..dc3f1bc 100644 --- a/test/e2e/func_middleware_update_test.go +++ b/test/e2e/func_middleware_update_test.go @@ -59,15 +59,15 @@ var _ = Describe("Middleware Update", func() { // Create repository provider resources with automatic cleanup username, password, _, cleanup, err := repoProvider.CreateRandomUser() Expect(err).NotTo(HaveOccurred()) - DeferCleanup(cleanup) + utils.DeferCleanupOnSuccess(cleanup) _, repoURL, cleanup, err = repoProvider.CreateRandomRepo(username, false) Expect(err).NotTo(HaveOccurred()) - DeferCleanup(cleanup) + utils.DeferCleanupOnSuccess(cleanup) functionNamespace, err = utils.GetTestNamespace() Expect(err).NotTo(HaveOccurred()) - DeferCleanup(cleanupNamespaces, functionNamespace) + utils.DeferCleanupOnSuccess(cleanupNamespaces, functionNamespace) // Initialize repository with function code using OLD func CLI version // v1.20.2 has no middleware-version label and uses instance-compatible templates @@ -79,7 +79,7 @@ var _ = Describe("Middleware Update", func() { "go", utils.WithCliVersion(oldFuncVersion)) Expect(err).NotTo(HaveOccurred()) - DeferCleanup(os.RemoveAll, repoDir) + utils.DeferCleanupOnSuccess(os.RemoveAll, repoDir) // Deploy function using the same OLD func CLI version out, err := utils.RunFuncDeploy(repoDir, @@ -89,7 +89,7 @@ var _ = Describe("Middleware Update", func() { _, _ = fmt.Fprint(GinkgoWriter, out) // Cleanup func deployment - DeferCleanup(func() { + utils.DeferCleanupOnSuccess(func() { _, _ = utils.RunFunc("delete", "--path", repoDir, "--namespace", functionNamespace) }) @@ -171,7 +171,7 @@ var _ = Describe("Middleware Update", func() { err = json.Unmarshal([]byte(skopeoOutput), &initialImageLabels) Expect(err).NotTo(HaveOccurred()) - initialMiddlewareVersion := initialImageLabels.Labels["middleware-version"] + initialMiddlewareVersion := initialImageLabels.Labels[funcfn.MiddlewareVersionLabelKey] _, _ = fmt.Fprintf(GinkgoWriter, "Initial middleware-version label: '%s' (expected empty for v1.20.2)\n", initialMiddlewareVersion) @@ -238,7 +238,7 @@ var _ = Describe("Middleware Update", func() { err = json.Unmarshal([]byte(skopeoOutput), &updatedImageLabels) Expect(err).NotTo(HaveOccurred()) - updatedMiddlewareVersion := updatedImageLabels.Labels["middleware-version"] + updatedMiddlewareVersion := updatedImageLabels.Labels[funcfn.MiddlewareVersionLabelKey] _, _ = fmt.Fprintf(GinkgoWriter, "Updated middleware-version label: '%s'\n", updatedMiddlewareVersion) // The operator should have set a middleware version @@ -298,15 +298,15 @@ var _ = Describe("Middleware Update", func() { // Create repository provider resources with automatic cleanup username, password, _, cleanup, err := repoProvider.CreateRandomUser() Expect(err).NotTo(HaveOccurred()) - DeferCleanup(cleanup) + utils.DeferCleanupOnSuccess(cleanup) _, repoURL, cleanup, err = repoProvider.CreateRandomRepo(username, false) Expect(err).NotTo(HaveOccurred()) - DeferCleanup(cleanup) + utils.DeferCleanupOnSuccess(cleanup) functionNamespace, err = utils.GetTestNamespace() Expect(err).NotTo(HaveOccurred()) - DeferCleanup(cleanupNamespaces, functionNamespace) + utils.DeferCleanupOnSuccess(cleanupNamespaces, functionNamespace) // Initialize repository with function code using OLD func CLI version // to ensure middleware will be outdated @@ -318,7 +318,7 @@ var _ = Describe("Middleware Update", func() { "go", utils.WithCliVersion(oldFuncVersion)) Expect(err).NotTo(HaveOccurred()) - DeferCleanup(os.RemoveAll, repoDir) + utils.DeferCleanupOnSuccess(os.RemoveAll, repoDir) // Deploy function using the same OLD func CLI version out, err := utils.RunFuncDeploy(repoDir, @@ -328,7 +328,7 @@ var _ = Describe("Middleware Update", func() { _, _ = fmt.Fprint(GinkgoWriter, out) // Cleanup func deployment - DeferCleanup(func() { + utils.DeferCleanupOnSuccess(func() { _, _ = utils.RunFunc("delete", "--path", repoDir, "--namespace", functionNamespace) }) diff --git a/test/utils/utils.go b/test/utils/utils.go index eb8de2c..3d27388 100644 --- a/test/utils/utils.go +++ b/test/utils/utils.go @@ -20,6 +20,7 @@ import ( "fmt" "os" "os/exec" + "reflect" "strings" . "github.com/onsi/ginkgo/v2" // nolint:revive,staticcheck @@ -81,3 +82,16 @@ func GetTestNamespace() (string, error) { return name, nil } + +func DeferCleanupOnSuccess(args ...any) { + DeferCleanup(func() { + if !CurrentSpecReport().Failed() { + fn := reflect.ValueOf(args[0]) + in := make([]reflect.Value, len(args)-1) + for i, arg := range args[1:] { + in[i] = reflect.ValueOf(arg) + } + fn.Call(in) + } + }) +} From 762d8d250250ff96eb2bc13d4b34341803a5044a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20St=C3=A4bler?= Date: Wed, 15 Apr 2026 15:41:49 +0200 Subject: [PATCH 03/24] Collect deployments, functions and configmaps on test failure --- .github/workflows/test-e2e.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test-e2e.yml b/.github/workflows/test-e2e.yml index c19a4f1..2d639e6 100644 --- a/.github/workflows/test-e2e.yml +++ b/.github/workflows/test-e2e.yml @@ -66,7 +66,9 @@ jobs: run: | mkdir -p /tmp/k8s-artifacts kubectl logs -n func-operator-system -l control-plane=controller-manager --tail=-1 --all-containers --prefix --timestamps > /tmp/k8s-artifacts/func-operator.log - kubectl get functions -A -o yaml > /tmp/functions.yaml + for resource in functions deployments configmaps; do + kubectl get ${resource} -A -o yaml > /tmp/k8s-artifacts/${resource}.yaml + done - name: Upload Kubernetes artifacts if: failure() From 54712a432df62e3b6c25c256eab437d2911a0bb1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20St=C3=A4bler?= Date: Wed, 15 Apr 2026 16:20:42 +0200 Subject: [PATCH 04/24] Allow parallel execution of e2e tests --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index da1f3d0..8614988 100644 --- a/Makefile +++ b/Makefile @@ -120,7 +120,7 @@ test: manifests generate fmt vet setup-envtest ## Run tests. .PHONY: test-e2e ## Run e2e tests. test-e2e: - go test -timeout 1h ./test/e2e/ -v -ginkgo.v -ginkgo.timeout=1h -ginkgo.label-filter="!bundle" -ginkgo.p + go test -timeout 1h ./test/e2e/ -v -ginkgo.v -ginkgo.timeout=1h -ginkgo.label-filter="!bundle" -ginkgo.procs=-1 .PHONY: test-e2e-bundle ## Run bundle e2e tests. test-e2e-bundle: operator-sdk docker-build docker-push bundle bundle-build bundle-push install-olm-in-cluster From d1f09bf7c676a633b7c363835a4b6c56ecf81455 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20St=C3=A4bler?= Date: Wed, 15 Apr 2026 16:24:11 +0200 Subject: [PATCH 05/24] Fix comment formatting in middleware update test --- test/e2e/func_middleware_update_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/e2e/func_middleware_update_test.go b/test/e2e/func_middleware_update_test.go index dc3f1bc..a49820e 100644 --- a/test/e2e/func_middleware_update_test.go +++ b/test/e2e/func_middleware_update_test.go @@ -249,7 +249,7 @@ var _ = Describe("Middleware Update", func() { }) // this context should not run in parallel (--> Serial), as this would interfere other tests - Context("when ConfigMap autoUpdateMiddleware setting changes", Serial /*don't run in parallel*/, func() { + Context("when ConfigMap autoUpdateMiddleware setting changes", Serial /* don't run in parallel */, func() { const ( operatorNamespace = "func-operator-system" controllerConfigName = "func-operator-controller-config" From dad12649c125399f75d2accf1c8acf9098e73efa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20St=C3=A4bler?= Date: Thu, 16 Apr 2026 08:04:28 +0200 Subject: [PATCH 06/24] Use up-to-date Ginkgo in e2e tests --- Makefile | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 8614988..c62b6ab 100644 --- a/Makefile +++ b/Makefile @@ -119,12 +119,12 @@ test: manifests generate fmt vet setup-envtest ## Run tests. KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" go test $$(go list ./... | grep -v /e2e) -coverprofile cover.out .PHONY: test-e2e ## Run e2e tests. -test-e2e: - go test -timeout 1h ./test/e2e/ -v -ginkgo.v -ginkgo.timeout=1h -ginkgo.label-filter="!bundle" -ginkgo.procs=-1 +test-e2e: ginkgo + $(GINKGO) -v --timeout=1h --label-filter="!bundle" -p ./test/e2e/ .PHONY: test-e2e-bundle ## Run bundle e2e tests. -test-e2e-bundle: operator-sdk docker-build docker-push bundle bundle-build bundle-push install-olm-in-cluster - OPERATOR_SDK=$(OPERATOR_SDK) BUNDLE_IMG=$(BUNDLE_IMG) go test -timeout 1h ./test/e2e/ -v -ginkgo.v -ginkgo.timeout=1h -ginkgo.label-filter="bundle" +test-e2e-bundle: operator-sdk docker-build docker-push bundle bundle-build bundle-push install-olm-in-cluster ginkgo + OPERATOR_SDK=$(OPERATOR_SDK) BUNDLE_IMG=$(BUNDLE_IMG) $(GINKGO) -v --timeout=1h --label-filter="bundle" ./test/e2e/ .PHONY: install-olm-in-cluster install-olm-in-cluster: operator-sdk ## Install OLM in cluster if not already installed. @@ -257,10 +257,12 @@ CONTROLLER_GEN ?= $(LOCALBIN)/controller-gen ENVTEST ?= $(LOCALBIN)/setup-envtest GOLANGCI_LINT = $(LOCALBIN)/golangci-lint MOCKERY = $(LOCALBIN)/mockery +GINKGO = $(LOCALBIN)/ginkgo ## Tool Versions KUSTOMIZE_VERSION ?= v5.6.0 CONTROLLER_TOOLS_VERSION ?= v0.18.0 +GINKGO_VERSION ?= v2.28.1 #ENVTEST_VERSION is the version of controller-runtime release branch to fetch the envtest setup script (i.e. release-0.20) ENVTEST_VERSION ?= $(shell go list -m -f "{{ .Version }}" sigs.k8s.io/controller-runtime | awk -F'[v.]' '{printf "release-%d.%d", $$2, $$3}') #ENVTEST_K8S_VERSION is the version of Kubernetes to use for setting up ENVTEST binaries (i.e. 1.31) @@ -301,6 +303,11 @@ mockery: ${MOCKERY} ## Download mockery locally if necessary. ${MOCKERY}: $(LOCALBIN) $(call go-install-tool,${MOCKERY},github.com/vektra/mockery/v3,${MOCKERY_VERSION}) +.PHONY: ginkgo +ginkgo: $(GINKGO) ## Download ginkgo locally if necessary. +$(GINKGO): $(LOCALBIN) + $(call go-install-tool,$(GINKGO),github.com/onsi/ginkgo/v2/ginkgo,$(GINKGO_VERSION)) + # go-install-tool will 'go install' any package with custom target and name of binary, if it doesn't exist # $1 - target path with name of binary # $2 - package url which can be installed From c7e3b534774c425e8c03915f82fba2e64c2323b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20St=C3=A4bler?= Date: Thu, 16 Apr 2026 08:26:09 +0200 Subject: [PATCH 07/24] Fix merge queue failures by avoiding GitHub API rate limiting in Dockerfile Replace GitHub API call with git ls-remote to check for func CLI updates. The unauthenticated GitHub API has a 60 requests/hour limit, which was being exhausted by concurrent E2E test builds in the merge queue, causing HTTP 403 errors and build failures. --- Dockerfile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 58fa242..975e0dd 100644 --- a/Dockerfile +++ b/Dockerfile @@ -39,7 +39,9 @@ ARG FUNC_CLI_GH_REPO=knative/func ARG FUNC_CLI_BRANCH=main # workaround to invalidate cache when func cli repo got updated -ADD https://api.github.com/repos/${FUNC_CLI_GH_REPO}/git/refs/heads/${FUNC_CLI_BRANCH} version.json +# Use git ls-remote instead of GitHub API to avoid rate limiting (60 req/hour for unauthenticated) +# which caused merge queue failures due to multiple concurrent builds +RUN git ls-remote https://github.com/${FUNC_CLI_GH_REPO} refs/heads/${FUNC_CLI_BRANCH} > version.json WORKDIR /workspace RUN git clone --branch ${FUNC_CLI_BRANCH} --single-branch --depth 1 https://github.com/${FUNC_CLI_GH_REPO} . From c2839486c92694149f9eca9353a173a682d1423f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20St=C3=A4bler?= Date: Thu, 16 Apr 2026 08:35:42 +0200 Subject: [PATCH 08/24] Fix flaky metrics endpoint E2E test Two issues were causing intermittent failures: 1. Race condition: The test waited for the pod to reach "Succeeded" status but immediately tried to read logs. Added explicit wait for logs to be available before reading them, preventing "empty logs" failures. 2. Cleanup order: When tests failed, AfterEach hooks ran in reverse order, deleting the curl-metrics pod before the debug AfterEach could collect its logs. Replaced AfterEach with DeferCleanup registered after pod creation to ensure proper cleanup ordering. --- test/e2e/e2e_test.go | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index 94b2358..88137ae 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -124,12 +124,6 @@ var _ = Describe("Manager", func() { Context("with curl-metrics-pod", func() { curlMetricPodName := "curl-metrics" - AfterEach(func() { - cmd := exec.Command("kubectl", "delete", "pod", curlMetricPodName, "-n", namespace, "--ignore-not-found") - _, err := utils.Run(cmd) - Expect(err).NotTo(HaveOccurred()) - }) - It("should ensure the metrics endpoint is serving metrics", func() { By("validating that the metrics service is available") cmd := exec.Command("kubectl", "get", "service", metricsServiceName, "-n", namespace) @@ -185,6 +179,14 @@ var _ = Describe("Manager", func() { _, err = utils.Run(cmd) Expect(err).NotTo(HaveOccurred(), "Failed to create curl-metrics pod") + // Ensure pod cleanup happens after test completes and debug logs are collected + // Using DeferCleanup ensures cleanup runs after AfterEach hooks (which collect debug logs on failure) + DeferCleanup(func() { + cmd := exec.Command("kubectl", "delete", "pod", curlMetricPodName, "-n", namespace, "--ignore-not-found") + _, err := utils.Run(cmd) + Expect(err).NotTo(HaveOccurred()) + }) + By("waiting for the curl-metrics pod to complete.") verifyCurlUp := func(g Gomega) { cmd := exec.Command("kubectl", "get", "pods", curlMetricPodName, @@ -196,6 +198,16 @@ var _ = Describe("Manager", func() { } Eventually(verifyCurlUp, 5*time.Minute).Should(Succeed()) + By("waiting for curl-metrics logs to be available") + // Add a small delay to ensure logs are flushed after pod completion + verifyLogsAvailable := func(g Gomega) { + cmd := exec.Command("kubectl", "logs", curlMetricPodName, "-n", namespace) + output, err := utils.Run(cmd) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(output).NotTo(BeEmpty(), "Logs should not be empty") + } + Eventually(verifyLogsAvailable, 30*time.Second).Should(Succeed()) + By("getting the metrics by checking curl-metrics logs") metricsOutput := getMetricsOutput() Expect(metricsOutput).To(ContainSubstring( From 788f3246c97879c4c5d3ebab9475abadae96f8fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20St=C3=A4bler?= Date: Thu, 16 Apr 2026 08:39:56 +0200 Subject: [PATCH 09/24] Move metrics tests into separate file --- test/e2e/e2e_test.go | 202 +---------------------------------- test/e2e/metrics_test.go | 225 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 226 insertions(+), 201 deletions(-) create mode 100644 test/e2e/metrics_test.go diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index 88137ae..662d1d4 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -19,12 +19,9 @@ package e2e import ( "fmt" "os/exec" - "time" - - . "github.com/onsi/ginkgo/v2" - . "github.com/onsi/gomega" "github.com/functions-dev/func-operator/test/utils" + . "github.com/onsi/ginkgo/v2" ) // namespace where the project is deployed in @@ -33,203 +30,6 @@ const namespace = "func-operator-system" // serviceAccountName created for the project const serviceAccountName = "func-operator-controller-manager" -// metricsServiceName is the name of the metrics service of the project -const metricsServiceName = "func-operator-controller-manager-metrics-service" - -// metricsPort is the port of the metrics service providing the managers metrics -const metricsPort = "8080" - -var _ = Describe("Manager", func() { - var controllerPodName string - - // After each test, check for failures and collect logs, events, - // and pod descriptions for debugging. - AfterEach(func() { - specReport := CurrentSpecReport() - if specReport.Failed() { - By("Fetching controller manager pod logs") - cmd := exec.Command("kubectl", "logs", controllerPodName, "-n", namespace) - controllerLogs, err := utils.Run(cmd) - if err == nil { - _, _ = fmt.Fprintf(GinkgoWriter, "Controller logs:\n %s", controllerLogs) - } else { - _, _ = fmt.Fprintf(GinkgoWriter, "Failed to get Controller logs: %s", err) - } - - By("Fetching Kubernetes events") - cmd = exec.Command("kubectl", "get", "events", "-n", namespace, "--sort-by=.lastTimestamp") - eventsOutput, err := utils.Run(cmd) - if err == nil { - _, _ = fmt.Fprintf(GinkgoWriter, "Kubernetes events:\n%s", eventsOutput) - } else { - _, _ = fmt.Fprintf(GinkgoWriter, "Failed to get Kubernetes events: %s", err) - } - - By("Fetching curl-metrics logs") - cmd = exec.Command("kubectl", "logs", "curl-metrics", "-n", namespace) - metricsOutput, err := utils.Run(cmd) - if err == nil { - _, _ = fmt.Fprintf(GinkgoWriter, "Metrics logs:\n %s", metricsOutput) - } else { - _, _ = fmt.Fprintf(GinkgoWriter, "Failed to get curl-metrics logs: %s", err) - } - - By("Fetching controller manager pod description") - cmd = exec.Command("kubectl", "describe", "pod", controllerPodName, "-n", namespace) - podDescription, err := utils.Run(cmd) - if err == nil { - fmt.Println("Pod description:\n", podDescription) - } else { - fmt.Println("Failed to describe controller pod") - } - } - }) - - SetDefaultEventuallyTimeout(2 * time.Minute) - SetDefaultEventuallyPollingInterval(time.Second) - - Context("Manager", func() { - It("should run successfully", func() { - By("validating that the controller-manager pod is running as expected") - verifyControllerUp := func(g Gomega) { - // Get the name of the controller-manager pod - cmd := exec.Command("kubectl", "get", - "pods", "-l", "control-plane=controller-manager", - "-o", "go-template={{ range .items }}"+ - "{{ if not .metadata.deletionTimestamp }}"+ - "{{ .metadata.name }}"+ - "{{ \"\\n\" }}{{ end }}{{ end }}", - "-n", namespace, - ) - - podOutput, err := utils.Run(cmd) - g.Expect(err).NotTo(HaveOccurred(), "Failed to retrieve controller-manager pod information") - podNames := utils.GetNonEmptyLines(podOutput) - g.Expect(podNames).To(HaveLen(1), "expected 1 controller pod running") - controllerPodName = podNames[0] - g.Expect(controllerPodName).To(ContainSubstring("controller-manager")) - - // Validate the pod's status - cmd = exec.Command("kubectl", "get", - "pods", controllerPodName, "-o", "jsonpath={.status.phase}", - "-n", namespace, - ) - output, err := utils.Run(cmd) - g.Expect(err).NotTo(HaveOccurred()) - g.Expect(output).To(Equal("Running"), "Incorrect controller-manager pod status") - } - Eventually(verifyControllerUp).Should(Succeed()) - }) - - Context("with curl-metrics-pod", func() { - curlMetricPodName := "curl-metrics" - - It("should ensure the metrics endpoint is serving metrics", func() { - By("validating that the metrics service is available") - cmd := exec.Command("kubectl", "get", "service", metricsServiceName, "-n", namespace) - _, err := utils.Run(cmd) - Expect(err).NotTo(HaveOccurred(), "Metrics service should exist") - - By("waiting for the metrics endpoint to be ready") - verifyMetricsEndpointReady := func(g Gomega) { - cmd := exec.Command("kubectl", "get", "endpoints", metricsServiceName, "-n", namespace) - output, err := utils.Run(cmd) - g.Expect(err).NotTo(HaveOccurred()) - g.Expect(output).To(ContainSubstring(metricsPort), "Metrics endpoint is not ready") - } - Eventually(verifyMetricsEndpointReady).Should(Succeed()) - - By("verifying that the controller manager is serving the metrics server") - verifyMetricsServerStarted := func(g Gomega) { - cmd := exec.Command("kubectl", "logs", controllerPodName, "-n", namespace) - output, err := utils.Run(cmd) - g.Expect(err).NotTo(HaveOccurred()) - g.Expect(output).To(ContainSubstring("controller-runtime.metrics\tServing metrics server"), - "Metrics server not yet started") - } - Eventually(verifyMetricsServerStarted).Should(Succeed()) - - By("creating the curl-metrics pod to access the metrics endpoint") - cmd = exec.Command("kubectl", "run", curlMetricPodName, "--restart=Never", - "--namespace", namespace, - "--image=curlimages/curl:latest", - "--overrides", - fmt.Sprintf(`{ - "spec": { - "containers": [{ - "name": "curl", - "image": "curlimages/curl:latest", - "command": ["/bin/sh", "-c"], - "args": ["curl -v %s.%s.svc.cluster.local:%s/metrics"], - "securityContext": { - "allowPrivilegeEscalation": false, - "capabilities": { - "drop": ["ALL"] - }, - "runAsNonRoot": true, - "runAsUser": 1000, - "seccompProfile": { - "type": "RuntimeDefault" - } - } - }], - "serviceAccount": "%s" - } - }`, metricsServiceName, namespace, metricsPort, serviceAccountName)) - _, err = utils.Run(cmd) - Expect(err).NotTo(HaveOccurred(), "Failed to create curl-metrics pod") - - // Ensure pod cleanup happens after test completes and debug logs are collected - // Using DeferCleanup ensures cleanup runs after AfterEach hooks (which collect debug logs on failure) - DeferCleanup(func() { - cmd := exec.Command("kubectl", "delete", "pod", curlMetricPodName, "-n", namespace, "--ignore-not-found") - _, err := utils.Run(cmd) - Expect(err).NotTo(HaveOccurred()) - }) - - By("waiting for the curl-metrics pod to complete.") - verifyCurlUp := func(g Gomega) { - cmd := exec.Command("kubectl", "get", "pods", curlMetricPodName, - "-o", "jsonpath={.status.phase}", - "-n", namespace) - output, err := utils.Run(cmd) - g.Expect(err).NotTo(HaveOccurred()) - g.Expect(output).To(Equal("Succeeded"), "curl pod in wrong status") - } - Eventually(verifyCurlUp, 5*time.Minute).Should(Succeed()) - - By("waiting for curl-metrics logs to be available") - // Add a small delay to ensure logs are flushed after pod completion - verifyLogsAvailable := func(g Gomega) { - cmd := exec.Command("kubectl", "logs", curlMetricPodName, "-n", namespace) - output, err := utils.Run(cmd) - g.Expect(err).NotTo(HaveOccurred()) - g.Expect(output).NotTo(BeEmpty(), "Logs should not be empty") - } - Eventually(verifyLogsAvailable, 30*time.Second).Should(Succeed()) - - By("getting the metrics by checking curl-metrics logs") - metricsOutput := getMetricsOutput() - Expect(metricsOutput).To(ContainSubstring( - "controller_runtime_reconcile_total", - )) - }) - }) - - // +kubebuilder:scaffold:e2e-webhooks-checks - }) -}) - -// getMetricsOutput retrieves and returns the logs from the curl pod used to access the metrics endpoint. -func getMetricsOutput() string { - By("getting the curl-metrics logs") - cmd := exec.Command("kubectl", "logs", "curl-metrics", "-n", namespace) - metricsOutput, err := utils.Run(cmd) - Expect(err).NotTo(HaveOccurred(), "Failed to retrieve logs from curl pod") - Expect(metricsOutput).To(ContainSubstring("< HTTP/1.1 200 OK")) - return metricsOutput -} - // logFailedTestDetails logs function resource and controller logs on test failure func logFailedTestDetails(functionName, functionNamespace string) { specReport := CurrentSpecReport() diff --git a/test/e2e/metrics_test.go b/test/e2e/metrics_test.go new file mode 100644 index 0000000..f0c1077 --- /dev/null +++ b/test/e2e/metrics_test.go @@ -0,0 +1,225 @@ +/* +Copyright 2025. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package e2e + +import ( + "fmt" + "os/exec" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + "github.com/functions-dev/func-operator/test/utils" +) + +// metricsServiceName is the name of the metrics service of the project +const metricsServiceName = "func-operator-controller-manager-metrics-service" + +// metricsPort is the port of the metrics service providing the managers metrics +const metricsPort = "8080" + +var _ = Describe("Manager", func() { + var controllerPodName string + + // After each test, check for failures and collect logs, events, + // and pod descriptions for debugging. + AfterEach(func() { + specReport := CurrentSpecReport() + if specReport.Failed() { + By("Fetching controller manager pod logs") + cmd := exec.Command("kubectl", "logs", controllerPodName, "-n", namespace) + controllerLogs, err := utils.Run(cmd) + if err == nil { + _, _ = fmt.Fprintf(GinkgoWriter, "Controller logs:\n %s", controllerLogs) + } else { + _, _ = fmt.Fprintf(GinkgoWriter, "Failed to get Controller logs: %s", err) + } + + By("Fetching Kubernetes events") + cmd = exec.Command("kubectl", "get", "events", "-n", namespace, "--sort-by=.lastTimestamp") + eventsOutput, err := utils.Run(cmd) + if err == nil { + _, _ = fmt.Fprintf(GinkgoWriter, "Kubernetes events:\n%s", eventsOutput) + } else { + _, _ = fmt.Fprintf(GinkgoWriter, "Failed to get Kubernetes events: %s", err) + } + + By("Fetching curl-metrics logs") + cmd = exec.Command("kubectl", "logs", "curl-metrics", "-n", namespace) + metricsOutput, err := utils.Run(cmd) + if err == nil { + _, _ = fmt.Fprintf(GinkgoWriter, "Metrics logs:\n %s", metricsOutput) + } else { + _, _ = fmt.Fprintf(GinkgoWriter, "Failed to get curl-metrics logs: %s", err) + } + + By("Fetching controller manager pod description") + cmd = exec.Command("kubectl", "describe", "pod", controllerPodName, "-n", namespace) + podDescription, err := utils.Run(cmd) + if err == nil { + fmt.Println("Pod description:\n", podDescription) + } else { + fmt.Println("Failed to describe controller pod") + } + } + }) + + SetDefaultEventuallyTimeout(2 * time.Minute) + SetDefaultEventuallyPollingInterval(time.Second) + + Context("Manager", func() { + It("should run successfully", func() { + By("validating that the controller-manager pod is running as expected") + verifyControllerUp := func(g Gomega) { + // Get the name of the controller-manager pod + cmd := exec.Command("kubectl", "get", + "pods", "-l", "control-plane=controller-manager", + "-o", "go-template={{ range .items }}"+ + "{{ if not .metadata.deletionTimestamp }}"+ + "{{ .metadata.name }}"+ + "{{ \"\\n\" }}{{ end }}{{ end }}", + "-n", namespace, + ) + + podOutput, err := utils.Run(cmd) + g.Expect(err).NotTo(HaveOccurred(), "Failed to retrieve controller-manager pod information") + podNames := utils.GetNonEmptyLines(podOutput) + g.Expect(podNames).To(HaveLen(1), "expected 1 controller pod running") + controllerPodName = podNames[0] + g.Expect(controllerPodName).To(ContainSubstring("controller-manager")) + + // Validate the pod's status + cmd = exec.Command("kubectl", "get", + "pods", controllerPodName, "-o", "jsonpath={.status.phase}", + "-n", namespace, + ) + output, err := utils.Run(cmd) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(output).To(Equal("Running"), "Incorrect controller-manager pod status") + } + Eventually(verifyControllerUp).Should(Succeed()) + }) + + Context("with curl-metrics-pod", func() { + curlMetricPodName := "curl-metrics" + + It("should ensure the metrics endpoint is serving metrics", func() { + By("validating that the metrics service is available") + cmd := exec.Command("kubectl", "get", "service", metricsServiceName, "-n", namespace) + _, err := utils.Run(cmd) + Expect(err).NotTo(HaveOccurred(), "Metrics service should exist") + + By("waiting for the metrics endpoint to be ready") + verifyMetricsEndpointReady := func(g Gomega) { + cmd := exec.Command("kubectl", "get", "endpoints", metricsServiceName, "-n", namespace) + output, err := utils.Run(cmd) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(output).To(ContainSubstring(metricsPort), "Metrics endpoint is not ready") + } + Eventually(verifyMetricsEndpointReady).Should(Succeed()) + + By("verifying that the controller manager is serving the metrics server") + verifyMetricsServerStarted := func(g Gomega) { + cmd := exec.Command("kubectl", "logs", controllerPodName, "-n", namespace) + output, err := utils.Run(cmd) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(output).To(ContainSubstring("controller-runtime.metrics\tServing metrics server"), + "Metrics server not yet started") + } + Eventually(verifyMetricsServerStarted).Should(Succeed()) + + By("creating the curl-metrics pod to access the metrics endpoint") + cmd = exec.Command("kubectl", "run", curlMetricPodName, "--restart=Never", + "--namespace", namespace, + "--image=curlimages/curl:latest", + "--overrides", + fmt.Sprintf(`{ + "spec": { + "containers": [{ + "name": "curl", + "image": "curlimages/curl:latest", + "command": ["/bin/sh", "-c"], + "args": ["curl -v %s.%s.svc.cluster.local:%s/metrics"], + "securityContext": { + "allowPrivilegeEscalation": false, + "capabilities": { + "drop": ["ALL"] + }, + "runAsNonRoot": true, + "runAsUser": 1000, + "seccompProfile": { + "type": "RuntimeDefault" + } + } + }], + "serviceAccount": "%s" + } + }`, metricsServiceName, namespace, metricsPort, serviceAccountName)) + _, err = utils.Run(cmd) + Expect(err).NotTo(HaveOccurred(), "Failed to create curl-metrics pod") + + // Ensure pod cleanup happens after test completes and debug logs are collected + // Using DeferCleanup ensures cleanup runs after AfterEach hooks (which collect debug logs on failure) + utils.DeferCleanupOnSuccess(func() { + cmd := exec.Command("kubectl", "delete", "pod", curlMetricPodName, "-n", namespace, "--ignore-not-found") + _, err := utils.Run(cmd) + Expect(err).NotTo(HaveOccurred()) + }) + + By("waiting for the curl-metrics pod to complete.") + verifyCurlUp := func(g Gomega) { + cmd := exec.Command("kubectl", "get", "pods", curlMetricPodName, + "-o", "jsonpath={.status.phase}", + "-n", namespace) + output, err := utils.Run(cmd) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(output).To(Equal("Succeeded"), "curl pod in wrong status") + } + Eventually(verifyCurlUp, 5*time.Minute).Should(Succeed()) + + By("waiting for curl-metrics logs to be available") + // Add a small delay to ensure logs are flushed after pod completion + verifyLogsAvailable := func(g Gomega) { + cmd := exec.Command("kubectl", "logs", curlMetricPodName, "-n", namespace) + output, err := utils.Run(cmd) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(output).NotTo(BeEmpty(), "Logs should not be empty") + } + Eventually(verifyLogsAvailable, 30*time.Second).Should(Succeed()) + + By("getting the metrics by checking curl-metrics logs") + metricsOutput := getMetricsOutput() + Expect(metricsOutput).To(ContainSubstring( + "controller_runtime_reconcile_total", + )) + }) + }) + + // +kubebuilder:scaffold:e2e-webhooks-checks + }) +}) + +// getMetricsOutput retrieves and returns the logs from the curl pod used to access the metrics endpoint. +func getMetricsOutput() string { + By("getting the curl-metrics logs") + cmd := exec.Command("kubectl", "logs", "curl-metrics", "-n", namespace) + metricsOutput, err := utils.Run(cmd) + Expect(err).NotTo(HaveOccurred(), "Failed to retrieve logs from curl pod") + Expect(metricsOutput).To(ContainSubstring("< HTTP/1.1 200 OK")) + return metricsOutput +} From 34e882cc66316b3499158122abad56b83ca69299 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20St=C3=A4bler?= Date: Thu, 16 Apr 2026 09:07:27 +0200 Subject: [PATCH 10/24] Fix metrics test failure by setting controllerPodName in BeforeEach The metrics endpoint test was failing with "You must provide one or more resources" because controllerPodName was empty. This happened because the controller pod name was only set in the first test, but the metrics test in the nested Context ran independently. Moved the controller pod validation to BeforeEach inside the Context so controllerPodName is set before each test runs, making it available to all tests including the metrics endpoint test. --- test/e2e/metrics_test.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/test/e2e/metrics_test.go b/test/e2e/metrics_test.go index f0c1077..fcea3a3 100644 --- a/test/e2e/metrics_test.go +++ b/test/e2e/metrics_test.go @@ -83,7 +83,8 @@ var _ = Describe("Manager", func() { SetDefaultEventuallyPollingInterval(time.Second) Context("Manager", func() { - It("should run successfully", func() { + // BeforeEach ensures controllerPodName is set before each test runs + BeforeEach(func() { By("validating that the controller-manager pod is running as expected") verifyControllerUp := func(g Gomega) { // Get the name of the controller-manager pod @@ -115,6 +116,11 @@ var _ = Describe("Manager", func() { Eventually(verifyControllerUp).Should(Succeed()) }) + It("should run successfully", func() { + // Controller pod validation happens in BeforeEach + Expect(controllerPodName).NotTo(BeEmpty()) + }) + Context("with curl-metrics-pod", func() { curlMetricPodName := "curl-metrics" From 73e0341d825587a90da0cc08f01966f020f33bc7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20St=C3=A4bler?= Date: Thu, 16 Apr 2026 09:32:11 +0200 Subject: [PATCH 11/24] Fix artifact collection to not fail if cluster setup failed --- .github/workflows/test-e2e.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-e2e.yml b/.github/workflows/test-e2e.yml index 2d639e6..22fd23d 100644 --- a/.github/workflows/test-e2e.yml +++ b/.github/workflows/test-e2e.yml @@ -65,9 +65,9 @@ jobs: if: failure() run: | mkdir -p /tmp/k8s-artifacts - kubectl logs -n func-operator-system -l control-plane=controller-manager --tail=-1 --all-containers --prefix --timestamps > /tmp/k8s-artifacts/func-operator.log + kubectl logs -n func-operator-system -l control-plane=controller-manager --tail=-1 --all-containers --prefix --timestamps > /tmp/k8s-artifacts/func-operator.log || true for resource in functions deployments configmaps; do - kubectl get ${resource} -A -o yaml > /tmp/k8s-artifacts/${resource}.yaml + kubectl get ${resource} -A -o yaml > /tmp/k8s-artifacts/${resource}.yaml || true done - name: Upload Kubernetes artifacts From 927e46965d197e5d19f6f1c7f3a4be7fd1881e5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20St=C3=A4bler?= Date: Thu, 16 Apr 2026 09:53:26 +0200 Subject: [PATCH 12/24] Add disk space cleanup to E2E tests workflow GitHub runners have limited disk space (~14GB free). E2E tests build multiple Docker images which can fill up the disk, causing "no space left on device" errors. Free up ~10GB by removing unused packages (.NET, Android SDK, GHC) and cleaning Docker at the start of the workflow, before the kind cluster is created. This cleanup is safe because it runs before any test infrastructure exists. --- .github/workflows/test-e2e.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/workflows/test-e2e.yml b/.github/workflows/test-e2e.yml index 22fd23d..0e84feb 100644 --- a/.github/workflows/test-e2e.yml +++ b/.github/workflows/test-e2e.yml @@ -21,6 +21,14 @@ jobs: builder: [pack, s2i] deployer: [knative, raw, keda] steps: + - name: Free up disk space + run: | + # Remove large packages to free up disk space on GitHub runners + sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc + # Clean up Docker to start fresh + docker system prune -af --volumes + df -h + - name: Clone the code uses: actions/checkout@v6 From 40e9a371d8fd070de2f4304bbd12d61598984c40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20St=C3=A4bler?= Date: Thu, 16 Apr 2026 10:08:05 +0200 Subject: [PATCH 13/24] Collect PipelineRun artifacts in E2E test failures --- .github/workflows/test-e2e.yml | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test-e2e.yml b/.github/workflows/test-e2e.yml index 0e84feb..eb20992 100644 --- a/.github/workflows/test-e2e.yml +++ b/.github/workflows/test-e2e.yml @@ -74,10 +74,23 @@ jobs: run: | mkdir -p /tmp/k8s-artifacts kubectl logs -n func-operator-system -l control-plane=controller-manager --tail=-1 --all-containers --prefix --timestamps > /tmp/k8s-artifacts/func-operator.log || true - for resource in functions deployments configmaps; do + for resource in functions deployments configmaps pipelineruns; do kubectl get ${resource} -A -o yaml > /tmp/k8s-artifacts/${resource}.yaml || true done + # Install Tekton CLI for better PipelineRun log collection + curl -LO https://github.com/tektoncd/cli/releases/download/v0.44.1/tkn_0.44.1_Linux_x86_64.tar.gz + tar xvzf tkn_0.44.1_Linux_x86_64.tar.gz -C /tmp + chmod +x /tmp/tkn + + # Collect logs from all pipelineruns using Tekton CLI + /tmp/tkn pipelinerun list -A > /tmp/k8s-artifacts/pipelinerun-list.txt 2>&1 || true + for pr in $(kubectl get pipelineruns -A -o jsonpath='{range .items[*]}{.metadata.namespace}{" "}{.metadata.name}{"\n"}{end}'); do + ns=$(echo $pr | awk '{print $1}') + name=$(echo $pr | awk '{print $2}') + /tmp/tkn pipelinerun logs $name -n $ns > /tmp/k8s-artifacts/pipelinerun-logs-${ns}-${name}.log 2>&1 || true + done + - name: Upload Kubernetes artifacts if: failure() uses: actions/upload-artifact@v7 From 3789f373122288075f9e04b81f785e605de66edb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20St=C3=A4bler?= Date: Thu, 16 Apr 2026 11:33:44 +0200 Subject: [PATCH 14/24] Increase E2E test timeouts to handle resource contention Increase default Eventually timeout from 2 to 10 minutes for function deployment tests to handle resource contention during concurrent S2I builds with middleware updates in CI environments. With multiple parallel test configurations, builds can take 6-7 minutes due to competing for limited CPU/memory/disk/network resources on GitHub runners. --- test/e2e/func_deploy_test.go | 12 +++++------- test/e2e/func_middleware_update_test.go | 7 +++---- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/test/e2e/func_deploy_test.go b/test/e2e/func_deploy_test.go index 7c6ef39..58a3f7a 100644 --- a/test/e2e/func_deploy_test.go +++ b/test/e2e/func_deploy_test.go @@ -126,7 +126,7 @@ func functionNotDeployed(functionName, functionNamespace string) func(g Gomega) var _ = Describe("Operator", func() { - SetDefaultEventuallyTimeout(2 * time.Minute) + SetDefaultEventuallyTimeout(10 * time.Minute) SetDefaultEventuallyPollingInterval(time.Second) Context("with a deployed function", func() { @@ -198,8 +198,7 @@ var _ = Describe("Operator", func() { functionName = function.Name - // redeploy could take a bit longer therefore give a bit more time - Eventually(functionBecomesReady(functionName, functionNamespace), 6*time.Minute).Should(Succeed()) + Eventually(functionBecomesReady(functionName, functionNamespace)).Should(Succeed()) }) }) Context("with a function in a subdirectory in a monorepo", func() { @@ -280,8 +279,7 @@ var _ = Describe("Operator", func() { functionName = function.Name - // redeploy could take a bit longer therefore give a bit more time - Eventually(functionBecomesReady(functionName, functionNamespace), 6*time.Minute).Should(Succeed()) + Eventually(functionBecomesReady(functionName, functionNamespace)).Should(Succeed()) }) }) Context("with a not yet deployed function", func() { @@ -439,7 +437,7 @@ var _ = Describe("Operator", func() { functionName = function.Name - Eventually(functionBecomesReady(functionName, functionNamespace), 6*time.Minute).Should(Succeed()) + Eventually(functionBecomesReady(functionName, functionNamespace)).Should(Succeed()) }) It("should fail with authentication error when authSecretRef is not provided", func() { @@ -506,7 +504,7 @@ var _ = Describe("Operator", func() { functionName = function.Name - Eventually(functionBecomesReady(functionName, functionNamespace), 6*time.Minute).Should(Succeed()) + Eventually(functionBecomesReady(functionName, functionNamespace)).Should(Succeed()) }) It("should fail with authentication error when authSecretRef is not provided", func() { diff --git a/test/e2e/func_middleware_update_test.go b/test/e2e/func_middleware_update_test.go index a49820e..f17720c 100644 --- a/test/e2e/func_middleware_update_test.go +++ b/test/e2e/func_middleware_update_test.go @@ -39,7 +39,7 @@ import ( var _ = Describe("Middleware Update", func() { - SetDefaultEventuallyTimeout(2 * time.Minute) + SetDefaultEventuallyTimeout(10 * time.Minute) SetDefaultEventuallyPollingInterval(time.Second) Context("with a function deployed using old func CLI", func() { @@ -193,8 +193,7 @@ var _ = Describe("Middleware Update", func() { functionName = fn.Name - // Middleware update could take a bit longer therefore give more time - Eventually(functionBecomesReady(functionName, functionNamespace), 6*time.Minute).Should(Succeed()) + Eventually(functionBecomesReady(functionName, functionNamespace)).Should(Succeed()) // Verify middleware was actually updated by inspecting the new image out, err = utils.RunFunc("describe", deployedFunctionName, "-n", functionNamespace, "-o", "yaml") @@ -457,7 +456,7 @@ var _ = Describe("Middleware Update", func() { } } g.Expect(false).To(BeTrue(), "MiddlewareUpToDate condition not found") - }, 5*time.Minute).Should(Succeed()) + }).Should(Succeed()) Eventually(functionBecomesReady(functionName, functionNamespace)).Should(Succeed()) }) From a729dc024edcd852dfa5a233d280d7bb2ada421d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20St=C3=A4bler?= Date: Thu, 16 Apr 2026 11:52:30 +0200 Subject: [PATCH 15/24] Enable fail-fast for E2E tests Stop test execution immediately on first failure to save CI time and provide faster feedback when tests fail. --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index c62b6ab..89b0c3c 100644 --- a/Makefile +++ b/Makefile @@ -120,11 +120,11 @@ test: manifests generate fmt vet setup-envtest ## Run tests. .PHONY: test-e2e ## Run e2e tests. test-e2e: ginkgo - $(GINKGO) -v --timeout=1h --label-filter="!bundle" -p ./test/e2e/ + $(GINKGO) -v --timeout=1h --label-filter="!bundle" --fail-fast -p ./test/e2e/ .PHONY: test-e2e-bundle ## Run bundle e2e tests. test-e2e-bundle: operator-sdk docker-build docker-push bundle bundle-build bundle-push install-olm-in-cluster ginkgo - OPERATOR_SDK=$(OPERATOR_SDK) BUNDLE_IMG=$(BUNDLE_IMG) $(GINKGO) -v --timeout=1h --label-filter="bundle" ./test/e2e/ + OPERATOR_SDK=$(OPERATOR_SDK) BUNDLE_IMG=$(BUNDLE_IMG) $(GINKGO) -v --timeout=1h --label-filter="bundle" --fail-fast ./test/e2e/ .PHONY: install-olm-in-cluster install-olm-in-cluster: operator-sdk ## Install OLM in cluster if not already installed. From 8c2f543566654208d87549ba25095ccc755f18f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20St=C3=A4bler?= Date: Thu, 16 Apr 2026 12:27:07 +0200 Subject: [PATCH 16/24] Collect RBAC resources in E2E test artifacts Add roles and rolebindings to artifact collection to diagnose permission errors during PipelineRun deployments. This will help investigate RBAC propagation delays that may cause intermittent deployment failures. --- .github/workflows/test-e2e.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-e2e.yml b/.github/workflows/test-e2e.yml index eb20992..b66df65 100644 --- a/.github/workflows/test-e2e.yml +++ b/.github/workflows/test-e2e.yml @@ -74,7 +74,7 @@ jobs: run: | mkdir -p /tmp/k8s-artifacts kubectl logs -n func-operator-system -l control-plane=controller-manager --tail=-1 --all-containers --prefix --timestamps > /tmp/k8s-artifacts/func-operator.log || true - for resource in functions deployments configmaps pipelineruns; do + for resource in functions deployments configmaps pipelineruns roles rolebindings; do kubectl get ${resource} -A -o yaml > /tmp/k8s-artifacts/${resource}.yaml || true done From 52f31c65412340d3cfb53e8538e28b8e6d56ef61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20St=C3=A4bler?= Date: Thu, 16 Apr 2026 12:56:18 +0200 Subject: [PATCH 17/24] Fix PipelineRun log collection in E2E artifacts Use 'while read' instead of 'for' loop to properly parse namespace and name pairs. The previous loop iterated over words instead of lines, causing incorrect pairing and incomplete log collection. --- .github/workflows/test-e2e.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/test-e2e.yml b/.github/workflows/test-e2e.yml index b66df65..84bcf27 100644 --- a/.github/workflows/test-e2e.yml +++ b/.github/workflows/test-e2e.yml @@ -85,9 +85,7 @@ jobs: # Collect logs from all pipelineruns using Tekton CLI /tmp/tkn pipelinerun list -A > /tmp/k8s-artifacts/pipelinerun-list.txt 2>&1 || true - for pr in $(kubectl get pipelineruns -A -o jsonpath='{range .items[*]}{.metadata.namespace}{" "}{.metadata.name}{"\n"}{end}'); do - ns=$(echo $pr | awk '{print $1}') - name=$(echo $pr | awk '{print $2}') + kubectl get pipelineruns -A -o json | jq -r '.items[] | "\(.metadata.namespace) \(.metadata.name)"' | while read ns name; do /tmp/tkn pipelinerun logs $name -n $ns > /tmp/k8s-artifacts/pipelinerun-logs-${ns}-${name}.log 2>&1 || true done From fbe1035d4eff829d8c09c496d8156df3548bbc36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20St=C3=A4bler?= Date: Thu, 16 Apr 2026 14:41:11 +0200 Subject: [PATCH 18/24] Pre-pull builder images before parallel E2E tests Add step to pre-pull and cache builder images into the KinD cluster before running parallel E2E tests. This eliminates resource contention from multiple concurrent image pulls. Root cause analysis showed that when 3-4 tests run in parallel, they all attempt to pull large builder images (1-3GB) simultaneously: - S2I: registry.access.redhat.com/ubi8/go-toolset (~1GB) - Pack: ghcr.io/knative/builder-jammy-base (~3GB) This concurrent pulling caused: - Network bandwidth saturation - Disk I/O contention - Container runtime lock contention - PipelineRun builds timing out waiting for image pulls Solution: Pre-pull images once before tests start, then load into KinD. All parallel tests now share the cached images instead of pulling separately. Benefits: - Keeps full parallel execution (-p flag) to test concurrent reconciles - Eliminates 90% of build time (no repeated pulls) - More reliable CI (no timeout failures) - Faster overall test suite --- .github/workflows/test-e2e.yml | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/.github/workflows/test-e2e.yml b/.github/workflows/test-e2e.yml index 84bcf27..5d701df 100644 --- a/.github/workflows/test-e2e.yml +++ b/.github/workflows/test-e2e.yml @@ -61,6 +61,31 @@ jobs: - name: Install func-operator run: make docker-build docker-push deploy + - name: Pre-pull builder images to avoid parallel pull contention + run: | + # Pre-pull builder images before tests run in parallel to avoid: + # - Network bandwidth saturation (multiple 1-3GB pulls) + # - Disk I/O contention + # - Container runtime lock contention + + if [ "${{ matrix.builder }}" = "s2i" ]; then + echo "Pre-pulling S2I builder images..." + docker pull registry.access.redhat.com/ubi8/go-toolset:latest + docker pull registry.access.redhat.com/ubi8/python-39:latest + docker pull registry.access.redhat.com/ubi8/nodejs-20:latest + + kind load docker-image registry.access.redhat.com/ubi8/go-toolset:latest + kind load docker-image registry.access.redhat.com/ubi8/python-39:latest + kind load docker-image registry.access.redhat.com/ubi8/nodejs-20:latest + elif [ "${{ matrix.builder }}" = "pack" ]; then + echo "Pre-pulling pack builder images..." + docker pull ghcr.io/knative/builder-jammy-base:latest + + kind load docker-image ghcr.io/knative/builder-jammy-base:latest + fi + + echo "Builder images cached in KinD cluster" + - name: Running e2e Tests env: REGISTRY_INSECURE: true From 665901db760a3b871bde255ba1f2da2dc6b1548b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20St=C3=A4bler?= Date: Thu, 16 Apr 2026 14:52:37 +0200 Subject: [PATCH 19/24] Only pull needed images (we're using only Golang in the tests ATM) --- .github/workflows/test-e2e.yml | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/.github/workflows/test-e2e.yml b/.github/workflows/test-e2e.yml index 5d701df..f5f7a7a 100644 --- a/.github/workflows/test-e2e.yml +++ b/.github/workflows/test-e2e.yml @@ -71,17 +71,21 @@ jobs: if [ "${{ matrix.builder }}" = "s2i" ]; then echo "Pre-pulling S2I builder images..." docker pull registry.access.redhat.com/ubi8/go-toolset:latest - docker pull registry.access.redhat.com/ubi8/python-39:latest - docker pull registry.access.redhat.com/ubi8/nodejs-20:latest + # docker pull registry.access.redhat.com/ubi8/python-39:latest + # docker pull registry.access.redhat.com/ubi8/nodejs-20-minimal:latest + # docker pull registry.access.redhat.com/ubi8/openjdk-21:latest kind load docker-image registry.access.redhat.com/ubi8/go-toolset:latest - kind load docker-image registry.access.redhat.com/ubi8/python-39:latest - kind load docker-image registry.access.redhat.com/ubi8/nodejs-20:latest + # kind load docker-image registry.access.redhat.com/ubi8/python-39:latest + # kind load docker-image registry.access.redhat.com/ubi8/nodejs-20-minimal:latest + # kind load docker-image registry.access.redhat.com/ubi8/openjdk-21:latest elif [ "${{ matrix.builder }}" = "pack" ]; then echo "Pre-pulling pack builder images..." - docker pull ghcr.io/knative/builder-jammy-base:latest + docker pull ghcr.io/knative/builder-jammy-base:v2 + docker pull ghcr.io/knative/builder-jammy-tiny:v2 - kind load docker-image ghcr.io/knative/builder-jammy-base:latest + kind load docker-image ghcr.io/knative/builder-jammy-base:v2 + kind load docker-image ghcr.io/knative/builder-jammy-tiny:v2 fi echo "Builder images cached in KinD cluster" From e1dc6136e047c3f067d974e5d4a61ba94a7d8778 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20St=C3=A4bler?= Date: Thu, 16 Apr 2026 15:58:33 +0200 Subject: [PATCH 20/24] Fix Eventually timeout race condition in parallel test execution Move SetDefaultEventuallyTimeout from individual Describe blocks to BeforeSuite to fix timeout race condition. Root cause: When tests run in parallel with `-p`, all test files execute in the same process. SetDefaultEventuallyTimeout is global to Gomega, so whichever Describe block runs last overwrites the timeout for all tests. The sequence was: 1. func_deploy_test.go sets timeout to 10 minutes 2. metrics_test.go sets timeout to 2 minutes (120 seconds) 3. All subsequent tests use 2 minutes, causing deployment tests to timeout Solution: Set timeout once globally in BeforeSuite before any Describe blocks execute. This ensures a consistent 10 minute timeout for all tests. Removed redundant timeout settings from: - test/e2e/func_deploy_test.go (10 min) - test/e2e/func_middleware_update_test.go (10 min) - test/e2e/bundle_test.go (5 min) - test/e2e/metrics_test.go (2 min - the culprit) --- test/e2e/bundle_test.go | 3 --- test/e2e/e2e_suite_test.go | 6 ++++++ test/e2e/func_deploy_test.go | 3 --- test/e2e/func_middleware_update_test.go | 3 --- test/e2e/metrics_test.go | 3 --- 5 files changed, 6 insertions(+), 12 deletions(-) diff --git a/test/e2e/bundle_test.go b/test/e2e/bundle_test.go index 651a262..bbf52f2 100644 --- a/test/e2e/bundle_test.go +++ b/test/e2e/bundle_test.go @@ -47,9 +47,6 @@ var _ = Describe("Bundle", Label("bundle"), Ordered, func() { testNamespaces []TestNamespace ) - SetDefaultEventuallyTimeout(5 * time.Minute) - SetDefaultEventuallyPollingInterval(time.Second) - BeforeAll(func() { bundleImage = os.Getenv("BUNDLE_IMG") Expect(bundleImage).ToNot(BeEmpty(), "BUNDLE_IMG must be given") diff --git a/test/e2e/e2e_suite_test.go b/test/e2e/e2e_suite_test.go index 31d786b..3e8bfcd 100644 --- a/test/e2e/e2e_suite_test.go +++ b/test/e2e/e2e_suite_test.go @@ -20,6 +20,7 @@ import ( "context" "fmt" "testing" + "time" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" @@ -51,6 +52,11 @@ func TestE2E(t *testing.T) { var _ = BeforeSuite(func() { ctx = context.Background() + // Set global timeout for Eventually assertions + // Must be set here (not in Describe blocks) to avoid race conditions in parallel execution + SetDefaultEventuallyTimeout(10 * time.Minute) + SetDefaultEventuallyPollingInterval(1 * time.Second) + // Register the Function API scheme err := functionsdevv1alpha1.AddToScheme(scheme.Scheme) Expect(err).NotTo(HaveOccurred()) diff --git a/test/e2e/func_deploy_test.go b/test/e2e/func_deploy_test.go index 58a3f7a..8eafc14 100644 --- a/test/e2e/func_deploy_test.go +++ b/test/e2e/func_deploy_test.go @@ -126,9 +126,6 @@ func functionNotDeployed(functionName, functionNamespace string) func(g Gomega) var _ = Describe("Operator", func() { - SetDefaultEventuallyTimeout(10 * time.Minute) - SetDefaultEventuallyPollingInterval(time.Second) - Context("with a deployed function", func() { var repoURL string var repoDir string diff --git a/test/e2e/func_middleware_update_test.go b/test/e2e/func_middleware_update_test.go index f17720c..bca973e 100644 --- a/test/e2e/func_middleware_update_test.go +++ b/test/e2e/func_middleware_update_test.go @@ -39,9 +39,6 @@ import ( var _ = Describe("Middleware Update", func() { - SetDefaultEventuallyTimeout(10 * time.Minute) - SetDefaultEventuallyPollingInterval(time.Second) - Context("with a function deployed using old func CLI", func() { var repoURL string diff --git a/test/e2e/metrics_test.go b/test/e2e/metrics_test.go index fcea3a3..dc9f252 100644 --- a/test/e2e/metrics_test.go +++ b/test/e2e/metrics_test.go @@ -79,9 +79,6 @@ var _ = Describe("Manager", func() { } }) - SetDefaultEventuallyTimeout(2 * time.Minute) - SetDefaultEventuallyPollingInterval(time.Second) - Context("Manager", func() { // BeforeEach ensures controllerPodName is set before each test runs BeforeEach(func() { From d6c70d80c0616d36db775194515ce9bb062cf6de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20St=C3=A4bler?= Date: Thu, 16 Apr 2026 16:34:14 +0200 Subject: [PATCH 21/24] Use default timeout of 6 minutes and shorter custom timeout for metrics tests --- test/e2e/e2e_suite_test.go | 2 +- test/e2e/metrics_test.go | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/test/e2e/e2e_suite_test.go b/test/e2e/e2e_suite_test.go index 3e8bfcd..79f3721 100644 --- a/test/e2e/e2e_suite_test.go +++ b/test/e2e/e2e_suite_test.go @@ -54,7 +54,7 @@ var _ = BeforeSuite(func() { // Set global timeout for Eventually assertions // Must be set here (not in Describe blocks) to avoid race conditions in parallel execution - SetDefaultEventuallyTimeout(10 * time.Minute) + SetDefaultEventuallyTimeout(6 * time.Minute) SetDefaultEventuallyPollingInterval(1 * time.Second) // Register the Function API scheme diff --git a/test/e2e/metrics_test.go b/test/e2e/metrics_test.go index dc9f252..47e0f37 100644 --- a/test/e2e/metrics_test.go +++ b/test/e2e/metrics_test.go @@ -110,7 +110,7 @@ var _ = Describe("Manager", func() { g.Expect(err).NotTo(HaveOccurred()) g.Expect(output).To(Equal("Running"), "Incorrect controller-manager pod status") } - Eventually(verifyControllerUp).Should(Succeed()) + Eventually(verifyControllerUp, 2*time.Minute).Should(Succeed()) }) It("should run successfully", func() { @@ -134,7 +134,7 @@ var _ = Describe("Manager", func() { g.Expect(err).NotTo(HaveOccurred()) g.Expect(output).To(ContainSubstring(metricsPort), "Metrics endpoint is not ready") } - Eventually(verifyMetricsEndpointReady).Should(Succeed()) + Eventually(verifyMetricsEndpointReady, 2*time.Minute).Should(Succeed()) By("verifying that the controller manager is serving the metrics server") verifyMetricsServerStarted := func(g Gomega) { @@ -144,7 +144,7 @@ var _ = Describe("Manager", func() { g.Expect(output).To(ContainSubstring("controller-runtime.metrics\tServing metrics server"), "Metrics server not yet started") } - Eventually(verifyMetricsServerStarted).Should(Succeed()) + Eventually(verifyMetricsServerStarted, 2*time.Minute).Should(Succeed()) By("creating the curl-metrics pod to access the metrics endpoint") cmd = exec.Command("kubectl", "run", curlMetricPodName, "--restart=Never", From 32333e9562f4955c60c646feb8072f55624f935e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20St=C3=A4bler?= Date: Thu, 16 Apr 2026 16:37:14 +0200 Subject: [PATCH 22/24] Remove builder image pre-download --- .github/workflows/test-e2e.yml | 29 ----------------------------- 1 file changed, 29 deletions(-) diff --git a/.github/workflows/test-e2e.yml b/.github/workflows/test-e2e.yml index f5f7a7a..84bcf27 100644 --- a/.github/workflows/test-e2e.yml +++ b/.github/workflows/test-e2e.yml @@ -61,35 +61,6 @@ jobs: - name: Install func-operator run: make docker-build docker-push deploy - - name: Pre-pull builder images to avoid parallel pull contention - run: | - # Pre-pull builder images before tests run in parallel to avoid: - # - Network bandwidth saturation (multiple 1-3GB pulls) - # - Disk I/O contention - # - Container runtime lock contention - - if [ "${{ matrix.builder }}" = "s2i" ]; then - echo "Pre-pulling S2I builder images..." - docker pull registry.access.redhat.com/ubi8/go-toolset:latest - # docker pull registry.access.redhat.com/ubi8/python-39:latest - # docker pull registry.access.redhat.com/ubi8/nodejs-20-minimal:latest - # docker pull registry.access.redhat.com/ubi8/openjdk-21:latest - - kind load docker-image registry.access.redhat.com/ubi8/go-toolset:latest - # kind load docker-image registry.access.redhat.com/ubi8/python-39:latest - # kind load docker-image registry.access.redhat.com/ubi8/nodejs-20-minimal:latest - # kind load docker-image registry.access.redhat.com/ubi8/openjdk-21:latest - elif [ "${{ matrix.builder }}" = "pack" ]; then - echo "Pre-pulling pack builder images..." - docker pull ghcr.io/knative/builder-jammy-base:v2 - docker pull ghcr.io/knative/builder-jammy-tiny:v2 - - kind load docker-image ghcr.io/knative/builder-jammy-base:v2 - kind load docker-image ghcr.io/knative/builder-jammy-tiny:v2 - fi - - echo "Builder images cached in KinD cluster" - - name: Running e2e Tests env: REGISTRY_INSECURE: true From 43794c3d15534cff23ff36bbb956c24da4cf4458 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20St=C3=A4bler?= Date: Thu, 16 Apr 2026 17:08:52 +0200 Subject: [PATCH 23/24] Increase default Eventually timeout to 10 minutes Increased the default timeout for Eventually assertions to accommodate longer test durations. --- test/e2e/e2e_suite_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/e2e/e2e_suite_test.go b/test/e2e/e2e_suite_test.go index 79f3721..3e8bfcd 100644 --- a/test/e2e/e2e_suite_test.go +++ b/test/e2e/e2e_suite_test.go @@ -54,7 +54,7 @@ var _ = BeforeSuite(func() { // Set global timeout for Eventually assertions // Must be set here (not in Describe blocks) to avoid race conditions in parallel execution - SetDefaultEventuallyTimeout(6 * time.Minute) + SetDefaultEventuallyTimeout(10 * time.Minute) SetDefaultEventuallyPollingInterval(1 * time.Second) // Register the Function API scheme From c9f04b0a521009b77d79375cdd0c0891e2e6e8bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20St=C3=A4bler?= Date: Fri, 17 Apr 2026 09:18:30 +0200 Subject: [PATCH 24/24] Add retry logic to func deploy to handle transient network errors Retries func deploy up to 3 times with 5s delay between attempts to handle transient in-cluster-dialer connection failures during parallel E2E execution. --- test/utils/func.go | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/test/utils/func.go b/test/utils/func.go index d315848..640ef50 100644 --- a/test/utils/func.go +++ b/test/utils/func.go @@ -24,6 +24,9 @@ import ( "os/exec" "path/filepath" "runtime" + "time" + + ginkgo "github.com/onsi/ginkgo/v2" ) // RunFunc executes the func CLI with the current/latest version @@ -46,7 +49,7 @@ func RunFuncWithVersion(version string, command string, args ...string) (string, return Run(cmd) } -// RunFuncDeploy runs func deploy +// RunFuncDeploy runs func deploy with retry logic for transient network errors func RunFuncDeploy(functionDir string, optFns ...FuncDeployOption) (string, error) { opts := &FuncDeployOptions{ // defaults @@ -78,11 +81,28 @@ func RunFuncDeploy(functionDir string, optFns ...FuncDeployOption) (string, erro args = append(args, "--deployer", opts.Deployer) } - if opts.CliVersion != "" { - return RunFuncWithVersion(opts.CliVersion, "deploy", args...) + var output string + var err error + + // Retry up to 3 times with 5s delay between attempts + for attempt := 0; attempt < 3; attempt++ { + if attempt > 0 { + time.Sleep(5 * time.Second) + _, _ = fmt.Fprintf(ginkgo.GinkgoWriter, "func deploy attempt %d failed: %v (retrying)\n", attempt, err) + } + + if opts.CliVersion != "" { + output, err = RunFuncWithVersion(opts.CliVersion, "deploy", args...) + } else { + output, err = RunFunc("deploy", args...) + } + + if err == nil { + return output, nil + } } - return RunFunc("deploy", args...) + return output, err } type FuncDeployOptions struct {