From 9b5d7ee58ca5f7be5a430274d39c2eba17bf9429 Mon Sep 17 00:00:00 2001 From: Gabriel Ichim Date: Tue, 23 Jun 2026 15:14:07 +0300 Subject: [PATCH] fix(controller): reference agent images by tag for private registries Declarative agent images are pinned by their link-time digest (registry/repository@sha256:...) injected via controller-digest-ldflags.sh. When --image-registry / --image-repository point to a private registry, only the registry and repository are rewritten while the upstream digest is kept, so the controller emits a reference that is not resolvable in registries that do not preserve the upstream manifest digest. The agent pods then fail with ImagePullBackOff / manifest unknown, and --image-tag is silently ignored for declarative agents. Add a --pin-runtime-image-digest flag (env PIN_RUNTIME_IMAGE_DIGEST), default true to preserve current behaviour. When set to false, runtime image resolution falls back to a tag reference (registry/repository:tag) via the existing ImageConfig.PinnedImage(), restoring the pre-0.9.7 behaviour for operators mirroring images into a private registry. The Helm chart exposes this as controller.pinRuntimeImageDigest and plumbs it to the controller ConfigMap. Fixes #2055 Signed-off-by: Gabriel Ichim --- .../translator/agent/adk_api_translator.go | 14 +- .../translator/agent/deployments.go | 38 +++-- .../translator/agent/runtime_test.go | 146 ++++++++++++++++++ go/core/pkg/app/app.go | 1 + .../templates/controller-configmap.yaml | 1 + .../tests/controller-configmap_test.yaml | 20 +++ helm/kagent/values.yaml | 6 + 7 files changed, 213 insertions(+), 13 deletions(-) create mode 100644 helm/kagent/tests/controller-configmap_test.yaml diff --git a/go/core/internal/controller/translator/agent/adk_api_translator.go b/go/core/internal/controller/translator/agent/adk_api_translator.go index 4b09d30a22..5428b7177f 100644 --- a/go/core/internal/controller/translator/agent/adk_api_translator.go +++ b/go/core/internal/controller/translator/agent/adk_api_translator.go @@ -117,11 +117,23 @@ var DefaultImageConfig = ImageConfig{ } // PythonADKImageDigest, GoADKImageDigest, and GoADKFullImageDigest are set at -// controller link time from the pushed runtime image manifest digests. +// controller link time from the pushed runtime image manifest digests. They are +// used as the default digests when digest pinning is enabled. var PythonADKImageDigest string var GoADKImageDigest string var GoADKFullImageDigest string +// PinRuntimeImageDigest controls how declarative agent images are referenced. +// When true (the default), images are pinned by their link-time digest +// (registry/repository@sha256:...) for supply-chain integrity. This requires the +// configured registry to expose the same manifest digest as the upstream image. +// +// Operators mirroring images into a private registry that does not preserve the +// upstream manifest digest can set this to false so the controller emits a tag +// reference (registry/repository:tag) that is resolvable in their registry. +// See https://github.com/kagent-dev/kagent/issues/2055. +var PinRuntimeImageDigest = true + // DefaultSkillsInitImageConfig is the image config for the skills-init container // that clones skill repositories from Git and pulls OCI skill images. var DefaultSkillsInitImageConfig = ImageConfig{ diff --git a/go/core/internal/controller/translator/agent/deployments.go b/go/core/internal/controller/translator/agent/deployments.go index 2a55255697..fc9415fcca 100644 --- a/go/core/internal/controller/translator/agent/deployments.go +++ b/go/core/internal/controller/translator/agent/deployments.go @@ -125,12 +125,7 @@ func validateExtraContainers(containers []corev1.Container) error { func resolvePythonRuntimeImage(registry string) (string, error) { repo := DefaultImageConfig.Repository - if d := normalizeImageDigest(PythonADKImageDigest); d != "" { - return fmt.Sprintf("%s/%s@%s", registry, repo, d), nil - } - return "", fmt.Errorf( - "app image digest is not set at link time; rebuild the controller after pushing agent runtime images", - ) + return resolveRuntimeImage(registry, repo, PythonADKImageDigest, "app") } func resolveGoRuntimeImage(registry string, full bool) (string, error) { @@ -141,13 +136,32 @@ func resolveGoRuntimeImage(registry string, full bool) (string, error) { digest = GoADKFullImageDigest imageLabel = "golang-adk-full" } - if d := normalizeImageDigest(digest); d != "" { - return fmt.Sprintf("%s/%s@%s", registry, repo, d), nil + return resolveRuntimeImage(registry, repo, digest, imageLabel) +} + +// resolveRuntimeImage builds the image reference for a declarative agent runtime. +// +// By default it pins the image by its link-time digest (registry/repository@sha256:...) +// for supply-chain integrity. When digest pinning is disabled via +// PinRuntimeImageDigest, it falls back to a tag reference (registry/repository:tag) +// so the image is resolvable in private registries that do not preserve the upstream +// manifest digest. See https://github.com/kagent-dev/kagent/issues/2055. +func resolveRuntimeImage(registry, repository, digest, imageLabel string) (string, error) { + cfg := ImageConfig{ + Registry: registry, + Repository: repository, + Tag: DefaultImageConfig.Tag, + } + if PinRuntimeImageDigest { + if normalizeImageDigest(digest) == "" { + return "", fmt.Errorf( + "%s image digest is not set at link time; rebuild the controller after pushing agent runtime images, or set --pin-runtime-image-digest=false to reference the image by tag", + imageLabel, + ) + } + cfg.Digest = digest } - return "", fmt.Errorf( - "%s image digest is not set at link time; rebuild the controller after pushing agent runtime images", - imageLabel, - ) + return cfg.PinnedImage(), nil } func resolveInlineDeployment(agent v1alpha2.AgentObject, mdd *modelDeploymentData) (*resolvedDeployment, error) { diff --git a/go/core/internal/controller/translator/agent/runtime_test.go b/go/core/internal/controller/translator/agent/runtime_test.go index f196415ed8..9763e4034f 100644 --- a/go/core/internal/controller/translator/agent/runtime_test.go +++ b/go/core/internal/controller/translator/agent/runtime_test.go @@ -487,3 +487,149 @@ func TestRuntime_CustomRepositoryPath_WithSkillsUsesFullTag(t *testing.T) { assert.Contains(t, container.Image, "my-registry.com/custom/golang-adk", "Image should use custom repository with golang-adk") assert.Contains(t, container.Image, "@sha256:test-go-full", "Go runtime with skills should use digest-pinned golang-adk-full image") } + +// withRuntimeImageTagReference disables digest pinning and sets a deterministic +// image tag so tests can assert the tag-based reference emitted for private +// registries that do not preserve upstream manifest digests. +// See https://github.com/kagent-dev/kagent/issues/2055. +func withRuntimeImageTagReference(t *testing.T) { + t.Helper() + originalPin := translator.PinRuntimeImageDigest + originalTag := translator.DefaultImageConfig.Tag + translator.PinRuntimeImageDigest = false + translator.DefaultImageConfig.Tag = "v-test" + t.Cleanup(func() { + translator.PinRuntimeImageDigest = originalPin + translator.DefaultImageConfig.Tag = originalTag + }) +} + +func TestRuntime_PythonRuntime_TagReferenceWhenDigestPinningDisabled(t *testing.T) { + withPythonRuntimeDigest(t) + withRuntimeImageTagReference(t) + ctx := context.Background() + + agent := &v1alpha2.Agent{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-python-agent", + Namespace: "test", + }, + Spec: v1alpha2.AgentSpec{ + Type: v1alpha2.AgentType_Declarative, + Declarative: &v1alpha2.DeclarativeAgentSpec{ + Runtime: v1alpha2.DeclarativeRuntime_Python, + SystemMessage: "Test Python agent", + ModelConfig: "test-model", + }, + }, + } + + modelConfig := &v1alpha2.ModelConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-model", + Namespace: "test", + }, + Spec: v1alpha2.ModelConfigSpec{ + Provider: "OpenAI", + Model: "gpt-4o", + }, + } + + scheme := schemev1.Scheme + err := v1alpha2.AddToScheme(scheme) + require.NoError(t, err) + + kubeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(agent, modelConfig). + Build() + + defaultModel := types.NamespacedName{ + Namespace: "test", + Name: "test-model", + } + translatorInstance := translator.NewAdkApiTranslator(kubeClient, defaultModel, nil, "", nil) + + result, err := translator.TranslateAgent(ctx, translatorInstance, agent) + require.NoError(t, err) + require.NotNil(t, result) + + var deployment *appsv1.Deployment + for _, obj := range result.Manifest { + if dep, ok := obj.(*appsv1.Deployment); ok { + deployment = dep + break + } + } + require.NotNil(t, deployment, "Deployment should be in manifest") + + require.Len(t, deployment.Spec.Template.Spec.Containers, 1) + container := deployment.Spec.Template.Spec.Containers[0] + assert.Contains(t, container.Image, "/app:v-test", "Python runtime should reference the app image by tag when digest pinning is disabled") + assert.NotContains(t, container.Image, "@sha256:", "Image must not be digest-pinned when digest pinning is disabled") +} + +func TestRuntime_GoRuntime_TagReferenceWhenDigestPinningDisabled(t *testing.T) { + withGoRuntimeDigests(t) + withRuntimeImageTagReference(t) + ctx := context.Background() + + agent := &v1alpha2.Agent{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-go-agent", + Namespace: "test", + }, + Spec: v1alpha2.AgentSpec{ + Type: v1alpha2.AgentType_Declarative, + Declarative: &v1alpha2.DeclarativeAgentSpec{ + Runtime: v1alpha2.DeclarativeRuntime_Go, + SystemMessage: "Test Go agent", + ModelConfig: "test-model", + }, + }, + } + + modelConfig := &v1alpha2.ModelConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-model", + Namespace: "test", + }, + Spec: v1alpha2.ModelConfigSpec{ + Provider: "OpenAI", + Model: "gpt-4o", + }, + } + + scheme := schemev1.Scheme + err := v1alpha2.AddToScheme(scheme) + require.NoError(t, err) + + kubeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(agent, modelConfig). + Build() + + defaultModel := types.NamespacedName{ + Namespace: "test", + Name: "test-model", + } + translatorInstance := translator.NewAdkApiTranslator(kubeClient, defaultModel, nil, "", nil) + + result, err := translator.TranslateAgent(ctx, translatorInstance, agent) + require.NoError(t, err) + require.NotNil(t, result) + + var deployment *appsv1.Deployment + for _, obj := range result.Manifest { + if dep, ok := obj.(*appsv1.Deployment); ok { + deployment = dep + break + } + } + require.NotNil(t, deployment, "Deployment should be in manifest") + + require.Len(t, deployment.Spec.Template.Spec.Containers, 1) + container := deployment.Spec.Template.Spec.Containers[0] + assert.Contains(t, container.Image, "/golang-adk:v-test", "Go runtime should reference the golang-adk image by tag when digest pinning is disabled") + assert.NotContains(t, container.Image, "@sha256:", "Image must not be digest-pinned when digest pinning is disabled") +} diff --git a/go/core/pkg/app/app.go b/go/core/pkg/app/app.go index 6ee09b0e8c..c16512ebc5 100644 --- a/go/core/pkg/app/app.go +++ b/go/core/pkg/app/app.go @@ -214,6 +214,7 @@ func (cfg *Config) SetFlags(commandLine *flag.FlagSet) { commandLine.StringVar(&agent_translator.DefaultImageConfig.PullPolicy, "image-pull-policy", agent_translator.DefaultImageConfig.PullPolicy, "The pull policy to use for the image.") commandLine.StringVar(&agent_translator.DefaultImageConfig.PullSecret, "image-pull-secret", "", "The pull secret name for the agent image.") commandLine.StringVar(&agent_translator.DefaultImageConfig.Repository, "image-repository", agent_translator.DefaultImageConfig.Repository, "The repository to use for the agent image.") + commandLine.BoolVar(&agent_translator.PinRuntimeImageDigest, "pin-runtime-image-digest", agent_translator.PinRuntimeImageDigest, "Pin declarative agent images by their link-time digest (registry/repository@sha256:...). Set to false to reference images by tag (registry/repository:tag), e.g. when mirroring into a private registry that does not preserve the upstream manifest digest. See https://github.com/kagent-dev/kagent/issues/2055.") commandLine.StringVar(&agent_translator.DefaultSkillsInitImageConfig.Registry, "skills-init-image-registry", agent_translator.DefaultSkillsInitImageConfig.Registry, "The registry to use for the skills init image.") commandLine.StringVar(&agent_translator.DefaultSkillsInitImageConfig.Tag, "skills-init-image-tag", agent_translator.DefaultSkillsInitImageConfig.Tag, "The tag to use for the skills init image.") commandLine.StringVar(&agent_translator.DefaultSkillsInitImageConfig.PullPolicy, "skills-init-image-pull-policy", agent_translator.DefaultSkillsInitImageConfig.PullPolicy, "The pull policy to use for the skills init image.") diff --git a/helm/kagent/templates/controller-configmap.yaml b/helm/kagent/templates/controller-configmap.yaml index 3f3dc04539..be4fc6bade 100644 --- a/helm/kagent/templates/controller-configmap.yaml +++ b/helm/kagent/templates/controller-configmap.yaml @@ -16,6 +16,7 @@ data: IMAGE_REGISTRY: {{ .Values.controller.agentImage.registry | default .Values.registry | quote }} IMAGE_REPOSITORY: {{ .Values.controller.agentImage.repository | quote }} IMAGE_TAG: {{ coalesce .Values.controller.agentImage.tag .Values.tag .Chart.Version | quote }} + PIN_RUNTIME_IMAGE_DIGEST: {{ .Values.controller.pinRuntimeImageDigest | quote }} SKILLS_INIT_IMAGE_PULL_POLICY: {{ .Values.controller.skillsInitImage.pullPolicy | default .Values.imagePullPolicy | quote }} SKILLS_INIT_IMAGE_REGISTRY: {{ .Values.controller.skillsInitImage.registry | default .Values.registry | quote }} SKILLS_INIT_IMAGE_REPOSITORY: {{ .Values.controller.skillsInitImage.repository | quote }} diff --git a/helm/kagent/tests/controller-configmap_test.yaml b/helm/kagent/tests/controller-configmap_test.yaml new file mode 100644 index 0000000000..d03218bfdd --- /dev/null +++ b/helm/kagent/tests/controller-configmap_test.yaml @@ -0,0 +1,20 @@ +suite: test controller configmap +templates: + - controller-configmap.yaml +tests: + - it: should pin agent images by digest by default + asserts: + - isKind: + of: ConfigMap + - equal: + path: data.PIN_RUNTIME_IMAGE_DIGEST + value: "true" + + - it: should reference agent images by tag when digest pinning is disabled + set: + controller: + pinRuntimeImageDigest: false + asserts: + - equal: + path: data.PIN_RUNTIME_IMAGE_DIGEST + value: "false" diff --git a/helm/kagent/values.yaml b/helm/kagent/values.yaml index 8a4434cb75..3183f2ae8e 100644 --- a/helm/kagent/values.yaml +++ b/helm/kagent/values.yaml @@ -181,6 +181,12 @@ controller: repository: kagent-dev/kagent/app tag: "" # Will default to global, then Chart version pullPolicy: "" + # -- Pin declarative agent images by their link-time digest + # (registry/repository@sha256:...). Set to false to reference agent images by + # tag (registry/repository:tag), e.g. when mirroring into a private registry + # that does not preserve the upstream manifest digest. + # See https://github.com/kagent-dev/kagent/issues/2055. + pinRuntimeImageDigest: true # -- The image used by the skills-init container to clone skills from Git and pull OCI skill images. skillsInitImage: registry: ""