Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cmd/ateapi/internal/controlapi/workflow_pause.go
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ func (s *CallAteletPauseStep) Execute(ctx context.Context, input *PauseInput, st
Name: ctr.Name,
Image: ctr.Image,
Command: ctr.Command,
Readyz: toAteletReadyz(ctr.Readyz),
}
for _, env := range ctr.Env {
var val string
Expand Down
1 change: 1 addition & 0 deletions cmd/ateapi/internal/controlapi/workflow_suspend.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ func (s *CallAteletSuspendStep) Execute(ctx context.Context, input *SuspendInput
Name: ctr.Name,
Image: ctr.Image,
Command: ctr.Command,
Readyz: toAteletReadyz(ctr.Readyz),
}
for _, env := range ctr.Env {
var val string
Expand Down
18 changes: 18 additions & 0 deletions cmd/ateapi/internal/controlapi/workload_spec.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ func workloadSpecFromActorTemplate(ctx context.Context, kubeClient kubernetes.In
Name: ctr.Name,
Image: ctr.Image,
Command: ctr.Command,
Readyz: toAteletReadyz(ctr.Readyz),
}
for _, env := range ctr.Env {
ateletEnv, err := resolver.resolve(ctx, ctr.Name, env)
Expand All @@ -63,6 +64,23 @@ func workloadSpecFromActorTemplate(ctx context.Context, kubeClient kubernetes.In
return workloadSpec, nil
}

// toAteletReadyz projects the CRD readyz field onto the ateletpb wire type.
// Returns nil when the source is nil so containers without a probe stay
// unchanged on the wire.
func toAteletReadyz(in *atev1alpha1.ContainerReadyz) *ateletpb.Readyz {
if in == nil {
return nil
}
out := &ateletpb.Readyz{}
if in.HTTPGet != nil {
out.HttpGet = &ateletpb.HTTPGetAction{
Path: in.HTTPGet.Path,
Port: in.HTTPGet.Port,
}
}
return out
}

type envResolver struct {
kubeClient kubernetes.Interface
namespace string
Expand Down
44 changes: 44 additions & 0 deletions cmd/ateapi/internal/controlapi/workload_spec_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,50 @@ func TestWorkloadSpecFromActorTemplateResolvesValueFromEnv(t *testing.T) {
}
}

func TestWorkloadSpecFromActorTemplatePropagatesReadyz(t *testing.T) {
ctx := context.Background()
got, err := workloadSpecFromActorTemplate(ctx, fake.NewSimpleClientset(), nil, &atev1alpha1.ActorTemplate{
ObjectMeta: metav1.ObjectMeta{Name: "tmpl-readyz", Namespace: "agent-ns"},
Spec: atev1alpha1.ActorTemplateSpec{
Containers: []atev1alpha1.Container{
{
Name: "with-probe",
Image: "main",
Readyz: &atev1alpha1.ContainerReadyz{
HTTPGet: &atev1alpha1.HTTPGetAction{Path: "/health", Port: 8080},
},
},
{
Name: "without-probe",
Image: "side",
},
},
},
})
if err != nil {
t.Fatalf("workloadSpecFromActorTemplate failed: %v", err)
}

want := &ateletpb.WorkloadSpec{
Containers: []*ateletpb.Container{
{
Name: "with-probe",
Image: "main",
Readyz: &ateletpb.Readyz{
HttpGet: &ateletpb.HTTPGetAction{Path: "/health", Port: 8080},
},
},
{
Name: "without-probe",
Image: "side",
},
},
}
if diff := cmp.Diff(want, got, protocmp.Transform()); diff != "" {
t.Errorf("WorkloadSpec mismatch (-want +got):\n%s", diff)
}
}

func TestWorkloadSpecFromActorTemplateOptionalSecretKeyRefSkipsMissingSecret(t *testing.T) {
optional := true
got, err := workloadSpecFromActorTemplate(context.Background(), fake.NewSimpleClientset(), nil, &atev1alpha1.ActorTemplate{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,14 @@ import (

const (
GoldenSnapshotCreationReason = "GoldenSnapshotCreation"

// goldenSnapshotWarmup is the default wall-clock delay between resuming
// the golden actor and taking its snapshot, used as a coarse "give the
// workload time to finish initializing" fallback for templates without
// a readiness probe. Templates whose containers all declare readyz skip
// this wait — ResumeActor only returns once readyz reports 200, so the
// workload is already initialized by the time we get here.
goldenSnapshotWarmup = 20 * time.Second
)

type ActorTemplateReconciler struct {
Expand Down Expand Up @@ -109,7 +117,7 @@ func (r *ActorTemplateReconciler) Reconcile(ctx context.Context, req ctrl.Reques
}

at.Status.Phase = atev1alpha1.PhaseWaitGoldenActor
at.Status.TakeGoldenSnapshotAt = metav1.NewTime(time.Now().Add(20 * time.Second))
at.Status.TakeGoldenSnapshotAt = metav1.NewTime(time.Now().Add(goldenSnapshotWarmupFor(at)))
if err := r.Status().Update(ctx, at); err != nil {
return ctrl.Result{}, err
}
Expand Down Expand Up @@ -163,3 +171,20 @@ func (r *ActorTemplateReconciler) Reconcile(ctx context.Context, req ctrl.Reques
func (r *ActorTemplateReconciler) SetupWithManager(mgr ctrl.Manager) error {
return ctrl.NewControllerManagedBy(mgr).For(&atev1alpha1.ActorTemplate{}).Complete(r)
}

// goldenSnapshotWarmupFor returns 0 when every container in the template has
// a readyz probe (so ResumeActor already blocked until the workload reported
// 200), and the default warmup otherwise. A template with no containers
// keeps the default — there is nothing to gate on.
func goldenSnapshotWarmupFor(at *atev1alpha1.ActorTemplate) time.Duration {
containers := at.Spec.Containers
if len(containers) == 0 {
return goldenSnapshotWarmup
}
for i := range containers {
if containers[i].Readyz == nil {
return goldenSnapshotWarmup
}
}
return 0
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
// Copyright 2026 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package controllers

import (
"testing"

atev1alpha1 "github.com/agent-substrate/substrate/pkg/api/v1alpha1"
)

func TestGoldenSnapshotWarmupFor(t *testing.T) {
probe := &atev1alpha1.ContainerReadyz{
HTTPGet: &atev1alpha1.HTTPGetAction{Port: 80},
}

tests := []struct {
name string
containers []atev1alpha1.Container
wantZero bool
}{
{
name: "no containers keeps default warmup",
containers: nil,
wantZero: false,
},
{
name: "all containers have readyz skips warmup",
containers: []atev1alpha1.Container{
{Name: "a", Readyz: probe},
{Name: "b", Readyz: probe},
},
wantZero: true,
},
{
name: "single container with readyz skips warmup",
containers: []atev1alpha1.Container{
{Name: "a", Readyz: probe},
},
wantZero: true,
},
{
name: "mixed containers keep warmup",
containers: []atev1alpha1.Container{
{Name: "a", Readyz: probe},
{Name: "b"},
},
wantZero: false,
},
{
name: "no readyz anywhere keeps warmup",
containers: []atev1alpha1.Container{
{Name: "a"},
{Name: "b"},
},
wantZero: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
at := &atev1alpha1.ActorTemplate{
Spec: atev1alpha1.ActorTemplateSpec{Containers: tt.containers},
}
got := goldenSnapshotWarmupFor(at)
if tt.wantZero && got != 0 {
t.Errorf("goldenSnapshotWarmupFor = %v, want 0", got)
}
if !tt.wantZero && got != goldenSnapshotWarmup {
t.Errorf("goldenSnapshotWarmupFor = %v, want %v", got, goldenSnapshotWarmup)
}
})
}
}
24 changes: 22 additions & 2 deletions cmd/atelet/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -705,11 +705,31 @@ func (s *AteomHerder) dialAteom(ctx context.Context, targetAteomUid string) (ate
}

// buildAteomWorkloadSpec projects the atelet-facing workload spec onto
// the ateom-facing one — currently just the container names.
// the ateom-facing one.
func buildAteomWorkloadSpec(spec *ateletpb.WorkloadSpec) *ateompb.WorkloadSpec {
out := &ateompb.WorkloadSpec{}
for _, ctr := range spec.GetContainers() {
out.Containers = append(out.Containers, &ateompb.Container{Name: ctr.GetName()})
out.Containers = append(out.Containers, &ateompb.Container{
Name: ctr.GetName(),
Readyz: toAteomReadyz(ctr.GetReadyz()),
})
}
return out
}

// toAteomReadyz converts an ateletpb readyz probe into the ateompb wire
// type. Returns nil when the source is nil so containers without a probe
// stay unchanged on the wire to ateom.
func toAteomReadyz(in *ateletpb.Readyz) *ateompb.Readyz {
if in == nil {
return nil
}
out := &ateompb.Readyz{}
if hg := in.GetHttpGet(); hg != nil {
out.HttpGet = &ateompb.HTTPGetAction{
Path: hg.GetPath(),
Port: hg.GetPort(),
}
}
return out
}
Expand Down
36 changes: 36 additions & 0 deletions cmd/atelet/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,11 @@ import (

"github.com/agent-substrate/substrate/internal/ateompath"
"github.com/agent-substrate/substrate/internal/proto/ateletpb"
"github.com/agent-substrate/substrate/internal/proto/ateompb"
"github.com/google/go-cmp/cmp"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
"google.golang.org/protobuf/testing/protocmp"
)

func TestWriteFileAtomic(t *testing.T) {
Expand Down Expand Up @@ -377,3 +380,36 @@ func TestRPCBoundariesReject(t *testing.T) {
wantInvalidArgument(t, "Restore", err)
})
}

func TestBuildAteomWorkloadSpecForwardsReadyz(t *testing.T) {
in := &ateletpb.WorkloadSpec{
PauseImage: "pause",
Containers: []*ateletpb.Container{
{
Name: "with-probe",
Image: "main",
Readyz: &ateletpb.Readyz{
HttpGet: &ateletpb.HTTPGetAction{Path: "/health", Port: 8080},
},
},
{
Name: "without-probe",
},
},
}
want := &ateompb.WorkloadSpec{
Containers: []*ateompb.Container{
{
Name: "with-probe",
Readyz: &ateompb.Readyz{
HttpGet: &ateompb.HTTPGetAction{Path: "/health", Port: 8080},
},
},
{Name: "without-probe"},
},
}
got := buildAteomWorkloadSpec(in)
if diff := cmp.Diff(want, got, protocmp.Transform()); diff != "" {
t.Errorf("buildAteomWorkloadSpec mismatch (-want +got):\n%s", diff)
}
}
11 changes: 11 additions & 0 deletions cmd/ateom-gvisor/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ import (
"github.com/agent-substrate/substrate/internal/ateompath"
"github.com/agent-substrate/substrate/internal/contextlogging"
"github.com/agent-substrate/substrate/internal/proto/ateompb"
"github.com/agent-substrate/substrate/internal/readyz"
"github.com/agent-substrate/substrate/internal/serverboot"
"github.com/agent-substrate/substrate/internal/version"
"github.com/google/nftables"
Expand Down Expand Up @@ -227,6 +228,11 @@ func (s *AteomService) RunWorkload(ctx context.Context, req *ateompb.RunWorkload
}
}

// Block until every readyz-enabled container reports 200.
if err := readyz.WaitAll(ctx, req.GetSpec().GetContainers(), actorVethIP); err != nil {
return nil, fmt.Errorf("while waiting for container readyz: %w", err)
}

s.actorLogger.EmitLifecycleLog("Actor started", req.GetActorId(), req.GetActorTemplateName(), req.GetActorTemplateNamespace())

return &ateompb.RunWorkloadResponse{}, nil
Expand Down Expand Up @@ -383,6 +389,11 @@ func (s *AteomService) RestoreWorkload(ctx context.Context, req *ateompb.Restore
}
}

// Block until every readyz-enabled container reports 200.
if err := readyz.WaitAll(ctx, req.GetSpec().GetContainers(), actorVethIP); err != nil {
return nil, fmt.Errorf("while waiting for container readyz: %w", err)
}

s.actorLogger.EmitLifecycleLog("Actor restored", req.GetActorId(), req.GetActorTemplateName(), req.GetActorTemplateNamespace())

return &ateompb.RestoreWorkloadResponse{}, nil
Expand Down
6 changes: 6 additions & 0 deletions cmd/ateom-microvm/restore.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import (
"github.com/agent-substrate/substrate/cmd/ateom-microvm/internal/kata"
"github.com/agent-substrate/substrate/internal/ateompath"
"github.com/agent-substrate/substrate/internal/proto/ateompb"
"github.com/agent-substrate/substrate/internal/readyz"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
)
Expand Down Expand Up @@ -184,6 +185,11 @@ func (s *AteomService) RestoreWorkload(ctx context.Context, req *ateompb.Restore
return nil, fmt.Errorf("while resuming restored guest: %w", err)
}

// Block until every readyz-enabled container reports 200.
if err := readyz.WaitAll(ctx, containers, actorVethIP); err != nil {
return nil, fmt.Errorf("while waiting for container readyz: %w", err)
}

ra := &runningActor{chCmd: chCmd, apiSocket: apiSocket, baseID: srcID, restoreSourceDir: restoreDir}

// Re-attach stdout/stderr forwarding: the restored guest's container + kata-agent
Expand Down
6 changes: 6 additions & 0 deletions cmd/ateom-microvm/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import (
"github.com/agent-substrate/substrate/cmd/ateom-microvm/internal/third_party/kata/agentpb"
"github.com/agent-substrate/substrate/internal/ateompath"
"github.com/agent-substrate/substrate/internal/proto/ateompb"
"github.com/agent-substrate/substrate/internal/readyz"
specs "github.com/opencontainers/runtime-spec/specs-go"
"golang.org/x/sys/unix"
"google.golang.org/grpc/codes"
Expand Down Expand Up @@ -340,6 +341,11 @@ func (s *AteomService) RunWorkload(ctx context.Context, req *ateompb.RunWorkload
return nil, err
}

// Block until every readyz-enabled container reports 200.
if err := readyz.WaitAll(ctx, containers, actorVethIP); err != nil {
return nil, fmt.Errorf("while waiting for container readyz: %w", err)
}

ra := &runningActor{chCmd: chCmd, apiSocket: apiSocket, containerName: containerName, baseID: id, logAgent: ac}
s.running[id] = ra

Expand Down
4 changes: 4 additions & 0 deletions demos/counter/counter-microvm.yaml.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,10 @@ spec:
- name: counter
image: ko://github.com/agent-substrate/substrate/demos/counter
command: ["/ko-app/counter"]
readyz:
httpGet:
path: /readyz
port: 80
workerSelector:
matchLabels:
workload: counter-microvm
Expand Down
Loading
Loading