From f6b7405812ac65aab8b68f077b8105aaf895a91a Mon Sep 17 00:00:00 2001 From: dberkov Date: Fri, 12 Jun 2026 18:44:29 -0700 Subject: [PATCH 01/17] Add VolumeSource to API --- .../generated/ate.dev_actortemplates.yaml | 83 +++++ pkg/api/v1alpha1/actortemplate_types.go | 86 +++++ .../v1alpha1/actortemplate_validation_test.go | 309 ++++++++++++++++++ pkg/api/v1alpha1/zz_generated.deepcopy.go | 80 ++++- 4 files changed, 557 insertions(+), 1 deletion(-) diff --git a/manifests/ate-install/generated/ate.dev_actortemplates.yaml b/manifests/ate-install/generated/ate.dev_actortemplates.yaml index aec84f9a..bcff9d58 100644 --- a/manifests/ate-install/generated/ate.dev_actortemplates.yaml +++ b/manifests/ate-install/generated/ate.dev_actortemplates.yaml @@ -179,6 +179,29 @@ spec: maximum: 65535 minimum: 1 type: integer + volumeMounts: + description: volumeMounts define the volumes to mount + into this container. + items: + description: VolumeMount describes a mounting of a + Volume within a actor. + properties: + mountPath: + description: |- + Path within the actor at which the volume should be mounted. Must + not contain ':'. + maxLength: 4096 + type: string + name: + description: This must match the Name of a Volume. + maxLength: 63 + type: string + required: + - mountPath + - name + type: object + maxItems: 32 + type: array required: - port type: object @@ -230,9 +253,56 @@ spec: description: Location to store snapshots in. minLength: 1 type: string + onCommit: + description: |- + OnCommit specifies what to include in the snapshot when a commit is requested. + If not provided, the "process" behavior is used by default. + The OnCommit must be a subset of the OnPause content. + + For example: + - if OnPause is "process", then OnCommit can be "process" or "homedir". + - if OnPause is "homedir", then OnCommit must be "homedir". + enum: + - process + - homedir + type: string + onPause: + description: |- + OnPause specifies what to include in the snapshot when the actor is paused. 
+ If not provided, the "process" behavior is used by default. + enum: + - process + - homedir + type: string required: - location type: object + x-kubernetes-validations: + - message: OnCommit must be a subset of OnPause + rule: '(has(self.onPause) ? self.onPause : ''process'') == ''process'' + || (has(self.onCommit) ? self.onCommit : ''process'') == (has(self.onPause) + ? self.onPause : ''process'')' + volumes: + description: Volumes defines the volumes to mount into all containers + in the actor. + items: + properties: + homeDir: + description: homeDir represents a directory on rootfs that will + participate in snapshots. + type: object + name: + description: name of the volume. + maxLength: 63 + type: string + required: + - name + type: object + x-kubernetes-validations: + - message: exactly one of the fields in [homeDir] must be set + rule: '[has(self.homeDir)].filter(x,x==true).size() == 1' + maxItems: 32 + type: array workerSelector: description: |- WorkerSelector restricts which worker pools actors from this template may @@ -291,6 +361,19 @@ spec: x-kubernetes-validations: - message: Spec is immutable rule: self == oldSelf + - message: A container may mount at most one HomeDir-typed volume + rule: '!has(self.containers) || self.containers.all(c, !has(c.volumeMounts) + || c.volumeMounts.filter(vm, has(self.volumes) && self.volumes.exists(v, + v.name == vm.name && has(v.homeDir))).size() <= 1)' + - message: 'MountPath for a HomeDir volume must be a clean absolute Unix + path: must start with ''/'', not be ''/'', and contain no '':'', ''..'', + ''.'', ''//'', trailing ''/'', or control characters' + rule: '!has(self.containers) || self.containers.all(c, !has(c.volumeMounts) + || c.volumeMounts.all(vm, !has(self.volumes) || !self.volumes.exists(v, + v.name == vm.name && has(v.homeDir)) || (vm.mountPath.startsWith(''/'') + && size(vm.mountPath) > 1 && !vm.mountPath.endsWith(''/'') && !vm.mountPath.contains(''//'') + && !vm.mountPath.contains('':'') && 
!vm.mountPath.matches(''[\x00-\x1f\x7f]'') + && !vm.mountPath.matches(''(^|/)[.][.]?(/|$)''))))' status: description: status is the observed state of ActorTemplate properties: diff --git a/pkg/api/v1alpha1/actortemplate_types.go b/pkg/api/v1alpha1/actortemplate_types.go index 12e8754c..f0800129 100644 --- a/pkg/api/v1alpha1/actortemplate_types.go +++ b/pkg/api/v1alpha1/actortemplate_types.go @@ -29,6 +29,48 @@ const ( PhaseFailed PhaseType = "Failed" ) +// Represents a directory on rootfs that will participate in snapshots. +type HomedirVolumeSource struct { +} + +// Represents the source of a volume to mount. +// Exactly one of its members must be specified. +// +// When adding a new source type, list it in the ExactlyOneOf marker below. +// +// +kubebuilder:validation:ExactlyOneOf={homeDir} +type VolumeSource struct { + // homeDir represents a directory on rootfs that will participate in snapshots. + // +optional + HomeDir *HomedirVolumeSource `json:"homeDir,omitempty" protobuf:"bytes,2,opt,name=homeDir"` +} + +type Volume struct { + // name of the volume. + // + // +required + // +kubebuilder:validation:MaxLength=63 + Name string `json:"name" protobuf:"bytes,1,opt,name=name"` + + // volumeSource represents the location and type of the mounted volume. + VolumeSource `json:",inline" protobuf:"bytes,2,opt,name=volumeSource"` +} + +// VolumeMount describes a mounting of a Volume within a actor. +type VolumeMount struct { + // This must match the Name of a Volume. + // + // +required + // +kubebuilder:validation:MaxLength=63 + Name string `json:"name" protobuf:"bytes,1,opt,name=name"` + // Path within the actor at which the volume should be mounted. Must + // not contain ':'. + // + // +required + // +kubebuilder:validation:MaxLength=4096 + MountPath string `json:"mountPath" protobuf:"bytes,3,opt,name=mountPath"` +} + // A single application container that you want to run within a WorkerPool. type Container struct { // Name of the container. 
@@ -94,6 +136,12 @@ type HTTPGetAction struct { // +kubebuilder:validation:Minimum=1 // +kubebuilder:validation:Maximum=65535 Port int32 `json:"port"` + + // volumeMounts define the volumes to mount into this container. + // + // +optional + // +kubebuilder:validation:MaxItems=32 + VolumeMounts []VolumeMount `json:"volumeMounts,omitempty"` } // EnvVar represents an environment variable supplied to a container in an @@ -163,15 +211,47 @@ type SecretKeySelector struct { Optional *bool `json:"optional,omitempty"` } +// SnapshotScope defines what components to include in a snapshot. +// +kubebuilder:validation:Enum=process;homedir +type SnapshotScope string + +const ( + // Process memory plus the full rootfs (homedir included). + SnapshotScopeProcess SnapshotScope = "process" + // Only the homedir; memory and the rest of rootfs are excluded. + SnapshotScopeHomedir SnapshotScope = "homedir" +) + +// +kubebuilder:validation:XValidation:rule="(has(self.onPause) ? self.onPause : 'process') == 'process' || (has(self.onCommit) ? self.onCommit : 'process') == (has(self.onPause) ? self.onPause : 'process')",message="OnCommit must be a subset of OnPause" type SnapshotsConfig struct { // Location to store snapshots in. // // +required // +kubebuilder:validation:MinLength=1 Location string `json:"location"` + + // OnPause specifies what to include in the snapshot when the actor is paused. + // If not provided, the "process" behavior is used by default. + // + // +optional + OnPause SnapshotScope `json:"onPause,omitempty"` + + // OnCommit specifies what to include in the snapshot when a commit is requested. + // If not provided, the "process" behavior is used by default. + // The OnCommit must be a subset of the OnPause content. + // + // For example: + // - if OnPause is "process", then OnCommit can be "process" or "homedir". + // - if OnPause is "homedir", then OnCommit must be "homedir". 
+ // + // +optional + OnCommit SnapshotScope `json:"onCommit,omitempty"` } // ActorTemplateSpec defined desired spec of an actor. +// +// +kubebuilder:validation:XValidation:rule="!has(self.containers) || self.containers.all(c, !has(c.volumeMounts) || c.volumeMounts.filter(vm, has(self.volumes) && self.volumes.exists(v, v.name == vm.name && has(v.homeDir))).size() <= 1)",message="A container may mount at most one HomeDir-typed volume" +// +kubebuilder:validation:XValidation:rule="!has(self.containers) || self.containers.all(c, !has(c.volumeMounts) || c.volumeMounts.all(vm, !has(self.volumes) || !self.volumes.exists(v, v.name == vm.name && has(v.homeDir)) || (vm.mountPath.startsWith('/') && size(vm.mountPath) > 1 && !vm.mountPath.endsWith('/') && !vm.mountPath.contains('//') && !vm.mountPath.contains(':') && !vm.mountPath.matches('[\\x00-\\x1f\\x7f]') && !vm.mountPath.matches('(^|/)[.][.]?(/|$)'))))",message="MountPath for a HomeDir volume must be a clean absolute Unix path: must start with '/', not be '/', and contain no ':', '..', '.', '//', trailing '/', or control characters" type ActorTemplateSpec struct { // PauseImage is the container to use as the root sandbox container. // @@ -223,6 +303,12 @@ type ActorTemplateSpec struct { // // +optional WorkerSelector *metav1.LabelSelector `json:"workerSelector,omitempty"` + + // Volumes defines the volumes to mount into all containers in the actor. 
+ // + // +optional + // +kubebuilder:validation:MaxItems=32 + Volumes []Volume `json:"volumes,omitempty"` } // TODO: add validation diff --git a/pkg/api/v1alpha1/actortemplate_validation_test.go b/pkg/api/v1alpha1/actortemplate_validation_test.go index 1558c9dd..c793add2 100644 --- a/pkg/api/v1alpha1/actortemplate_validation_test.go +++ b/pkg/api/v1alpha1/actortemplate_validation_test.go @@ -480,6 +480,315 @@ func TestActorTemplateValidation(t *testing.T) { }, wantErr: true, errMsg: "Unsupported value", + }, { + name: "SnapshotsConfig: OnPause=process, OnCommit=process", + mutate: func(at *ActorTemplate) { + at.Spec.SnapshotsConfig.OnPause = SnapshotScopeProcess + at.Spec.SnapshotsConfig.OnCommit = SnapshotScopeProcess + }, + wantErr: false, + }, { + name: "SnapshotsConfig: OnPause=process, OnCommit=homedir", + mutate: func(at *ActorTemplate) { + at.Spec.SnapshotsConfig.OnPause = SnapshotScopeProcess + at.Spec.SnapshotsConfig.OnCommit = SnapshotScopeHomedir + }, + wantErr: false, + }, { + name: "SnapshotsConfig: OnPause=homedir, OnCommit=homedir", + mutate: func(at *ActorTemplate) { + at.Spec.SnapshotsConfig.OnPause = SnapshotScopeHomedir + at.Spec.SnapshotsConfig.OnCommit = SnapshotScopeHomedir + }, + wantErr: false, + }, { + name: "SnapshotsConfig: OnPause=homedir, OnCommit=process (invalid)", + mutate: func(at *ActorTemplate) { + at.Spec.SnapshotsConfig.OnPause = SnapshotScopeHomedir + at.Spec.SnapshotsConfig.OnCommit = SnapshotScopeProcess + }, + wantErr: true, + errMsg: "OnCommit must be a subset of OnPause", + }, { + name: "SnapshotsConfig: OnPause=homedir, OnCommit unset (defaults to process, invalid)", + mutate: func(at *ActorTemplate) { + at.Spec.SnapshotsConfig.OnPause = SnapshotScopeHomedir + }, + wantErr: true, + errMsg: "OnCommit must be a subset of OnPause", + }, { + name: "SnapshotsConfig: OnPause unset (defaults to process), OnCommit=homedir", + mutate: func(at *ActorTemplate) { + at.Spec.SnapshotsConfig.OnCommit = SnapshotScopeHomedir + }, + 
wantErr: false, + }, { + name: "SnapshotsConfig: OnPause invalid enum value", + mutate: func(at *ActorTemplate) { + at.Spec.SnapshotsConfig.OnPause = SnapshotScope("bogus") + }, + wantErr: true, + errMsg: "Unsupported value", + }, { + name: "SnapshotsConfig: OnCommit invalid enum value", + mutate: func(at *ActorTemplate) { + at.Spec.SnapshotsConfig.OnCommit = SnapshotScope("bogus") + }, + wantErr: true, + errMsg: "Unsupported value", + }, { + name: "Volumes: 1 HomeDir mount is valid", + mutate: func(at *ActorTemplate) { + at.Spec.Volumes = []Volume{ + {Name: "vol1", VolumeSource: VolumeSource{HomeDir: &HomedirVolumeSource{}}}, + } + at.Spec.Containers[0].VolumeMounts = []VolumeMount{ + {Name: "vol1", MountPath: "/home"}, + } + }, + wantErr: false, + }, { + name: "Volumes: 2 HomeDir mounts in same container is invalid", + mutate: func(at *ActorTemplate) { + at.Spec.Volumes = []Volume{ + {Name: "vol1", VolumeSource: VolumeSource{HomeDir: &HomedirVolumeSource{}}}, + {Name: "vol2", VolumeSource: VolumeSource{HomeDir: &HomedirVolumeSource{}}}, + } + at.Spec.Containers[0].VolumeMounts = []VolumeMount{ + {Name: "vol1", MountPath: "/home1"}, + {Name: "vol2", MountPath: "/home2"}, + } + }, + wantErr: true, + errMsg: "A container may mount at most one HomeDir-typed volume", + }, { + name: "Volumes: 2 HomeDir mounts in different containers is valid", + mutate: func(at *ActorTemplate) { + at.Spec.Volumes = []Volume{ + {Name: "vol1", VolumeSource: VolumeSource{HomeDir: &HomedirVolumeSource{}}}, + {Name: "vol2", VolumeSource: VolumeSource{HomeDir: &HomedirVolumeSource{}}}, + } + at.Spec.Containers = append(at.Spec.Containers, Container{ + Name: "sidecar", + Image: "busybox@sha256:326e0e090a9a4057e62a1b94236e7a2df2f2f76722f67232e0e47854e4df9c53", + VolumeMounts: []VolumeMount{ + {Name: "vol2", MountPath: "/home2"}, + }, + }) + at.Spec.Containers[0].VolumeMounts = []VolumeMount{ + {Name: "vol1", MountPath: "/home1"}, + } + }, + wantErr: false, + }, { + name: "Volumes: VolumeSource 
with no source set is invalid", + mutate: func(at *ActorTemplate) { + at.Spec.Volumes = []Volume{ + {Name: "vol1", VolumeSource: VolumeSource{}}, + } + }, + wantErr: true, + errMsg: "exactly one of the fields in [homeDir] must be set", + }, { + name: "Volumes: VolumeSource with no source set is invalid (mixed with a valid HomeDir volume)", + mutate: func(at *ActorTemplate) { + at.Spec.Volumes = []Volume{ + {Name: "vol1", VolumeSource: VolumeSource{HomeDir: &HomedirVolumeSource{}}}, + {Name: "vol2", VolumeSource: VolumeSource{}}, + } + at.Spec.Containers[0].VolumeMounts = []VolumeMount{ + {Name: "vol1", MountPath: "/home1"}, + {Name: "vol2", MountPath: "/mnt"}, + } + }, + wantErr: true, + errMsg: "exactly one of the fields in [homeDir] must be set", + }, { + name: "Volumes: HomeDir MountPath with nested absolute path is valid", + mutate: func(at *ActorTemplate) { + at.Spec.Volumes = []Volume{ + {Name: "vol1", VolumeSource: VolumeSource{HomeDir: &HomedirVolumeSource{}}}, + } + at.Spec.Containers[0].VolumeMounts = []VolumeMount{ + {Name: "vol1", MountPath: "/home/user/data"}, + } + }, + wantErr: false, + }, { + name: "Volumes: HomeDir MountPath as bare root is invalid", + mutate: func(at *ActorTemplate) { + at.Spec.Volumes = []Volume{ + {Name: "vol1", VolumeSource: VolumeSource{HomeDir: &HomedirVolumeSource{}}}, + } + at.Spec.Containers[0].VolumeMounts = []VolumeMount{ + {Name: "vol1", MountPath: "/"}, + } + }, + wantErr: true, + errMsg: "MountPath for a HomeDir volume must be a clean absolute Unix path", + }, { + name: "Volumes: HomeDir MountPath with relative path is invalid", + mutate: func(at *ActorTemplate) { + at.Spec.Volumes = []Volume{ + {Name: "vol1", VolumeSource: VolumeSource{HomeDir: &HomedirVolumeSource{}}}, + } + at.Spec.Containers[0].VolumeMounts = []VolumeMount{ + {Name: "vol1", MountPath: "home/user"}, + } + }, + wantErr: true, + errMsg: "MountPath for a HomeDir volume must be a clean absolute Unix path", + }, { + name: "Volumes: HomeDir MountPath as 
empty string is invalid", + mutate: func(at *ActorTemplate) { + at.Spec.Volumes = []Volume{ + {Name: "vol1", VolumeSource: VolumeSource{HomeDir: &HomedirVolumeSource{}}}, + } + at.Spec.Containers[0].VolumeMounts = []VolumeMount{ + {Name: "vol1", MountPath: ""}, + } + }, + wantErr: true, + errMsg: "MountPath for a HomeDir volume must be a clean absolute Unix path", + }, { + name: "Volumes: HomeDir MountPath with leading whitespace is invalid", + mutate: func(at *ActorTemplate) { + at.Spec.Volumes = []Volume{ + {Name: "vol1", VolumeSource: VolumeSource{HomeDir: &HomedirVolumeSource{}}}, + } + at.Spec.Containers[0].VolumeMounts = []VolumeMount{ + {Name: "vol1", MountPath: " /home"}, + } + }, + wantErr: true, + errMsg: "MountPath for a HomeDir volume must be a clean absolute Unix path", + }, { + name: "Volumes: HomeDir MountPath with trailing slash is invalid", + mutate: func(at *ActorTemplate) { + at.Spec.Volumes = []Volume{ + {Name: "vol1", VolumeSource: VolumeSource{HomeDir: &HomedirVolumeSource{}}}, + } + at.Spec.Containers[0].VolumeMounts = []VolumeMount{ + {Name: "vol1", MountPath: "/home/"}, + } + }, + wantErr: true, + errMsg: "MountPath for a HomeDir volume must be a clean absolute Unix path", + }, { + name: "Volumes: HomeDir MountPath with consecutive slashes is invalid", + mutate: func(at *ActorTemplate) { + at.Spec.Volumes = []Volume{ + {Name: "vol1", VolumeSource: VolumeSource{HomeDir: &HomedirVolumeSource{}}}, + } + at.Spec.Containers[0].VolumeMounts = []VolumeMount{ + {Name: "vol1", MountPath: "/home//user"}, + } + }, + wantErr: true, + errMsg: "MountPath for a HomeDir volume must be a clean absolute Unix path", + }, { + name: "Volumes: HomeDir MountPath containing ':' is invalid", + mutate: func(at *ActorTemplate) { + at.Spec.Volumes = []Volume{ + {Name: "vol1", VolumeSource: VolumeSource{HomeDir: &HomedirVolumeSource{}}}, + } + at.Spec.Containers[0].VolumeMounts = []VolumeMount{ + {Name: "vol1", MountPath: "/ho:me"}, + } + }, + wantErr: true, + errMsg: 
"MountPath for a HomeDir volume must be a clean absolute Unix path", + }, { + name: "Volumes: HomeDir MountPath with '..' component is invalid", + mutate: func(at *ActorTemplate) { + at.Spec.Volumes = []Volume{ + {Name: "vol1", VolumeSource: VolumeSource{HomeDir: &HomedirVolumeSource{}}}, + } + at.Spec.Containers[0].VolumeMounts = []VolumeMount{ + {Name: "vol1", MountPath: "/home/../etc"}, + } + }, + wantErr: true, + errMsg: "MountPath for a HomeDir volume must be a clean absolute Unix path", + }, { + name: "Volumes: HomeDir MountPath with trailing '..' is invalid", + mutate: func(at *ActorTemplate) { + at.Spec.Volumes = []Volume{ + {Name: "vol1", VolumeSource: VolumeSource{HomeDir: &HomedirVolumeSource{}}}, + } + at.Spec.Containers[0].VolumeMounts = []VolumeMount{ + {Name: "vol1", MountPath: "/home/.."}, + } + }, + wantErr: true, + errMsg: "MountPath for a HomeDir volume must be a clean absolute Unix path", + }, { + name: "Volumes: HomeDir MountPath with '.' component is invalid", + mutate: func(at *ActorTemplate) { + at.Spec.Volumes = []Volume{ + {Name: "vol1", VolumeSource: VolumeSource{HomeDir: &HomedirVolumeSource{}}}, + } + at.Spec.Containers[0].VolumeMounts = []VolumeMount{ + {Name: "vol1", MountPath: "/home/./user"}, + } + }, + wantErr: true, + errMsg: "MountPath for a HomeDir volume must be a clean absolute Unix path", + }, { + name: "Volumes: HomeDir MountPath containing dotfile is valid (only bare '.' / '..' 
components are rejected)", + mutate: func(at *ActorTemplate) { + at.Spec.Volumes = []Volume{ + {Name: "vol1", VolumeSource: VolumeSource{HomeDir: &HomedirVolumeSource{}}}, + } + at.Spec.Containers[0].VolumeMounts = []VolumeMount{ + {Name: "vol1", MountPath: "/home/.config"}, + } + }, + wantErr: false, + }, { + name: "Volumes: HomeDir MountPath with NUL byte is invalid", + mutate: func(at *ActorTemplate) { + at.Spec.Volumes = []Volume{ + {Name: "vol1", VolumeSource: VolumeSource{HomeDir: &HomedirVolumeSource{}}}, + } + at.Spec.Containers[0].VolumeMounts = []VolumeMount{ + {Name: "vol1", MountPath: "/home\x00/user"}, + } + }, + wantErr: true, + errMsg: "MountPath for a HomeDir volume must be a clean absolute Unix path", + }, { + name: "Volumes: HomeDir MountPath with control character is invalid", + mutate: func(at *ActorTemplate) { + at.Spec.Volumes = []Volume{ + {Name: "vol1", VolumeSource: VolumeSource{HomeDir: &HomedirVolumeSource{}}}, + } + at.Spec.Containers[0].VolumeMounts = []VolumeMount{ + {Name: "vol1", MountPath: "/home\t/user"}, + } + }, + wantErr: true, + errMsg: "MountPath for a HomeDir volume must be a clean absolute Unix path", + }, { + name: "Volumes: HomeDir mount with invalid MountPath in second container is rejected", + mutate: func(at *ActorTemplate) { + at.Spec.Volumes = []Volume{ + {Name: "vol1", VolumeSource: VolumeSource{HomeDir: &HomedirVolumeSource{}}}, + {Name: "vol2", VolumeSource: VolumeSource{HomeDir: &HomedirVolumeSource{}}}, + } + at.Spec.Containers = append(at.Spec.Containers, Container{ + Name: "sidecar", + Image: "busybox@sha256:326e0e090a9a4057e62a1b94236e7a2df2f2f76722f67232e0e47854e4df9c53", + VolumeMounts: []VolumeMount{ + {Name: "vol2", MountPath: "home2"}, + }, + }) + at.Spec.Containers[0].VolumeMounts = []VolumeMount{ + {Name: "vol1", MountPath: "/home1"}, + } + }, + wantErr: true, + errMsg: "MountPath for a HomeDir volume must be a clean absolute Unix path", }} for _, tt := range tests { diff --git 
a/pkg/api/v1alpha1/zz_generated.deepcopy.go b/pkg/api/v1alpha1/zz_generated.deepcopy.go index 687008f6..483b4bb9 100644 --- a/pkg/api/v1alpha1/zz_generated.deepcopy.go +++ b/pkg/api/v1alpha1/zz_generated.deepcopy.go @@ -99,6 +99,13 @@ func (in *ActorTemplateSpec) DeepCopyInto(out *ActorTemplateSpec) { *out = new(v1.LabelSelector) (*in).DeepCopyInto(*out) } + if in.Volumes != nil { + in, out := &in.Volumes, &out.Volumes + *out = make([]Volume, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ActorTemplateSpec. @@ -187,7 +194,7 @@ func (in *ContainerReadyz) DeepCopyInto(out *ContainerReadyz) { if in.HTTPGet != nil { in, out := &in.HTTPGet, &out.HTTPGet *out = new(HTTPGetAction) - **out = **in + (*in).DeepCopyInto(*out) } } @@ -249,6 +256,11 @@ func (in *EnvVarSource) DeepCopy() *EnvVarSource { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *HTTPGetAction) DeepCopyInto(out *HTTPGetAction) { *out = *in + if in.VolumeMounts != nil { + in, out := &in.VolumeMounts, &out.VolumeMounts + *out = make([]VolumeMount, len(*in)) + copy(*out, *in) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HTTPGetAction. @@ -261,6 +273,21 @@ func (in *HTTPGetAction) DeepCopy() *HTTPGetAction { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *HomedirVolumeSource) DeepCopyInto(out *HomedirVolumeSource) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HomedirVolumeSource. 
+func (in *HomedirVolumeSource) DeepCopy() *HomedirVolumeSource { + if in == nil { + return nil + } + out := new(HomedirVolumeSource) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *SandboxConfig) DeepCopyInto(out *SandboxConfig) { *out = *in @@ -387,6 +414,57 @@ func (in *SnapshotsConfig) DeepCopy() *SnapshotsConfig { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *Volume) DeepCopyInto(out *Volume) { + *out = *in + in.VolumeSource.DeepCopyInto(&out.VolumeSource) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Volume. +func (in *Volume) DeepCopy() *Volume { + if in == nil { + return nil + } + out := new(Volume) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *VolumeMount) DeepCopyInto(out *VolumeMount) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VolumeMount. +func (in *VolumeMount) DeepCopy() *VolumeMount { + if in == nil { + return nil + } + out := new(VolumeMount) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *VolumeSource) DeepCopyInto(out *VolumeSource) { + *out = *in + if in.HomeDir != nil { + in, out := &in.HomeDir, &out.HomeDir + *out = new(HomedirVolumeSource) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VolumeSource. 
+func (in *VolumeSource) DeepCopy() *VolumeSource { + if in == nil { + return nil + } + out := new(VolumeSource) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *WorkerPool) DeepCopyInto(out *WorkerPool) { *out = *in From 05035964a14a5654ae0ba2774879c1f1c6a39693 Mon Sep 17 00:00:00 2001 From: dberkov Date: Sun, 21 Jun 2026 07:22:18 -0700 Subject: [PATCH 02/17] implement suspend/resume with homedir support --- cmd/ateapi/internal/controlapi/converter.go | 32 ++ .../internal/controlapi/converter_test.go | 60 ++ .../internal/controlapi/workflow_pause.go | 38 +- .../internal/controlapi/workflow_resume.go | 3 + .../internal/controlapi/workflow_suspend.go | 38 +- .../internal/controlapi/workload_spec.go | 23 + .../internal/controlapi/workload_spec_test.go | 321 +++++++---- cmd/atelet/main.go | 300 ++++++++-- cmd/atelet/main_test.go | 6 + cmd/atelet/oci.go | 21 +- cmd/atelet/oci_test.go | 44 +- cmd/ateom-gvisor/main.go | 84 ++- cmd/ateom-gvisor/runsc.go | 59 +- demos/counter/counter.go | 30 +- demos/counter/counter.yaml.tmpl | 8 + internal/ateompath/ateompath.go | 21 + internal/proto/ateletpb/atelet.pb.go | 540 ++++++++++++++---- internal/proto/ateletpb/atelet.proto | 50 +- internal/proto/ateompb/ateom.pb.go | 190 ++++-- internal/proto/ateompb/ateom.proto | 20 +- .../ate-install/sandboxconfig-gvisor.yaml | 8 +- 21 files changed, 1497 insertions(+), 399 deletions(-) create mode 100644 cmd/ateapi/internal/controlapi/converter.go create mode 100644 cmd/ateapi/internal/controlapi/converter_test.go diff --git a/cmd/ateapi/internal/controlapi/converter.go b/cmd/ateapi/internal/controlapi/converter.go new file mode 100644 index 00000000..66679e7a --- /dev/null +++ b/cmd/ateapi/internal/controlapi/converter.go @@ -0,0 +1,32 @@ +// Copyright 2026 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except 
in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package controlapi + +import ( + "github.com/agent-substrate/substrate/internal/proto/ateletpb" + atev1alpha1 "github.com/agent-substrate/substrate/pkg/api/v1alpha1" +) + +// toAteletSnapshotScope converts an atev1alpha1.SnapshotScope to its ateletpb.SnapshotScope equivalent; empty or unrecognized values map to SNAPSHOT_SCOPE_PROCESS. +func toAteletSnapshotScope(in atev1alpha1.SnapshotScope) ateletpb.SnapshotScope { + switch in { + case atev1alpha1.SnapshotScopeProcess: + return ateletpb.SnapshotScope_SNAPSHOT_SCOPE_PROCESS + case atev1alpha1.SnapshotScopeHomedir: + return ateletpb.SnapshotScope_SNAPSHOT_SCOPE_HOMEDIR + default: + return ateletpb.SnapshotScope_SNAPSHOT_SCOPE_PROCESS + } +} diff --git a/cmd/ateapi/internal/controlapi/converter_test.go b/cmd/ateapi/internal/controlapi/converter_test.go new file mode 100644 index 00000000..89390028 --- /dev/null +++ b/cmd/ateapi/internal/controlapi/converter_test.go @@ -0,0 +1,60 @@ +// Copyright 2026 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package controlapi + +import ( + "testing" + + "github.com/agent-substrate/substrate/internal/proto/ateletpb" + atev1alpha1 "github.com/agent-substrate/substrate/pkg/api/v1alpha1" +) + +func TestToAteletSnapshotScope(t *testing.T) { + tests := []struct { + name string + in atev1alpha1.SnapshotScope + expected ateletpb.SnapshotScope + }{ + { + name: "Process scope", + in: atev1alpha1.SnapshotScopeProcess, + expected: ateletpb.SnapshotScope_SNAPSHOT_SCOPE_PROCESS, + }, + { + name: "HomeDir scope", + in: atev1alpha1.SnapshotScopeHomedir, + expected: ateletpb.SnapshotScope_SNAPSHOT_SCOPE_HOMEDIR, + }, + { + name: "Default scope (empty)", + in: "", + expected: ateletpb.SnapshotScope_SNAPSHOT_SCOPE_PROCESS, + }, + { + name: "Default scope (unknown)", + in: "unknown", + expected: ateletpb.SnapshotScope_SNAPSHOT_SCOPE_PROCESS, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := toAteletSnapshotScope(tt.in) + if result != tt.expected { + t.Errorf("toAteletSnapshotScope(%q) = %v, want %v", tt.in, result, tt.expected) + } + }) + } +} diff --git a/cmd/ateapi/internal/controlapi/workflow_pause.go b/cmd/ateapi/internal/controlapi/workflow_pause.go index 3678c625..56ad4771 100644 --- a/cmd/ateapi/internal/controlapi/workflow_pause.go +++ b/cmd/ateapi/internal/controlapi/workflow_pause.go @@ -28,6 +28,7 @@ import ( listersv1alpha1 "github.com/agent-substrate/substrate/pkg/client/listers/api/v1alpha1" "github.com/agent-substrate/substrate/pkg/proto/ateapipb" "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/client-go/kubernetes" ) // PauseInput holds the immutable parameters requested by the client. 
@@ -91,7 +92,9 @@ func (s *MarkPausingStep) Execute(ctx context.Context, input *PauseInput, state func (s *MarkPausingStep) RetryBackoff() *wait.Backoff { return nil } type CallAteletPauseStep struct { - dialer *AteletDialer + dialer *AteletDialer + kubeClient kubernetes.Interface + secretCache *envSecretCache } func (s *CallAteletPauseStep) Name() string { return "CallAteletPause" } @@ -114,6 +117,11 @@ func (s *CallAteletPauseStep) Execute(ctx context.Context, input *PauseInput, st } client := ateletpb.NewAteomHerderClient(ateletConn) + workloadSpec, err := workloadSpecFromActorTemplate(ctx, s.kubeClient, s.secretCache, state.ActorTemplate) + if err != nil { + return err + } + // Checkpoint does not carry the sandbox config: atelet uses the version the // actor is currently running (recorded on-node at Run/Restore) and pins it // into the snapshot manifest. @@ -122,36 +130,16 @@ func (s *CallAteletPauseStep) Execute(ctx context.Context, input *PauseInput, st ActorTemplateNamespace: state.Actor.GetActorTemplateNamespace(), ActorTemplateName: state.Actor.GetActorTemplateName(), ActorId: state.Actor.GetActorId(), - Spec: &ateletpb.WorkloadSpec{ - PauseImage: state.ActorTemplate.Spec.PauseImage, - }, - Type: ateletpb.CheckpointType_CHECKPOINT_TYPE_LOCAL, + Spec: workloadSpec, + Type: ateletpb.CheckpointType_CHECKPOINT_TYPE_LOCAL, Config: &ateletpb.CheckpointRequest_LocalConfig{ LocalConfig: &ateletpb.LocalCheckpointConfiguration{ SnapshotPrefix: state.Actor.InProgressSnapshot, }, }, + Scope: toAteletSnapshotScope(state.ActorTemplate.Spec.SnapshotsConfig.OnPause), } - for _, ctr := range state.ActorTemplate.Spec.Containers { - ateletCtr := &ateletpb.Container{ - Name: ctr.Name, - Image: ctr.Image, - Command: ctr.Command, - Readyz: toAteletReadyz(ctr.Readyz), - } - for _, env := range ctr.Env { - var val string - if env.Value != nil { - val = *env.Value - } - ateletEnv := &ateletpb.EnvEntry{ - Name: env.Name, - Value: val, - } - ateletCtr.Env = append(ateletCtr.Env, 
ateletEnv) - } - req.Spec.Containers = append(req.Spec.Containers, ateletCtr) - } + _, err = client.Checkpoint(ctx, req) if err != nil { return fmt.Errorf("while checkpointing workload: %w", err) diff --git a/cmd/ateapi/internal/controlapi/workflow_resume.go b/cmd/ateapi/internal/controlapi/workflow_resume.go index ddc877f9..49ca8161 100644 --- a/cmd/ateapi/internal/controlapi/workflow_resume.go +++ b/cmd/ateapi/internal/controlapi/workflow_resume.go @@ -275,6 +275,7 @@ func (s *CallAteletRestoreStep) Execute(ctx context.Context, input *ResumeInput, SnapshotPrefix: state.Actor.GetLatestSnapshotInfo().GetLocal().SnapshotPrefix, }, } + req.Scope = toAteletSnapshotScope(state.ActorTemplate.Spec.SnapshotsConfig.OnPause) case ateapipb.SnapshotType_SNAPSHOT_TYPE_EXTERNAL: req.Type = ateletpb.CheckpointType_CHECKPOINT_TYPE_EXTERNAL req.Config = &ateletpb.RestoreRequest_ExternalConfig{ @@ -282,6 +283,7 @@ func (s *CallAteletRestoreStep) Execute(ctx context.Context, input *ResumeInput, SnapshotUriPrefix: state.Actor.GetLatestSnapshotInfo().GetExternal().SnapshotUriPrefix, }, } + req.Scope = toAteletSnapshotScope(state.ActorTemplate.Spec.SnapshotsConfig.OnCommit) default: return fmt.Errorf("unsupported snapshot type: %v", state.Actor.GetLatestSnapshotInfo().GetType()) } @@ -308,6 +310,7 @@ func (s *CallAteletRestoreStep) Execute(ctx context.Context, input *ResumeInput, SnapshotUriPrefix: snapshot, }, }, + Scope: toAteletSnapshotScope(state.ActorTemplate.Spec.SnapshotsConfig.OnCommit), } _, err = client.Restore(ctx, req) if err != nil { diff --git a/cmd/ateapi/internal/controlapi/workflow_suspend.go b/cmd/ateapi/internal/controlapi/workflow_suspend.go index 73d8e34d..51df1ade 100644 --- a/cmd/ateapi/internal/controlapi/workflow_suspend.go +++ b/cmd/ateapi/internal/controlapi/workflow_suspend.go @@ -29,6 +29,7 @@ import ( listersv1alpha1 "github.com/agent-substrate/substrate/pkg/client/listers/api/v1alpha1" "github.com/agent-substrate/substrate/pkg/proto/ateapipb" 
"k8s.io/apimachinery/pkg/util/wait" + "k8s.io/client-go/kubernetes" ) // SuspendInput holds the immutable parameters requested by the client. @@ -93,7 +94,9 @@ func (s *MarkSuspendingStep) Execute(ctx context.Context, input *SuspendInput, s func (s *MarkSuspendingStep) RetryBackoff() *wait.Backoff { return nil } type CallAteletSuspendStep struct { - dialer *AteletDialer + dialer *AteletDialer + kubeClient kubernetes.Interface + secretCache *envSecretCache } func (s *CallAteletSuspendStep) Name() string { return "CallAteletSuspend" } @@ -116,6 +119,11 @@ func (s *CallAteletSuspendStep) Execute(ctx context.Context, input *SuspendInput } client := ateletpb.NewAteomHerderClient(ateletConn) + workloadSpec, err := workloadSpecFromActorTemplate(ctx, s.kubeClient, s.secretCache, state.ActorTemplate) + if err != nil { + return err + } + // Checkpoint does not carry the sandbox config: atelet uses the version the // actor is currently running (recorded on-node at Run/Restore) and pins it // into the snapshot manifest. 
@@ -124,36 +132,16 @@ func (s *CallAteletSuspendStep) Execute(ctx context.Context, input *SuspendInput ActorTemplateNamespace: state.Actor.GetActorTemplateNamespace(), ActorTemplateName: state.Actor.GetActorTemplateName(), ActorId: state.Actor.GetActorId(), - Spec: &ateletpb.WorkloadSpec{ - PauseImage: state.ActorTemplate.Spec.PauseImage, - }, - Type: ateletpb.CheckpointType_CHECKPOINT_TYPE_EXTERNAL, + Spec: workloadSpec, + Type: ateletpb.CheckpointType_CHECKPOINT_TYPE_EXTERNAL, Config: &ateletpb.CheckpointRequest_ExternalConfig{ ExternalConfig: &ateletpb.ExternalCheckpointConfiguration{ SnapshotUriPrefix: state.Actor.GetInProgressSnapshot(), }, }, + Scope: toAteletSnapshotScope(state.ActorTemplate.Spec.SnapshotsConfig.OnCommit), } - for _, ctr := range state.ActorTemplate.Spec.Containers { - ateletCtr := &ateletpb.Container{ - Name: ctr.Name, - Image: ctr.Image, - Command: ctr.Command, - Readyz: toAteletReadyz(ctr.Readyz), - } - for _, env := range ctr.Env { - var val string - if env.Value != nil { - val = *env.Value - } - ateletEnv := &ateletpb.EnvEntry{ - Name: env.Name, - Value: val, - } - ateletCtr.Env = append(ateletCtr.Env, ateletEnv) - } - req.Spec.Containers = append(req.Spec.Containers, ateletCtr) - } + _, err = client.Checkpoint(ctx, req) if err != nil { return fmt.Errorf("while checkpointing workload: %w", err) diff --git a/cmd/ateapi/internal/controlapi/workload_spec.go b/cmd/ateapi/internal/controlapi/workload_spec.go index 6986a46b..5d936262 100644 --- a/cmd/ateapi/internal/controlapi/workload_spec.go +++ b/cmd/ateapi/internal/controlapi/workload_spec.go @@ -36,6 +36,22 @@ func workloadSpecFromActorTemplate(ctx context.Context, kubeClient kubernetes.In workloadSpec := &ateletpb.WorkloadSpec{ PauseImage: actorTemplate.Spec.PauseImage, } + + // add volumes + for _, vol := range actorTemplate.Spec.Volumes { + // volume is homedir type + if vol.VolumeSource.HomeDir != nil { + ateletVol := &ateletpb.Volume{ + Name: vol.Name, + Type: 
ateletpb.VolumeType_VOLUME_TYPE_HOMEDIR, + Source: &ateletpb.Volume_HomeDir{ + HomeDir: &ateletpb.HomedirVolume{}, + }, + } + workloadSpec.Volumes = append(workloadSpec.Volumes, ateletVol) + } + } + resolver := envResolver{ kubeClient: kubeClient, namespace: actorTemplate.Namespace, @@ -58,6 +74,13 @@ func workloadSpecFromActorTemplate(ctx context.Context, kubeClient kubernetes.In ateletCtr.Env = append(ateletCtr.Env, ateletEnv) } } + for _, mount := range ctr.VolumeMounts { + ateletCtr.VolumeMounts = append(ateletCtr.VolumeMounts, &ateletpb.VolumeMount{ + Name: mount.Name, + MountPath: mount.MountPath, + }) + } + workloadSpec.Containers = append(workloadSpec.Containers, ateletCtr) } diff --git a/cmd/ateapi/internal/controlapi/workload_spec_test.go b/cmd/ateapi/internal/controlapi/workload_spec_test.go index 595f8090..9713c937 100644 --- a/cmd/ateapi/internal/controlapi/workload_spec_test.go +++ b/cmd/ateapi/internal/controlapi/workload_spec_test.go @@ -26,72 +26,65 @@ import ( "google.golang.org/protobuf/testing/protocmp" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/kubernetes/fake" "k8s.io/utils/ptr" ) -func TestWorkloadSpecFromActorTemplateResolvesValueFromEnv(t *testing.T) { - ctx := context.Background() - kubeClient := fake.NewSimpleClientset( - &corev1.Secret{ - ObjectMeta: metav1.ObjectMeta{ - Name: "some-secret", - Namespace: "agent-ns", - }, - Data: map[string][]byte{ - "some-key": []byte("some-value"), +func TestWorkloadSpecFromActorTemplate(t *testing.T) { + tests := []struct { + name string + secrets []runtime.Object + template *atev1alpha1.ActorTemplate + want *ateletpb.WorkloadSpec + wantErrCode codes.Code + }{ + { + name: "resolves literal and secretKeyRef env", + secrets: []runtime.Object{ + &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "some-secret", Namespace: "agent-ns"}, + Data: map[string][]byte{"some-key": []byte("some-value")}, + }, }, - }, - ) - - got, 
err := workloadSpecFromActorTemplate(ctx, kubeClient, nil, &atev1alpha1.ActorTemplate{ - ObjectMeta: metav1.ObjectMeta{ - Name: "tmpl1", - Namespace: "agent-ns", - }, - Spec: atev1alpha1.ActorTemplateSpec{ - PauseImage: "pause", - Containers: []atev1alpha1.Container{ - { - Name: "main", - Image: "main", - Command: []string{"/main"}, - Env: []atev1alpha1.EnvVar{ - { - Name: "LITERAL", - Value: ptr.To("plain"), - }, + template: &atev1alpha1.ActorTemplate{ + ObjectMeta: metav1.ObjectMeta{Name: "tmpl1", Namespace: "agent-ns"}, + Spec: atev1alpha1.ActorTemplateSpec{ + PauseImage: "pause", + Containers: []atev1alpha1.Container{ { - Name: "SOME_KEY", - ValueFrom: &atev1alpha1.EnvVarSource{ - SecretKeyRef: &atev1alpha1.SecretKeySelector{ - Name: "some-secret", - Key: "some-key", + Name: "main", + Image: "main", + Command: []string{"/main"}, + Env: []atev1alpha1.EnvVar{ + {Name: "LITERAL", Value: ptr.To("plain")}, + { + Name: "SOME_KEY", + ValueFrom: &atev1alpha1.EnvVarSource{ + SecretKeyRef: &atev1alpha1.SecretKeySelector{Name: "some-secret", Key: "some-key"}, + }, }, }, }, }, }, }, - }, - }) - if err != nil { - t.Fatalf("workloadSpecFromActorTemplate failed: %v", err) - } - - want := &ateletpb.WorkloadSpec{ - PauseImage: "pause", - Containers: []*ateletpb.Container{ - { - Name: "main", - Image: "main", - Command: []string{"/main"}, - Env: []*ateletpb.EnvEntry{ - {Name: "LITERAL", Value: "plain"}, - {Name: "SOME_KEY", Value: "some-value"}, + want: &ateletpb.WorkloadSpec{ + PauseImage: "pause", + Containers: []*ateletpb.Container{ + { + Name: "main", + Image: "main", + Command: []string{"/main"}, + Env: []*ateletpb.EnvEntry{ + {Name: "LITERAL", Value: "plain"}, + {Name: "SOME_KEY", Value: "some-value"}, + }, + }, }, }, }, +<<<<<<< HEAD } if diff := cmp.Diff(want, got, protocmp.Transform()); diff != "" { t.Errorf("WorkloadSpec mismatch (-want +got):\n%s", diff) @@ -155,86 +148,198 @@ func TestWorkloadSpecFromActorTemplateOptionalSecretKeyRefSkipsMissingSecret(t * Name: 
"main", Image: "main", Env: []atev1alpha1.EnvVar{ +======= + { + name: "skips optional missing secret", + template: &atev1alpha1.ActorTemplate{ + ObjectMeta: metav1.ObjectMeta{Name: "tmpl1", Namespace: "agent-ns"}, + Spec: atev1alpha1.ActorTemplateSpec{ + Containers: []atev1alpha1.Container{ +>>>>>>> 3962a2c (implement suspend/resume with homedir support) { - Name: "OPTIONAL", - ValueFrom: &atev1alpha1.EnvVarSource{ - SecretKeyRef: &atev1alpha1.SecretKeySelector{ - Name: "missing", - Key: "key", - Optional: &optional, + Name: "main", + Image: "main", + Env: []atev1alpha1.EnvVar{ + { + Name: "OPTIONAL", + ValueFrom: &atev1alpha1.EnvVarSource{ + SecretKeyRef: &atev1alpha1.SecretKeySelector{Name: "missing", Key: "key", Optional: ptr.To(true)}, + }, }, }, }, }, }, }, + want: &ateletpb.WorkloadSpec{ + Containers: []*ateletpb.Container{{Name: "main", Image: "main"}}, + }, }, - }) - if err != nil { - t.Fatalf("workloadSpecFromActorTemplate failed: %v", err) - } - if len(got.GetContainers()) != 1 { - t.Fatalf("expected one container, got %d", len(got.GetContainers())) - } - if len(got.GetContainers()[0].GetEnv()) != 0 { - t.Fatalf("expected optional missing env to be skipped, got %v", got.GetContainers()[0].GetEnv()) - } -} - -func TestWorkloadSpecFromActorTemplateSecretKeyRefMissingSecretFails(t *testing.T) { - _, err := workloadSpecFromActorTemplate(context.Background(), fake.NewSimpleClientset(), nil, &atev1alpha1.ActorTemplate{ - ObjectMeta: metav1.ObjectMeta{ - Name: "tmpl1", - Namespace: "agent-ns", - }, - Spec: atev1alpha1.ActorTemplateSpec{ - Containers: []atev1alpha1.Container{ - { - Name: "main", - Image: "main", - Env: []atev1alpha1.EnvVar{ + { + name: "required missing secret fails", + template: &atev1alpha1.ActorTemplate{ + ObjectMeta: metav1.ObjectMeta{Name: "tmpl1", Namespace: "agent-ns"}, + Spec: atev1alpha1.ActorTemplateSpec{ + Containers: []atev1alpha1.Container{ { - Name: "REQUIRED", - ValueFrom: &atev1alpha1.EnvVarSource{ - SecretKeyRef: 
&atev1alpha1.SecretKeySelector{ - Name: "missing", - Key: "key", + Name: "main", + Image: "main", + Env: []atev1alpha1.EnvVar{ + { + Name: "REQUIRED", + ValueFrom: &atev1alpha1.EnvVarSource{ + SecretKeyRef: &atev1alpha1.SecretKeySelector{Name: "missing", Key: "key"}, + }, }, }, }, }, }, }, + wantErrCode: codes.FailedPrecondition, }, - }) - if status.Code(err) != codes.FailedPrecondition { - t.Fatalf("expected FailedPrecondition, got %v: %v", status.Code(err), err) - } -} - -func TestWorkloadSpecFromActorTemplateEmptyValueFromFails(t *testing.T) { - _, err := workloadSpecFromActorTemplate(context.Background(), fake.NewSimpleClientset(), nil, &atev1alpha1.ActorTemplate{ - ObjectMeta: metav1.ObjectMeta{ - Name: "tmpl1", - Namespace: "agent-ns", + { + name: "empty valueFrom fails", + template: &atev1alpha1.ActorTemplate{ + ObjectMeta: metav1.ObjectMeta{Name: "tmpl1", Namespace: "agent-ns"}, + Spec: atev1alpha1.ActorTemplateSpec{ + Containers: []atev1alpha1.Container{ + { + Name: "main", + Image: "main", + Env: []atev1alpha1.EnvVar{ + {Name: "EMPTY", ValueFrom: &atev1alpha1.EnvVarSource{}}, + }, + }, + }, + }, + }, + wantErrCode: codes.FailedPrecondition, }, - Spec: atev1alpha1.ActorTemplateSpec{ - Containers: []atev1alpha1.Container{ - { - Name: "main", - Image: "main", - Env: []atev1alpha1.EnvVar{ + { + name: "converts HomeDir volume and mounts", + template: &atev1alpha1.ActorTemplate{ + ObjectMeta: metav1.ObjectMeta{Name: "tmpl1", Namespace: "agent-ns"}, + Spec: atev1alpha1.ActorTemplateSpec{ + PauseImage: "pause", + Volumes: []atev1alpha1.Volume{ + {Name: "home", VolumeSource: atev1alpha1.VolumeSource{HomeDir: &atev1alpha1.HomedirVolumeSource{}}}, + }, + Containers: []atev1alpha1.Container{ { - Name: "EMPTY", - ValueFrom: &atev1alpha1.EnvVarSource{}, + Name: "main", + Image: "main", + VolumeMounts: []atev1alpha1.VolumeMount{ + {Name: "home", MountPath: "/home/user"}, + {Name: "home", MountPath: "/workspace"}, + }, + }, + }, + }, + }, + want: &ateletpb.WorkloadSpec{ 
+ PauseImage: "pause", + Volumes: []*ateletpb.Volume{ + { + Name: "home", + Type: ateletpb.VolumeType_VOLUME_TYPE_HOMEDIR, + Source: &ateletpb.Volume_HomeDir{HomeDir: &ateletpb.HomedirVolume{}}, + }, + }, + Containers: []*ateletpb.Container{ + { + Name: "main", + Image: "main", + VolumeMounts: []*ateletpb.VolumeMount{ + {Name: "home", MountPath: "/home/user"}, + {Name: "home", MountPath: "/workspace"}, }, }, }, }, }, - }) - if status.Code(err) != codes.FailedPrecondition { - t.Fatalf("expected FailedPrecondition, got %v: %v", status.Code(err), err) + { + name: "skips non-HomeDir volumes", + template: &atev1alpha1.ActorTemplate{ + ObjectMeta: metav1.ObjectMeta{Name: "tmpl1", Namespace: "agent-ns"}, + Spec: atev1alpha1.ActorTemplateSpec{ + Volumes: []atev1alpha1.Volume{ + {Name: "unsupported", VolumeSource: atev1alpha1.VolumeSource{}}, + {Name: "home", VolumeSource: atev1alpha1.VolumeSource{HomeDir: &atev1alpha1.HomedirVolumeSource{}}}, + }, + Containers: []atev1alpha1.Container{ + { + Name: "main", + Image: "main", + VolumeMounts: []atev1alpha1.VolumeMount{ + {Name: "home", MountPath: "/workspace"}, + }, + }, + }, + }, + }, + want: &ateletpb.WorkloadSpec{ + Volumes: []*ateletpb.Volume{ + { + Name: "home", + Type: ateletpb.VolumeType_VOLUME_TYPE_HOMEDIR, + Source: &ateletpb.Volume_HomeDir{HomeDir: &ateletpb.HomedirVolume{}}, + }, + }, + Containers: []*ateletpb.Container{ + { + Name: "main", + Image: "main", + VolumeMounts: []*ateletpb.VolumeMount{ + {Name: "home", MountPath: "/workspace"}, + }, + }, + }, + }, + }, + { + name: "container without volume mounts has none", + template: &atev1alpha1.ActorTemplate{ + ObjectMeta: metav1.ObjectMeta{Name: "tmpl1", Namespace: "agent-ns"}, + Spec: atev1alpha1.ActorTemplateSpec{ + Volumes: []atev1alpha1.Volume{ + {Name: "home", VolumeSource: atev1alpha1.VolumeSource{HomeDir: &atev1alpha1.HomedirVolumeSource{}}}, + }, + Containers: []atev1alpha1.Container{ + {Name: "main", Image: "main"}, + }, + }, + }, + want: 
&ateletpb.WorkloadSpec{ + Volumes: []*ateletpb.Volume{ + { + Name: "home", + Type: ateletpb.VolumeType_VOLUME_TYPE_HOMEDIR, + Source: &ateletpb.Volume_HomeDir{HomeDir: &ateletpb.HomedirVolume{}}, + }, + }, + Containers: []*ateletpb.Container{{Name: "main", Image: "main"}}, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + kubeClient := fake.NewSimpleClientset(tt.secrets...) + got, err := workloadSpecFromActorTemplate(context.Background(), kubeClient, nil, tt.template) + if tt.wantErrCode != codes.OK { + if status.Code(err) != tt.wantErrCode { + t.Fatalf("error code = %v, want %v: %v", status.Code(err), tt.wantErrCode, err) + } + return + } + if err != nil { + t.Fatalf("workloadSpecFromActorTemplate failed: %v", err) + } + if diff := cmp.Diff(tt.want, got, protocmp.Transform()); diff != "" { + t.Errorf("WorkloadSpec mismatch (-want +got):\n%s", diff) + } + }) } } diff --git a/cmd/atelet/main.go b/cmd/atelet/main.go index 9a8f88cd..14154b2f 100644 --- a/cmd/atelet/main.go +++ b/cmd/atelet/main.go @@ -338,6 +338,7 @@ func (s *AteomHerder) Checkpoint(ctx context.Context, req *ateletpb.CheckpointRe RunscPath: runscPathFor(assetPaths), RuntimeAssetPaths: assetPaths, Spec: buildAteomWorkloadSpec(req.GetSpec()), + Scope: toAteomSnapshotScope(req.GetScope()), }) if err != nil { return nil, fmt.Errorf("while calling ateom.CheckpointWorkload: %w", err) @@ -349,11 +350,11 @@ func (s *AteomHerder) Checkpoint(ctx context.Context, req *ateletpb.CheckpointRe switch req.GetType() { case ateletpb.CheckpointType_CHECKPOINT_TYPE_EXTERNAL: - if err := s.uploadExternalCheckpoint(ctx, req, checkpointDir, sandboxRec); err != nil { + if err := s.uploadExternalCheckpoint(ctx, req, checkpointDir, sandboxRec, req.GetScope() == ateletpb.SnapshotScope_SNAPSHOT_SCOPE_PROCESS, hasHomedirVolumeMount(req.GetSpec())); err != nil { return nil, err } case ateletpb.CheckpointType_CHECKPOINT_TYPE_LOCAL: - if err := s.moveLocalCheckpoint(ctx, req, checkpointDir, 
sandboxRec); err != nil { + if err := s.moveLocalCheckpoint(ctx, req, checkpointDir, sandboxRec, req.GetScope() == ateletpb.SnapshotScope_SNAPSHOT_SCOPE_PROCESS, hasHomedirVolumeMount(req.GetSpec())); err != nil { return nil, err } default: @@ -367,7 +368,35 @@ func (s *AteomHerder) Checkpoint(ctx context.Context, req *ateletpb.CheckpointRe return &ateletpb.CheckpointResponse{}, nil } -func (s *AteomHerder) moveLocalCheckpoint(ctx context.Context, req *ateletpb.CheckpointRequest, checkpointDir string, rec *sandboxAssetsRecord) error { +// returns true if at least one of the containers in the workload spec has a homedir volume mount +func hasHomedirVolumeMount(spec *ateletpb.WorkloadSpec) bool { + hdv := make(map[string]bool) + for _, v := range spec.GetVolumes() { + if v.GetType() == ateletpb.VolumeType_VOLUME_TYPE_HOMEDIR { + hdv[v.GetName()] = true + } + } + for _, ctr := range spec.GetContainers() { + for _, vm := range ctr.GetVolumeMounts() { + if hdv[vm.GetName()] { + return true + } + } + } + return false +} + +func toAteomSnapshotScope(scope ateletpb.SnapshotScope) ateompb.SnapshotScope { + // assumption the request already been valdated and scope is in the valid values set + switch scope { + case ateletpb.SnapshotScope_SNAPSHOT_SCOPE_HOMEDIR: + return ateompb.SnapshotScope_SNAPSHOT_SCOPE_HOMEDIR + default: + return ateompb.SnapshotScope_SNAPSHOT_SCOPE_PROCESS + } +} + +func (s *AteomHerder) moveLocalCheckpoint(ctx context.Context, req *ateletpb.CheckpointRequest, checkpointDir string, rec *sandboxAssetsRecord, moveProcessFiles bool, moveHomedirFiles bool) error { localCheckpointPath := filepath.Join(ateompath.LocalCheckpointsDir(req.GetActorTemplateNamespace(), req.GetActorTemplateName(), req.GetActorId()), req.GetLocalConfig().GetSnapshotPrefix()) if err := os.MkdirAll(localCheckpointPath, 0o700); err != nil { return fmt.Errorf("while creating local checkpoint directory: %w", err) @@ -375,14 +404,32 @@ func (s *AteomHerder) moveLocalCheckpoint(ctx 
context.Context, req *ateletpb.Che ns, tmpl := req.GetActorTemplateNamespace(), req.GetActorTemplateName() - // Move exactly the files ateom reported. - for _, fileName := range rec.SnapshotFiles { - src := filepath.Join(checkpointDir, fileName) - dst := filepath.Join(localCheckpointPath, fileName) - recordSnapshotSize(ctx, strings.TrimSuffix(fileName, ".img"), src, ns, tmpl) + if moveProcessFiles { + // Move exactly the files ateom reported. + for _, fileName := range rec.SnapshotFiles { + src := filepath.Join(checkpointDir, fileName) + dst := filepath.Join(localCheckpointPath, fileName) + recordSnapshotSize(ctx, strings.TrimSuffix(fileName, ".img"), src, ns, tmpl) - if err := os.Rename(src, dst); err != nil { - return fmt.Errorf("failed to move %s to %s: %w", src, dst, err) + if err := os.Rename(src, dst); err != nil { + return fmt.Errorf("failed to move %s to %s: %w", src, dst, err) + } + } + } + + if moveHomedirFiles { + // move homedir files + homedirCheckpointDir := filepath.Join(checkpointDir, ateompath.HomedirSnapshotsSubfoldderName) + homeDirLocaclCheckpointPath := filepath.Join(localCheckpointPath, ateompath.HomedirSnapshotsSubfoldderName) + if err := os.MkdirAll(homeDirLocaclCheckpointPath, 0o700); err != nil { + return fmt.Errorf("while creating local checkpoint directory: %w", err) + } + for _, fileName := range []string{"fscheckpoint.json", "multitar.img", "pages.img", "pages_meta.img"} { + src := filepath.Join(homedirCheckpointDir, fileName) + dst := filepath.Join(homeDirLocaclCheckpointPath, fileName) + if err := os.Rename(src, dst); err != nil { + return fmt.Errorf("failed to move %s to %s: %w", src, dst, err) + } } } @@ -399,25 +446,45 @@ func (s *AteomHerder) moveLocalCheckpoint(ctx context.Context, req *ateletpb.Che return nil } -func (s *AteomHerder) uploadExternalCheckpoint(ctx context.Context, req *ateletpb.CheckpointRequest, checkpointDir string, rec *sandboxAssetsRecord) error { +func (s *AteomHerder) uploadExternalCheckpoint(ctx 
context.Context, req *ateletpb.CheckpointRequest, checkpointDir string, rec *sandboxAssetsRecord, uploadProcessFiles bool, uploadHomedirFiles bool) error { ns, tmpl := req.GetActorTemplateNamespace(), req.GetActorTemplateName() prefix := strings.TrimSuffix(req.GetExternalConfig().GetSnapshotUriPrefix(), "/") // Upload exactly the files ateom reported (each zstd-compressed). g, gCtx := errgroup.WithContext(ctx) - for _, fileName := range rec.SnapshotFiles { - fileName := fileName - local := filepath.Join(checkpointDir, fileName) - recordSnapshotSize(ctx, strings.TrimSuffix(fileName, ".img"), local, ns, tmpl) - g.Go(func() error { - if err := ategcs.SendLocalFileToGCSWithZstd(gCtx, s.gcsClient, prefix+"/"+fileName+".zstd", local); err != nil { - return fmt.Errorf("while uploading %s to GCS: %w", fileName, err) - } - return nil - }) + + if uploadProcessFiles { + + for _, fileName := range rec.SnapshotFiles { + fileName := fileName + local := filepath.Join(checkpointDir, fileName) + recordSnapshotSize(ctx, strings.TrimSuffix(fileName, ".img"), local, ns, tmpl) + g.Go(func() error { + if err := ategcs.SendLocalFileToGCSWithZstd(gCtx, s.gcsClient, prefix+"/"+fileName+".zstd", local); err != nil { + return fmt.Errorf("while uploading %s to GCS: %w", fileName, err) + } + return nil + }) + } + if err := g.Wait(); err != nil { + return err + } } - if err := g.Wait(); err != nil { - return err + + if uploadHomedirFiles { + for _, fileName := range []string{"fscheckpoint.json", "multitar.img", "pages.img", "pages_meta.img"} { + impPath := filepath.Join(checkpointDir, ateompath.HomedirSnapshotsSubfoldderName, fileName) + + before, _, _ := strings.Cut(fileName, ".") + recordSnapshotSize(ctx, before, impPath, ns, tmpl) + + if err := uploadIfExists(ctx, s.gcsClient, + fmt.Sprintf("%s/homedir/%s.zstd", prefix, fileName), + impPath, + ); err != nil { + return err + } + } } // Pin the sandbox binaries + snapshot file list into a manifest beside the @@ -465,7 +532,11 @@ func (s 
*AteomHerder) Restore(ctx context.Context, req *ateletpb.RestoreRequest) if err != nil { return nil, fmt.Errorf("while fetching snapshot manifest: %w", err) } - if sandboxRec, err = unmarshalSandboxRecord(manifest); err != nil { + sandboxRec, err = unmarshalSandboxRecord(manifest) + if err != nil { + return nil, err + } + if err := s.downloadExternalCheckpoint(ctx, prefix, checkpointDir, req.GetScope()); err != nil { return nil, err } case ateletpb.CheckpointType_CHECKPOINT_TYPE_LOCAL: @@ -475,7 +546,11 @@ func (s *AteomHerder) Restore(ctx context.Context, req *ateletpb.RestoreRequest) if err != nil { return nil, fmt.Errorf("while reading local snapshot manifest: %w", err) } - if sandboxRec, err = unmarshalSandboxRecord(manifest); err != nil { + sandboxRec, err = unmarshalSandboxRecord(manifest) + if err != nil { + return nil, err + } + if err := s.copyLocalCheckpoint(ctx, snapshotPrefix, localCheckpointDir, checkpointDir, req.GetScope()); err != nil { return nil, err } default: @@ -537,6 +612,7 @@ func (s *AteomHerder) Restore(ctx context.Context, req *ateletpb.RestoreRequest) RunscPath: runscPathFor(assetPaths), RuntimeAssetPaths: assetPaths, Spec: buildAteomWorkloadSpec(req.GetSpec()), + Scope: toAteomSnapshotScope(req.GetScope()), }); err != nil { return nil, fmt.Errorf("while calling ateom.RestoreWorkload: %w", err) } @@ -556,18 +632,36 @@ func (s *AteomHerder) Restore(ctx context.Context, req *ateletpb.RestoreRequest) return &ateletpb.RestoreResponse{}, nil } -func (s *AteomHerder) copyLocalCheckpoint(ctx context.Context, snapshotPrefix string, srcDir, dstDir string, files []string) error { - for _, fileName := range files { - if ctx.Err() != nil { - return fmt.Errorf("context cancelled: %w", ctx.Err()) +func (s *AteomHerder) copyLocalCheckpoint(ctx context.Context, snapshotPrefix string, srcDir, dstDir string, files []string, scope ateletpb.SnapshotScope) error { + switch scope { + case ateletpb.SnapshotScope_SNAPSHOT_SCOPE_PROCESS: + for _, fileName := 
range files { + if ctx.Err() != nil { + return fmt.Errorf("context cancelled: %w", ctx.Err()) + } + src := filepath.Join(srcDir, snapshotPrefix, fileName) + dst := filepath.Join(dstDir, fileName) + if _, err := copyFile(src, dst); err != nil { + return fmt.Errorf("failed to copy %s to %s: %w", src, dst, err) + } } - src := filepath.Join(srcDir, snapshotPrefix, fileName) - dst := filepath.Join(dstDir, fileName) - if _, err := copyFile(src, dst); err != nil { - return fmt.Errorf("failed to copy %s to %s: %w", src, dst, err) + case ateletpb.SnapshotScope_SNAPSHOT_SCOPE_HOMEDIR: + hdDstDir := filepath.Join(dstDir, ateompath.HomedirSnapshotsSubfoldderName) + if err := os.MkdirAll(hdDstDir, 0o700); err != nil { + return fmt.Errorf("while creating homedir directory: %w", err) } - } + for _, fileName := range []string{"fscheckpoint.json", "multitar.img", "pages.img", "pages_meta.img"} { + if ctx.Err() != nil { + return fmt.Errorf("context cancelled: %w", ctx.Err()) + } + src := filepath.Join(srcDir, snapshotPrefix, ateompath.HomedirSnapshotsSubfoldderName, fileName) + dst := filepath.Join(hdDstDir, fileName) + if _, err := copyFile(src, dst); err != nil { + return fmt.Errorf("failed to copy %s to %s: %w", src, dst, err) + } + } + } return nil } @@ -596,19 +690,39 @@ func copyFile(src, dst string) (int64, error) { return nBytes, err } -func (s *AteomHerder) downloadExternalCheckpoint(ctx context.Context, snapshotUriPrefix string, dstDir string, files []string) error { +func (s *AteomHerder) downloadExternalCheckpoint(ctx context.Context, snapshotUriPrefix string, dstDir string, files []string, scope ateletpb.SnapshotScope) error { prefix := strings.TrimSuffix(snapshotUriPrefix, "/") g, gCtx := errgroup.WithContext(ctx) - for _, fileName := range files { - fileName := fileName - local := filepath.Join(dstDir, fileName) - g.Go(func() error { - if err := ategcs.FetchLocalFileFromGCSWithZstd(gCtx, s.gcsClient, prefix+"/"+fileName+".zstd", local); err != nil { - return 
fmt.Errorf("while downloading %s from GCS: %w", fileName, err) - } - return nil - }) + + switch scope { + case ateletpb.SnapshotScope_SNAPSHOT_SCOPE_PROCESS: + g, gCtx := errgroup.WithContext(ctx) + for _, fileName := range files { + fileName := fileName + local := filepath.Join(dstDir, fileName) + g.Go(func() error { + if err := ategcs.FetchLocalFileFromGCSWithZstd(gCtx, s.gcsClient, prefix+"/"+fileName+".zstd", local); err != nil { + return fmt.Errorf("while downloading %s from GCS: %w", fileName, err) + } + return nil + }) + } + if err := g.Wait(); err != nil { + return err + } + case ateletpb.SnapshotScope_SNAPSHOT_SCOPE_HOMEDIR: + for _, fileName := range []string{"fscheckpoint.json", "multitar.img", "pages.img", "pages_meta.img"} { + remote := fmt.Sprintf("%s/%s/%s.zstd", prefix, ateompath.HomedirSnapshotsSubfoldderName, fileName) + g.Go(func() error { + local := filepath.Join(dstDir, ateompath.HomedirSnapshotsSubfoldderName, fileName) + if err := ategcs.FetchLocalFileFromGCSWithZstd(gCtx, s.gcsClient, remote, local); err != nil { + return fmt.Errorf("while downloading %s from GCS: %w", remote, err) + } + return nil + }) + } } + if err := g.Wait(); err != nil { return err } @@ -637,10 +751,36 @@ func (s *AteomHerder) prepareOCIBundles( return fmt.Errorf("while writing actor identity file: %w", err) } + hdVolumes := make(map[string]bool) + // make directories for all homedir volumes + for _, vol := range spec.GetVolumes() { + if vol.GetType() == ateletpb.VolumeType_VOLUME_TYPE_HOMEDIR { + hdVolumes[vol.GetName()] = true + volPath := ateompath.HomedirVolumeMountPoint(actorTemplateNamespace, actorTemplateName, actorID, vol.GetName()) + if err := os.MkdirAll(volPath, 0o700); err != nil { + return fmt.Errorf("while creating %q: %w", volPath, err) + } + } + } + g, gCtx := errgroup.WithContext(ctx) // Pause container. 
g.Go(func() error { + annotations := map[string]string{ + "io.kubernetes.cri.container-type": "sandbox", + "io.kubernetes.cri.container-name": "pause", + } + // add anotation for every homedir volume + // TODO(dberkov) needs to revist this logic once gVisor will support multiple homedir volumes. + for _, vol := range spec.GetVolumes() { + if vol.GetType() == ateletpb.VolumeType_VOLUME_TYPE_HOMEDIR { + annotations["dev.gvisor.spec.mount.homedir.type"] = "bind" + annotations["dev.gvisor.spec.mount.homedir.share"] = "container" + annotations["dev.gvisor.spec.mount.homedir.source"] = ateompath.HomedirVolumeMountPoint(actorTemplateNamespace, actorTemplateName, actorID, vol.GetName()) + } + } + if err := prepareOCIDirectory( gCtx, s.pullCache, @@ -649,12 +789,10 @@ func (s *AteomHerder) prepareOCIBundles( spec.GetPauseImage(), []string{"/pause"}, nil, - map[string]string{ - "io.kubernetes.cri.container-type": "sandbox", - "io.kubernetes.cri.container-name": "pause", - }, + annotations, netnsPath, "", // pause is sandbox infra; it gets no actor identity mount. 
+ nil, ); err != nil { return fmt.Errorf("while creating pause OCI bundle: %w", err) } @@ -668,6 +806,12 @@ func (s *AteomHerder) prepareOCIBundles( for _, env := range ctr.GetEnv() { envs = append(envs, fmt.Sprintf("%s=%s", env.GetName(), env.GetValue())) } + var hdMounts []*ateletpb.VolumeMount + for _, vm := range ctr.GetVolumeMounts() { + if hdVolumes[vm.GetName()] { + hdMounts = append(hdMounts, vm) + } + } g.Go(func() error { if err := prepareOCIDirectory( gCtx, @@ -684,6 +828,7 @@ func (s *AteomHerder) prepareOCIBundles( }, netnsPath, identityDir, + hdMounts, ); err != nil { return fmt.Errorf("while creating %q OCI bundle: %w", ctr.GetName(), err) } @@ -709,9 +854,16 @@ func (s *AteomHerder) dialAteom(ctx context.Context, targetAteomUid string) (ate func buildAteomWorkloadSpec(spec *ateletpb.WorkloadSpec) *ateompb.WorkloadSpec { out := &ateompb.WorkloadSpec{} for _, ctr := range spec.GetContainers() { + var hdMountPaths []string + for _, vm := range ctr.GetVolumeMounts() { + if hdVolumes[vm.GetName()] { + hdMountPaths = append(hdMountPaths, vm.GetMountPath()) + } + } out.Containers = append(out.Containers, &ateompb.Container{ - Name: ctr.GetName(), - Readyz: toAteomReadyz(ctr.GetReadyz()), + Name: ctr.GetName(), + HomeDirVolumes: hdMountPaths, + Readyz: toAteomReadyz(ctr.GetReadyz()), }) } return out @@ -734,6 +886,19 @@ func toAteomReadyz(in *ateletpb.Readyz) *ateompb.Readyz { return out } +// uploadIfExists uploads a local file to GCS (zstd-compressed) only if +// the file is present. Missing files are silently skipped — used for +// optional checkpoint side-files (pages.img, pages_meta.img). 
+func uploadIfExists(ctx context.Context, gcs ategcs.ObjectStorage, remoteURI, localPath string) error { + if _, err := os.Stat(localPath); err != nil { + return nil + } + if err := ategcs.SendLocalFileToGCSWithZstd(ctx, gcs, remoteURI, localPath); err != nil { + return fmt.Errorf("while uploading %s to GCS: %w", filepath.Base(localPath), err) + } + return out +} + type AteomDialer struct { conns *lru.Cache } @@ -776,6 +941,11 @@ func validateCheckpointRequest(req *ateletpb.CheckpointRequest) error { if err := validateActorRequest(req.GetActorTemplateNamespace(), req.GetActorTemplateName(), req.GetActorId(), req.GetTargetAteomUid(), req.GetSpec()); err != nil { return err } + + if err := validateSnapshotScope(req.GetScope()); err != nil { + return err + } + switch req.GetType() { case ateletpb.CheckpointType_CHECKPOINT_TYPE_EXTERNAL: if err := resources.ValidateSnapshotURIPrefix(req.GetExternalConfig().GetSnapshotUriPrefix()); err != nil { @@ -795,6 +965,11 @@ func validateRestoreRequest(req *ateletpb.RestoreRequest) error { if err := validateActorRequest(req.GetActorTemplateNamespace(), req.GetActorTemplateName(), req.GetActorId(), req.GetTargetAteomUid(), req.GetSpec()); err != nil { return err } + + if err := validateSnapshotScope(req.GetScope()); err != nil { + return err + } + switch req.GetType() { case ateletpb.CheckpointType_CHECKPOINT_TYPE_EXTERNAL: if err := resources.ValidateSnapshotURIPrefix(req.GetExternalConfig().GetSnapshotUriPrefix()); err != nil { @@ -810,6 +985,18 @@ func validateRestoreRequest(req *ateletpb.RestoreRequest) error { return nil } +func validateSnapshotScope(scope ateletpb.SnapshotScope) error { + switch scope { + case ateletpb.SnapshotScope_SNAPSHOT_SCOPE_PROCESS, + ateletpb.SnapshotScope_SNAPSHOT_SCOPE_HOMEDIR: + return nil + case ateletpb.SnapshotScope_SNAPSHOT_SCOPE_UNSPECIFIED: + return fmt.Errorf("snapshot scope must be non-zero") + default: + return fmt.Errorf("invalid snapshot scope: %v", scope) + } +} + // 
validateActorRequest is the shared core for the fields common to all three // RPCs. func validateActorRequest(namespace, template, actorID, targetAteomUID string, spec *ateletpb.WorkloadSpec) error { @@ -908,6 +1095,11 @@ func resetActorDirs(actorTemplateNamespace, actorTemplateName, actorID string) e return fmt.Errorf("while creating restore-state dir: %w", err) } + restoreStateHomeDir := filepath.Join(restoreStateDir, ateompath.HomedirSnapshotsSubfoldderName) + if err := os.MkdirAll(restoreStateHomeDir, 0o700); err != nil { + return fmt.Errorf("while creating restore-state homedir dir: %w", err) + } + // World-readable (0o755): bind-mounted into the actor, whose workload // reads it through the gofer. identityDir := ateompath.ActorIdentityDirPath(actorTemplateNamespace, actorTemplateName, actorID) @@ -918,5 +1110,13 @@ func resetActorDirs(actorTemplateNamespace, actorTemplateName, actorID string) e return fmt.Errorf("while creating actor identity dir: %w", err) } + homedirVolumesMountDir := ateompath.HommedirVolumeMountsDir(actorTemplateNamespace, actorTemplateName, actorID) + if err := os.RemoveAll(homedirVolumesMountDir); err != nil { + return fmt.Errorf("while deleting homedir volumes mount dir: %w", err) + } + if err := os.MkdirAll(homedirVolumesMountDir, 0o755); err != nil { + return fmt.Errorf("while creating homedir volumes mount dir: %w", err) + } + return nil } diff --git a/cmd/atelet/main_test.go b/cmd/atelet/main_test.go index cf531601..18fbedf1 100644 --- a/cmd/atelet/main_test.go +++ b/cmd/atelet/main_test.go @@ -135,6 +135,7 @@ func validCheckpointRequest() *ateletpb.CheckpointRequest { SnapshotUriPrefix: "gs://bucket/actors/1/snapshots/2/", }, }, + Scope: ateletpb.SnapshotScope_SNAPSHOT_SCOPE_PROCESS, } } @@ -151,6 +152,7 @@ func validRestoreRequest() *ateletpb.RestoreRequest { SnapshotUriPrefix: "gs://bucket/actors/1/snapshots/2/", }, }, + Scope: ateletpb.SnapshotScope_SNAPSHOT_SCOPE_PROCESS, } } @@ -200,6 +202,8 @@ func 
TestValidateCheckpointRequest(t *testing.T) { r.Config = &ateletpb.CheckpointRequest_LocalConfig{LocalConfig: &ateletpb.LocalCheckpointConfiguration{SnapshotPrefix: ""}} }), true}, {"unspecified snapshot type", makeReq(func(r *ateletpb.CheckpointRequest) { r.Type = ateletpb.CheckpointType_CHECKPOINT_TYPE_UNSPECIFIED }), true}, + {"unspecified snapshot scope", makeReq(func(r *ateletpb.CheckpointRequest) { r.Scope = ateletpb.SnapshotScope_SNAPSHOT_SCOPE_UNSPECIFIED }), true}, + {"invalid snapshot scope", makeReq(func(r *ateletpb.CheckpointRequest) { r.Scope = ateletpb.SnapshotScope(23) }), true}, } for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { @@ -233,6 +237,8 @@ func TestValidateRestoreRequest(t *testing.T) { r.Config = &ateletpb.RestoreRequest_LocalConfig{LocalConfig: &ateletpb.LocalCheckpointConfiguration{SnapshotPrefix: ""}} }), true}, {"unspecified snapshot type", makeReq(func(r *ateletpb.RestoreRequest) { r.Type = ateletpb.CheckpointType_CHECKPOINT_TYPE_UNSPECIFIED }), true}, + {"unspecified snapshot scope", makeReq(func(r *ateletpb.RestoreRequest) { r.Scope = ateletpb.SnapshotScope_SNAPSHOT_SCOPE_UNSPECIFIED }), true}, + {"invalid snapshot scope", makeReq(func(r *ateletpb.RestoreRequest) { r.Scope = ateletpb.SnapshotScope(23) }), true}, } for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { diff --git a/cmd/atelet/oci.go b/cmd/atelet/oci.go index 4451f636..fb0fe8f1 100644 --- a/cmd/atelet/oci.go +++ b/cmd/atelet/oci.go @@ -32,6 +32,8 @@ import ( "github.com/opencontainers/runtime-spec/specs-go" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/attribute" + + "github.com/agent-substrate/substrate/internal/proto/ateletpb" ) const ( @@ -51,7 +53,7 @@ const ( ActorIDFileName = "actor-id" ) -func prepareOCIDirectory(ctx context.Context, pullCache *memorypullcache.MemoryPullCache, actorTemplateNamespace, actorTemplateName, actorID, containerName, ref string, args []string, env []string, annotations map[string]string, netns string, 
identityDir string) error { +func prepareOCIDirectory(ctx context.Context, pullCache *memorypullcache.MemoryPullCache, actorTemplateNamespace, actorTemplateName, actorID, containerName, ref string, args []string, env []string, annotations map[string]string, netns string, identityDir string, homedirVolumeMounts []*ateletpb.VolumeMount) error { tracer := otel.Tracer("prepareOCIDirectory") ctx, span := tracer.Start(ctx, "prepareOCIDirectory") @@ -88,7 +90,7 @@ func prepareOCIDirectory(ctx context.Context, pullCache *memorypullcache.MemoryP } } - ociSpec := buildActorOCISpec(args, env, annotations, netns, identityDir) + ociSpec := buildActorOCISpec(actorTemplateNamespace, actorTemplateName, actorID, args, env, annotations, netns, identityDir, homedirVolumeMounts) ociSpecBytes, err := json.MarshalIndent(ociSpec, "", " ") if err != nil { return fmt.Errorf("while marshaling OCI spec: %w", err) @@ -105,7 +107,7 @@ func prepareOCIDirectory(ctx context.Context, pullCache *memorypullcache.MemoryP // When identityDir is non-empty it adds a read-only bind mount of that host // directory at IdentityMountPath so the actor can read its own ID (see // IdentityMountPath for why this is a bind mount rather than env vars). 
-func buildActorOCISpec(args []string, env []string, annotations map[string]string, netns string, identityDir string) *specs.Spec { +func buildActorOCISpec(actorTemplateNamespace string, actorTemplateName string, actorID string, args []string, env []string, annotations map[string]string, netns string, identityDir string, homedirVolumeMounts []*ateletpb.VolumeMount) *specs.Spec { envVars := []string{ "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", } @@ -149,7 +151,7 @@ func buildActorOCISpec(args []string, env []string, annotations map[string]strin }) } - return &specs.Spec{ + spec := &specs.Spec{ Process: &specs.Process{ User: specs.User{ UID: 0, @@ -217,6 +219,17 @@ func buildActorOCISpec(args []string, env []string, annotations map[string]strin }, Annotations: annotations, } + + // Prepare and mount homedirs. + for _, vm := range homedirVolumeMounts { + spec.Mounts = append(spec.Mounts, specs.Mount{ + Destination: vm.GetMountPath(), + Type: "bind", + Source: ateompath.HomedirVolumeMountPoint(actorTemplateNamespace, actorTemplateName, actorID, vm.GetName()), + }) + } + + return spec } // createMountPoint creates the directory mountPath (an absolute in-rootfs diff --git a/cmd/atelet/oci_test.go b/cmd/atelet/oci_test.go index 5b53f252..442336f5 100644 --- a/cmd/atelet/oci_test.go +++ b/cmd/atelet/oci_test.go @@ -23,6 +23,9 @@ import ( "slices" "strings" "testing" + + "github.com/agent-substrate/substrate/internal/ateompath" + "github.com/agent-substrate/substrate/internal/proto/ateletpb" ) type tarEntry struct { @@ -84,11 +87,13 @@ func runUntar(t *testing.T, entries []tarEntry) (string, error) { // With an identity dir, a read-only bind mount appears at IdentityMountPath. 
func TestBuildActorOCISpec_IdentityMount(t *testing.T) { spec := buildActorOCISpec( + "ns", "tmpl", "id", []string{"/app"}, []string{"FOO=bar"}, map[string]string{"k": "v"}, "/run/netns/x", "/host/actors/ns:tmpl:id/identity", + nil, ) found := false for _, m := range spec.Mounts { @@ -113,7 +118,7 @@ func TestBuildActorOCISpec_IdentityMount(t *testing.T) { // Without an identity dir (the pause container), no identity mount appears. func TestBuildActorOCISpec_NoIdentityMountForPause(t *testing.T) { - bare := buildActorOCISpec([]string{"/pause"}, nil, nil, "/run/netns/x", "") + bare := buildActorOCISpec("ns", "tmpl", "id", []string{"/pause"}, nil, nil, "/run/netns/x", "", nil) for _, m := range bare.Mounts { if m.Destination == IdentityMountPath { t.Errorf("identity mount must be absent when identityDir is empty") @@ -121,6 +126,43 @@ func TestBuildActorOCISpec_NoIdentityMountForPause(t *testing.T) { } } +// Each homedir volume mount becomes a bind mount whose source is the +// per-actor on-host HomeDirMountPoint for that volume name. 
+func TestBuildActorOCISpec_HomedirVolumeMounts(t *testing.T) { + const ns, tmpl, id = "ns", "tmpl", "id" + homedirs := []*ateletpb.VolumeMount{ + {Name: "data", MountPath: "/var/data"}, + {Name: "cache", MountPath: "/var/cache"}, + } + spec := buildActorOCISpec( + ns, tmpl, id, + []string{"/app"}, nil, nil, + "/run/netns/x", + "", + homedirs, + ) + + for _, vm := range homedirs { + wantSrc := ateompath.HomedirVolumeMountPoint(ns, tmpl, id, vm.Name) + found := false + for _, m := range spec.Mounts { + if m.Destination != vm.MountPath { + continue + } + found = true + if m.Source != wantSrc { + t.Errorf("homedir %q source = %q, want %q", vm.Name, m.Source, wantSrc) + } + if m.Type != "bind" { + t.Errorf("homedir %q type = %q, want bind", vm.Name, m.Type) + } + } + if !found { + t.Fatalf("homedir mount for %q missing; mounts=%v", vm.MountPath, spec.Mounts) + } + } +} + func TestCreateMountPoint(t *testing.T) { t.Run("creates target inside rootfs", func(t *testing.T) { root := t.TempDir() diff --git a/cmd/ateom-gvisor/main.go b/cmd/ateom-gvisor/main.go index c9b2bae9..23d6fae3 100644 --- a/cmd/ateom-gvisor/main.go +++ b/cmd/ateom-gvisor/main.go @@ -23,6 +23,7 @@ import ( "log/slog" "net" "os" + "path/filepath" "runtime" "sort" "sync" @@ -205,7 +206,7 @@ func (s *AteomService) RunWorkload(ctx context.Context, req *ateompb.RunWorkload } // Create and start pause container - if err := rcmd.cmdCreate(ctx, os.Stdout, "pause"); err != nil { + if err := rcmd.cmdCreate(ctx, os.Stdout, "pause", nil); err != nil { return nil, fmt.Errorf("while creating pause container: %w", err) } if err := rcmd.cmdStart(ctx, os.Stdout, "pause"); err != nil { @@ -220,7 +221,7 @@ func (s *AteomService) RunWorkload(ctx context.Context, req *ateompb.RunWorkload return nil, fmt.Errorf("while starting json log pipe for %q: %w", ac.GetName(), err) } defer pw.Close() - if err := rcmd.cmdCreate(ctx, pw, ac.GetName()); err != nil { + if err := rcmd.cmdCreate(ctx, pw, ac.GetName(), nil); err != nil { 
return nil, fmt.Errorf("while creating %q application container: %w", ac.GetName(), err) } if err := rcmd.cmdStart(ctx, pw, ac.GetName()); err != nil { @@ -261,9 +262,35 @@ func (s *AteomService) CheckpointWorkload(ctx context.Context, req *ateompb.Chec return nil, fmt.Errorf("while creating checkpoint directory: %w", err) } - // Checkpoint pause container (root of the sandbox) - if err := rcmd.cmdCheckpoint(ctx, "pause", checkpointPath); err != nil { - return nil, fmt.Errorf("while checkpointing pause: %w", err) + // Always take homedir snapshot if at least one container has homedir volume mount + // TODO(dberkov): this is a temporary workaround until gVisor will support taking homedir snapshots in a single request with process snapshot. + var hdv []string + for _, ctr := range req.GetSpec().GetContainers() { + hdv = append(hdv, ctr.GetHomeDirVolumes()...) + } + if len(hdv) > 0 { + // TODO(dberkov) add control for "resume=true" flag + // Checkpoint each homedir volume + + // prepare homedir checkpoint folder + fsCheckpointPath := filepath.Join(checkpointPath, ateompath.HomedirSnapshotsSubfoldderName) + if err := os.MkdirAll(fsCheckpointPath, 0o700); err != nil { + return nil, fmt.Errorf("while creating fscheckpoint directory: %w", err) + } + + // keep gVisor running if process snapshot is requested. 
+ leaveRunning := req.GetScope() == ateompb.SnapshotScope_SNAPSHOT_SCOPE_PROCESS + if err := rcmd.cmdFsCheckpoint(ctx, "pause", fsCheckpointPath, hdv, leaveRunning); err != nil { + return nil, fmt.Errorf("while fscheckpointing homedir %q: %w", hdv[0], err) + } + } + + // take process snapshot if requested + if req.GetScope() == ateompb.SnapshotScope_SNAPSHOT_SCOPE_PROCESS { + // Checkpoint pause container (root of the sandbox) + if err := rcmd.cmdCheckpoint(ctx, "pause", checkpointPath); err != nil { + return nil, fmt.Errorf("while checkpointing pause: %w", err) + } } // After checkpointing the sandbox root, runsc may no longer have a usable @@ -365,12 +392,25 @@ func (s *AteomService) RestoreWorkload(ctx context.Context, req *ateompb.Restore checkpointDir := ateompath.RestoreStateDir(req.GetActorTemplateNamespace(), req.GetActorTemplateName(), req.GetActorId()) - // Create and restore pause container - if err := rcmd.cmdCreate(ctx, os.Stdout, "pause"); err != nil { - return nil, fmt.Errorf("while creating pause container: %w", err) - } - if err := rcmd.cmdRestore(ctx, os.Stdout, "pause", checkpointDir); err != nil { - return nil, fmt.Errorf("while starting pause container: %w", err) + switch req.GetScope() { + case ateompb.SnapshotScope_SNAPSHOT_SCOPE_HOMEDIR: + // Create and restore pause container + if err := rcmd.cmdCreate(ctx, os.Stdout, "pause", []string{"--fs-restore-image-path", filepath.Join(checkpointDir, ateompath.HomedirSnapshotsSubfoldderName)}); err != nil { + return nil, fmt.Errorf("while creating pause container: %w", err) + } + if err := rcmd.cmdStart(ctx, os.Stdout, "pause"); err != nil { + return nil, fmt.Errorf("while starting pause container: %w", err) + } + case ateompb.SnapshotScope_SNAPSHOT_SCOPE_PROCESS: + // Create and restore pause container + if err := rcmd.cmdCreate(ctx, os.Stdout, "pause", nil); err != nil { + return nil, fmt.Errorf("while creating pause container: %w", err) + } + if err := rcmd.cmdRestore(ctx, os.Stdout, "pause", 
checkpointDir); err != nil { + return nil, fmt.Errorf("while starting pause container: %w", err) + } + default: + return nil, fmt.Errorf("unexpected snapshot scope: %v", req.GetScope()) } // Create and restore each application container, each with its own log pipe so @@ -381,11 +421,23 @@ func (s *AteomService) RestoreWorkload(ctx context.Context, req *ateompb.Restore return nil, fmt.Errorf("while starting json log pipe for %q: %w", ac.GetName(), err) } defer pw.Close() - if err := rcmd.cmdCreate(ctx, pw, ac.GetName()); err != nil { - return nil, fmt.Errorf("while creating %q application container: %w", ac.GetName(), err) - } - if err := rcmd.cmdRestore(ctx, pw, ac.GetName(), checkpointDir); err != nil { - return nil, fmt.Errorf("while starting %q application container: %w", ac.GetName(), err) + switch req.GetScope() { + case ateompb.SnapshotScope_SNAPSHOT_SCOPE_HOMEDIR: + if err := rcmd.cmdCreate(ctx, pw, ac.GetName(), nil); err != nil { + return nil, fmt.Errorf("while creating %q application container: %w", ac.GetName(), err) + } + if err := rcmd.cmdStart(ctx, pw, ac.GetName()); err != nil { + return nil, fmt.Errorf("while starting %q application container: %w", ac.GetName(), err) + } + case ateompb.SnapshotScope_SNAPSHOT_SCOPE_PROCESS: + if err := rcmd.cmdCreate(ctx, pw, ac.GetName(), nil); err != nil { + return nil, fmt.Errorf("while creating %q application container: %w", ac.GetName(), err) + } + if err := rcmd.cmdRestore(ctx, pw, ac.GetName(), checkpointDir); err != nil { + return nil, fmt.Errorf("while starting %q application container: %w", ac.GetName(), err) + } + default: + return nil, fmt.Errorf("unexpected snapshot scope: %v", req.GetScope()) } } diff --git a/cmd/ateom-gvisor/runsc.go b/cmd/ateom-gvisor/runsc.go index 19ba017a..430e4e9b 100644 --- a/cmd/ateom-gvisor/runsc.go +++ b/cmd/ateom-gvisor/runsc.go @@ -34,19 +34,17 @@ type runsc struct { actorID string } -func (r *runsc) cmdCreate(ctx context.Context, out io.Writer, containerName string) error { 
+func (r *runsc) cmdCreate(ctx context.Context, out io.Writer, containerName string, additionalArgs []string) error { reapLock.RLock() defer reapLock.RUnlock() slog.InfoContext(ctx, "About to run runsc create", slog.String("container", containerName)) - cmd := exec.CommandContext( - ctx, - r.path, + args := []string{ "-log-format", "json", "--alsologtostderr", // "-debug", - // "-debug-log", ateompath.RunscDebugLogDir(r.actorTemplateNamespace, r.actorTemplateName, r.actorID, containerName)+"/", + // "-debug-log", ateompath.RunscDebugLogDir(r.actorTemplateNamespace, r.actorTemplateName, r.actorID, containerName) + "/", // "-debug-to-user-log", // "-log-packets", // "-strace", @@ -54,7 +52,14 @@ func (r *runsc) cmdCreate(ctx context.Context, out io.Writer, containerName stri "create", "-bundle", ateompath.OCIBundlePath(r.actorTemplateNamespace, r.actorTemplateName, r.actorID, containerName), "-pid-file", ateompath.PIDFilePath(r.actorTemplateNamespace, r.actorTemplateName, r.actorID, containerName), - containerName, // Name of the container + } + + args = append(args, additionalArgs...) 
+ args = append(args, containerName) // Name of the container + cmd := exec.CommandContext( + ctx, + r.path, + args..., ) cmd.Stdout = out cmd.Stderr = out @@ -129,6 +134,48 @@ func (r *runsc) cmdCheckpoint(ctx context.Context, containerName, checkpointPath return nil } +func (r *runsc) cmdFsCheckpoint(ctx context.Context, containerName, checkpointPath string, homedirMounts []string, leaveRunning bool) error { + reapLock.RLock() + defer reapLock.RUnlock() + + slog.InfoContext(ctx, "About to run runsc fscheckpoint", slog.String("container", containerName)) + + args := []string{ + "-log-format", "json", + "--alsologtostderr", + // "-debug", + // "-debug-log", ateompath.RunscDebugLogDir(r.actorTemplateNamespace, r.actorTemplateName, r.actorID, containerName)+"/", + // "-debug-to-user-log", + // "-log-packets", + // "-strace", + "-root", ateompath.RunSCStateDir(r.actorTemplateNamespace, r.actorTemplateName, r.actorID), + "fscheckpoint", + "-image-path", checkpointPath, + } + for _, hdv := range homedirMounts { + args = append(args, "-path", hdv) + } + if leaveRunning { + args = append(args, "-leave-running") + } + + // The name of the container must be the last parameter. + args = append(args, containerName) + + cmd := exec.CommandContext( + ctx, + r.path, + args..., + ) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + err := cmd.Run() + if err != nil { + return fmt.Errorf("while running `runsc fscheckpoint`: %w", err) + } + return nil +} + // We take a checkpoint only of the root container of the sandbox, but we need // to call restore on each container, using the same checkpoint. 
func (r *runsc) cmdRestore(ctx context.Context, out io.Writer, containerName, checkpointPath string) error { diff --git a/demos/counter/counter.go b/demos/counter/counter.go index de85661c..69c77920 100644 --- a/demos/counter/counter.go +++ b/demos/counter/counter.go @@ -27,6 +27,8 @@ import ( "net" "net/http" "os" + "strconv" + "sync" "sync/atomic" "time" @@ -36,8 +38,29 @@ import ( var ( requestCount uint64 ready atomic.Bool + fileMutex sync.Mutex ) +const fileCounterPath = "/home/counter/a.txt" + +func incrementFileCounter() int { + fileMutex.Lock() + defer fileMutex.Unlock() + counter := 0 + data, err := os.ReadFile(fileCounterPath) + if err == nil { + if i, err := strconv.Atoi(string(data)); err == nil { + counter = i + } + } + counter++ + err = os.WriteFile(fileCounterPath, []byte(strconv.Itoa(counter)), 0644) + if err != nil { + return -1 + } + return counter +} + func main() { pflag.Parse() ctx := context.Background() @@ -47,10 +70,13 @@ func main() { defaultMux := http.NewServeMux() defaultMux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { ctx := r.Context() - count := atomic.AddUint64(&requestCount, 1) + fileCounter := incrementFileCounter() + + memoryCounter := atomic.AddUint64(&requestCount, 1) currentIP := getCurrentIP() - response := fmt.Sprintf("hello from: %s | preserved memory count: %d\n", currentIP, count) + response := fmt.Sprintf("hello from: %s | preserved memory count: %d | preserved file counter: %d\n", currentIP, memoryCounter, fileCounter) slog.InfoContext(ctx, "Handled request", slog.String("response", response)) + w.WriteHeader(http.StatusOK) w.Write([]byte(response)) }) diff --git a/demos/counter/counter.yaml.tmpl b/demos/counter/counter.yaml.tmpl index 652245b0..aeba0846 100644 --- a/demos/counter/counter.yaml.tmpl +++ b/demos/counter/counter.yaml.tmpl @@ -47,8 +47,16 @@ spec: httpGet: path: /readyz port: 80 + volumeMounts: + - name: myhomedir + mountPath: /home/counter workerSelector: matchLabels: workload: counter 
snapshotsConfig: + onPause: process + onCommit: homedir location: gs://${BUCKET_NAME}/ate-demo-counter/ + volumes: + - name: myhomedir + homeDir: {} diff --git a/internal/ateompath/ateompath.go b/internal/ateompath/ateompath.go index a779755c..16c0153f 100644 --- a/internal/ateompath/ateompath.go +++ b/internal/ateompath/ateompath.go @@ -23,6 +23,10 @@ const ( // The base path. This is both the path of the root shared folder on the // host filesystem, and when it is mounted into ateom and atelet containers. BasePath = "/var/lib/ateom-gvisor" + + // Homedir snapshots are temporarily stored in a subfolder relative to the process checkpoint path. + // This is because gVisor lacks the capability to separate homedir content from the rest of the rootfs upon checkpointing. + HomedirSnapshotsSubfoldderName = "homedir" ) var ( @@ -136,6 +140,23 @@ func LocalCheckpointsDir(actorTemplateNamespace, actorTemplateName, actorID stri ) } +// HommedirVolumeMountsDir is the directory where individual home directory volumes +// are mounted. +func HommedirVolumeMountsDir(actorTemplateNamespace, actorTemplateName, actorID string) string { + return filepath.Join( + ActorPath(actorTemplateNamespace, actorTemplateName, actorID), + HomedirSnapshotsSubfoldderName, + ) +} + +// HomedirVolumeMountPoint returns the path where a specific home directory volume is mounted on the nodeVM. +func HomedirVolumeMountPoint(actorTemplateNamespace, actorTemplateName, actorID, volumeName string) string { + return filepath.Join( + HommedirVolumeMountsDir(actorTemplateNamespace, actorTemplateName, actorID), + volumeName, + ) +} + // RestoreStateDir is the local directory to use to restore an actor from a // checkpoint downloaded from GCS. 
// diff --git a/internal/proto/ateletpb/atelet.pb.go b/internal/proto/ateletpb/atelet.pb.go index 005a5444..af5e9210 100644 --- a/internal/proto/ateletpb/atelet.pb.go +++ b/internal/proto/ateletpb/atelet.pb.go @@ -35,6 +35,52 @@ const ( _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) ) +type VolumeType int32 + +const ( + VolumeType_VOLUME_TYPE_UNSPECIFIED VolumeType = 0 + VolumeType_VOLUME_TYPE_HOMEDIR VolumeType = 1 +) + +// Enum value maps for VolumeType. +var ( + VolumeType_name = map[int32]string{ + 0: "VOLUME_TYPE_UNSPECIFIED", + 1: "VOLUME_TYPE_HOMEDIR", + } + VolumeType_value = map[string]int32{ + "VOLUME_TYPE_UNSPECIFIED": 0, + "VOLUME_TYPE_HOMEDIR": 1, + } +) + +func (x VolumeType) Enum() *VolumeType { + p := new(VolumeType) + *p = x + return p +} + +func (x VolumeType) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (VolumeType) Descriptor() protoreflect.EnumDescriptor { + return file_atelet_proto_enumTypes[0].Descriptor() +} + +func (VolumeType) Type() protoreflect.EnumType { + return &file_atelet_proto_enumTypes[0] +} + +func (x VolumeType) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Use VolumeType.Descriptor instead. +func (VolumeType) EnumDescriptor() ([]byte, []int) { + return file_atelet_proto_rawDescGZIP(), []int{0} +} + type CheckpointType int32 const ( @@ -71,11 +117,11 @@ func (x CheckpointType) String() string { } func (CheckpointType) Descriptor() protoreflect.EnumDescriptor { - return file_atelet_proto_enumTypes[0].Descriptor() + return file_atelet_proto_enumTypes[1].Descriptor() } func (CheckpointType) Type() protoreflect.EnumType { - return &file_atelet_proto_enumTypes[0] + return &file_atelet_proto_enumTypes[1] } func (x CheckpointType) Number() protoreflect.EnumNumber { @@ -84,7 +130,59 @@ func (x CheckpointType) Number() protoreflect.EnumNumber { // Deprecated: Use CheckpointType.Descriptor instead. 
func (CheckpointType) EnumDescriptor() ([]byte, []int) { - return file_atelet_proto_rawDescGZIP(), []int{0} + return file_atelet_proto_rawDescGZIP(), []int{1} +} + +type SnapshotScope int32 + +const ( + // Not valid option; should never happen. + SnapshotScope_SNAPSHOT_SCOPE_UNSPECIFIED SnapshotScope = 0 + // Snapshot memory and the full rootfs (including homedir content). + SnapshotScope_SNAPSHOT_SCOPE_PROCESS SnapshotScope = 1 + // Snapshot only the homedir; memory and the rest of rootfs are excluded. + SnapshotScope_SNAPSHOT_SCOPE_HOMEDIR SnapshotScope = 2 +) + +// Enum value maps for SnapshotScope. +var ( + SnapshotScope_name = map[int32]string{ + 0: "SNAPSHOT_SCOPE_UNSPECIFIED", + 1: "SNAPSHOT_SCOPE_PROCESS", + 2: "SNAPSHOT_SCOPE_HOMEDIR", + } + SnapshotScope_value = map[string]int32{ + "SNAPSHOT_SCOPE_UNSPECIFIED": 0, + "SNAPSHOT_SCOPE_PROCESS": 1, + "SNAPSHOT_SCOPE_HOMEDIR": 2, + } +) + +func (x SnapshotScope) Enum() *SnapshotScope { + p := new(SnapshotScope) + *p = x + return p +} + +func (x SnapshotScope) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (SnapshotScope) Descriptor() protoreflect.EnumDescriptor { + return file_atelet_proto_enumTypes[2].Descriptor() +} + +func (SnapshotScope) Type() protoreflect.EnumType { + return &file_atelet_proto_enumTypes[2] +} + +func (x SnapshotScope) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Use SnapshotScope.Descriptor instead. 
+func (SnapshotScope) EnumDescriptor() ([]byte, []int) { + return file_atelet_proto_rawDescGZIP(), []int{2} } type RunRequest struct { @@ -337,6 +435,7 @@ type WorkloadSpec struct { state protoimpl.MessageState `protogen:"open.v1"` Containers []*Container `protobuf:"bytes,1,rep,name=containers,proto3" json:"containers,omitempty"` PauseImage string `protobuf:"bytes,2,opt,name=pause_image,json=pauseImage,proto3" json:"pause_image,omitempty"` + Volumes []*Volume `protobuf:"bytes,3,rep,name=volumes,proto3" json:"volumes,omitempty"` unknownFields protoimpl.UnknownFields sizeCache protoimpl.SizeCache } @@ -385,6 +484,183 @@ func (x *WorkloadSpec) GetPauseImage() string { return "" } +func (x *WorkloadSpec) GetVolumes() []*Volume { + if x != nil { + return x.Volumes + } + return nil +} + +type HomedirVolume struct { + state protoimpl.MessageState `protogen:"open.v1"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *HomedirVolume) Reset() { + *x = HomedirVolume{} + mi := &file_atelet_proto_msgTypes[5] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *HomedirVolume) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*HomedirVolume) ProtoMessage() {} + +func (x *HomedirVolume) ProtoReflect() protoreflect.Message { + mi := &file_atelet_proto_msgTypes[5] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use HomedirVolume.ProtoReflect.Descriptor instead. 
+func (*HomedirVolume) Descriptor() ([]byte, []int) { + return file_atelet_proto_rawDescGZIP(), []int{5} +} + +type Volume struct { + state protoimpl.MessageState `protogen:"open.v1"` + Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` + Type VolumeType `protobuf:"varint,2,opt,name=type,proto3,enum=atelet.VolumeType" json:"type,omitempty"` + // Types that are valid to be assigned to Source: + // + // *Volume_HomeDir + Source isVolume_Source `protobuf_oneof:"source"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *Volume) Reset() { + *x = Volume{} + mi := &file_atelet_proto_msgTypes[6] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *Volume) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Volume) ProtoMessage() {} + +func (x *Volume) ProtoReflect() protoreflect.Message { + mi := &file_atelet_proto_msgTypes[6] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Volume.ProtoReflect.Descriptor instead. 
+func (*Volume) Descriptor() ([]byte, []int) { + return file_atelet_proto_rawDescGZIP(), []int{6} +} + +func (x *Volume) GetName() string { + if x != nil { + return x.Name + } + return "" +} + +func (x *Volume) GetType() VolumeType { + if x != nil { + return x.Type + } + return VolumeType_VOLUME_TYPE_UNSPECIFIED +} + +func (x *Volume) GetSource() isVolume_Source { + if x != nil { + return x.Source + } + return nil +} + +func (x *Volume) GetHomeDir() *HomedirVolume { + if x != nil { + if x, ok := x.Source.(*Volume_HomeDir); ok { + return x.HomeDir + } + } + return nil +} + +type isVolume_Source interface { + isVolume_Source() +} + +type Volume_HomeDir struct { + HomeDir *HomedirVolume `protobuf:"bytes,3,opt,name=home_dir,json=homeDir,proto3,oneof"` +} + +func (*Volume_HomeDir) isVolume_Source() {} + +type VolumeMount struct { + state protoimpl.MessageState `protogen:"open.v1"` + Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` + MountPath string `protobuf:"bytes,2,opt,name=mount_path,json=mountPath,proto3" json:"mount_path,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *VolumeMount) Reset() { + *x = VolumeMount{} + mi := &file_atelet_proto_msgTypes[7] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *VolumeMount) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*VolumeMount) ProtoMessage() {} + +func (x *VolumeMount) ProtoReflect() protoreflect.Message { + mi := &file_atelet_proto_msgTypes[7] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use VolumeMount.ProtoReflect.Descriptor instead. 
+func (*VolumeMount) Descriptor() ([]byte, []int) { + return file_atelet_proto_rawDescGZIP(), []int{7} +} + +func (x *VolumeMount) GetName() string { + if x != nil { + return x.Name + } + return "" +} + +func (x *VolumeMount) GetMountPath() string { + if x != nil { + return x.MountPath + } + return "" +} + type Container struct { state protoimpl.MessageState `protogen:"open.v1"` Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` @@ -392,13 +668,14 @@ type Container struct { Command []string `protobuf:"bytes,3,rep,name=command,proto3" json:"command,omitempty"` Env []*EnvEntry `protobuf:"bytes,4,rep,name=env,proto3" json:"env,omitempty"` Readyz *Readyz `protobuf:"bytes,5,opt,name=readyz,proto3" json:"readyz,omitempty"` + VolumeMounts []*VolumeMount `protobuf:"bytes,6,rep,name=volume_mounts,json=volumeMounts,proto3" json:"volume_mounts,omitempty"` unknownFields protoimpl.UnknownFields sizeCache protoimpl.SizeCache } func (x *Container) Reset() { *x = Container{} - mi := &file_atelet_proto_msgTypes[5] + mi := &file_atelet_proto_msgTypes[8] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -410,7 +687,7 @@ func (x *Container) String() string { func (*Container) ProtoMessage() {} func (x *Container) ProtoReflect() protoreflect.Message { - mi := &file_atelet_proto_msgTypes[5] + mi := &file_atelet_proto_msgTypes[8] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -423,7 +700,7 @@ func (x *Container) ProtoReflect() protoreflect.Message { // Deprecated: Use Container.ProtoReflect.Descriptor instead. 
func (*Container) Descriptor() ([]byte, []int) { - return file_atelet_proto_rawDescGZIP(), []int{5} + return file_atelet_proto_rawDescGZIP(), []int{8} } func (x *Container) GetName() string { @@ -461,6 +738,13 @@ func (x *Container) GetReadyz() *Readyz { return nil } +func (x *Container) GetVolumeMounts() []*VolumeMount { + if x != nil { + return x.VolumeMounts + } + return nil +} + type EnvEntry struct { state protoimpl.MessageState `protogen:"open.v1"` Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` @@ -471,7 +755,7 @@ type EnvEntry struct { func (x *EnvEntry) Reset() { *x = EnvEntry{} - mi := &file_atelet_proto_msgTypes[6] + mi := &file_atelet_proto_msgTypes[9] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -483,7 +767,7 @@ func (x *EnvEntry) String() string { func (*EnvEntry) ProtoMessage() {} func (x *EnvEntry) ProtoReflect() protoreflect.Message { - mi := &file_atelet_proto_msgTypes[6] + mi := &file_atelet_proto_msgTypes[9] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -496,7 +780,7 @@ func (x *EnvEntry) ProtoReflect() protoreflect.Message { // Deprecated: Use EnvEntry.ProtoReflect.Descriptor instead. 
func (*EnvEntry) Descriptor() ([]byte, []int) { - return file_atelet_proto_rawDescGZIP(), []int{6} + return file_atelet_proto_rawDescGZIP(), []int{9} } func (x *EnvEntry) GetName() string { @@ -524,7 +808,7 @@ type Readyz struct { func (x *Readyz) Reset() { *x = Readyz{} - mi := &file_atelet_proto_msgTypes[7] + mi := &file_atelet_proto_msgTypes[10] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -536,7 +820,7 @@ func (x *Readyz) String() string { func (*Readyz) ProtoMessage() {} func (x *Readyz) ProtoReflect() protoreflect.Message { - mi := &file_atelet_proto_msgTypes[7] + mi := &file_atelet_proto_msgTypes[10] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -549,7 +833,7 @@ func (x *Readyz) ProtoReflect() protoreflect.Message { // Deprecated: Use Readyz.ProtoReflect.Descriptor instead. func (*Readyz) Descriptor() ([]byte, []int) { - return file_atelet_proto_rawDescGZIP(), []int{7} + return file_atelet_proto_rawDescGZIP(), []int{10} } func (x *Readyz) GetHttpGet() *HTTPGetAction { @@ -572,7 +856,7 @@ type HTTPGetAction struct { func (x *HTTPGetAction) Reset() { *x = HTTPGetAction{} - mi := &file_atelet_proto_msgTypes[8] + mi := &file_atelet_proto_msgTypes[11] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -584,7 +868,7 @@ func (x *HTTPGetAction) String() string { func (*HTTPGetAction) ProtoMessage() {} func (x *HTTPGetAction) ProtoReflect() protoreflect.Message { - mi := &file_atelet_proto_msgTypes[8] + mi := &file_atelet_proto_msgTypes[11] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -597,7 +881,7 @@ func (x *HTTPGetAction) ProtoReflect() protoreflect.Message { // Deprecated: Use HTTPGetAction.ProtoReflect.Descriptor instead. 
func (*HTTPGetAction) Descriptor() ([]byte, []int) { - return file_atelet_proto_rawDescGZIP(), []int{8} + return file_atelet_proto_rawDescGZIP(), []int{11} } func (x *HTTPGetAction) GetPath() string { @@ -622,7 +906,7 @@ type RunResponse struct { func (x *RunResponse) Reset() { *x = RunResponse{} - mi := &file_atelet_proto_msgTypes[9] + mi := &file_atelet_proto_msgTypes[12] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -634,7 +918,7 @@ func (x *RunResponse) String() string { func (*RunResponse) ProtoMessage() {} func (x *RunResponse) ProtoReflect() protoreflect.Message { - mi := &file_atelet_proto_msgTypes[9] + mi := &file_atelet_proto_msgTypes[12] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -647,7 +931,7 @@ func (x *RunResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use RunResponse.ProtoReflect.Descriptor instead. func (*RunResponse) Descriptor() ([]byte, []int) { - return file_atelet_proto_rawDescGZIP(), []int{9} + return file_atelet_proto_rawDescGZIP(), []int{12} } type LocalCheckpointConfiguration struct { @@ -661,7 +945,7 @@ type LocalCheckpointConfiguration struct { func (x *LocalCheckpointConfiguration) Reset() { *x = LocalCheckpointConfiguration{} - mi := &file_atelet_proto_msgTypes[10] + mi := &file_atelet_proto_msgTypes[13] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -673,7 +957,7 @@ func (x *LocalCheckpointConfiguration) String() string { func (*LocalCheckpointConfiguration) ProtoMessage() {} func (x *LocalCheckpointConfiguration) ProtoReflect() protoreflect.Message { - mi := &file_atelet_proto_msgTypes[10] + mi := &file_atelet_proto_msgTypes[13] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -686,7 +970,7 @@ func (x *LocalCheckpointConfiguration) ProtoReflect() protoreflect.Message { // Deprecated: Use 
LocalCheckpointConfiguration.ProtoReflect.Descriptor instead. func (*LocalCheckpointConfiguration) Descriptor() ([]byte, []int) { - return file_atelet_proto_rawDescGZIP(), []int{10} + return file_atelet_proto_rawDescGZIP(), []int{13} } func (x *LocalCheckpointConfiguration) GetSnapshotPrefix() string { @@ -715,7 +999,7 @@ type ExternalCheckpointConfiguration struct { func (x *ExternalCheckpointConfiguration) Reset() { *x = ExternalCheckpointConfiguration{} - mi := &file_atelet_proto_msgTypes[11] + mi := &file_atelet_proto_msgTypes[14] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -727,7 +1011,7 @@ func (x *ExternalCheckpointConfiguration) String() string { func (*ExternalCheckpointConfiguration) ProtoMessage() {} func (x *ExternalCheckpointConfiguration) ProtoReflect() protoreflect.Message { - mi := &file_atelet_proto_msgTypes[11] + mi := &file_atelet_proto_msgTypes[14] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -740,7 +1024,7 @@ func (x *ExternalCheckpointConfiguration) ProtoReflect() protoreflect.Message { // Deprecated: Use ExternalCheckpointConfiguration.ProtoReflect.Descriptor instead. func (*ExternalCheckpointConfiguration) Descriptor() ([]byte, []int) { - return file_atelet_proto_rawDescGZIP(), []int{11} + return file_atelet_proto_rawDescGZIP(), []int{14} } func (x *ExternalCheckpointConfiguration) GetSnapshotUriPrefix() string { @@ -764,14 +1048,16 @@ type CheckpointRequest struct { // // *CheckpointRequest_LocalConfig // *CheckpointRequest_ExternalConfig - Config isCheckpointRequest_Config `protobuf_oneof:"config"` + Config isCheckpointRequest_Config `protobuf_oneof:"config"` + // What should be included in the checkpoint. 
+ Scope SnapshotScope `protobuf:"varint,12,opt,name=scope,proto3,enum=atelet.SnapshotScope" json:"scope,omitempty"` unknownFields protoimpl.UnknownFields sizeCache protoimpl.SizeCache } func (x *CheckpointRequest) Reset() { *x = CheckpointRequest{} - mi := &file_atelet_proto_msgTypes[12] + mi := &file_atelet_proto_msgTypes[15] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -783,7 +1069,7 @@ func (x *CheckpointRequest) String() string { func (*CheckpointRequest) ProtoMessage() {} func (x *CheckpointRequest) ProtoReflect() protoreflect.Message { - mi := &file_atelet_proto_msgTypes[12] + mi := &file_atelet_proto_msgTypes[15] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -796,7 +1082,7 @@ func (x *CheckpointRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use CheckpointRequest.ProtoReflect.Descriptor instead. func (*CheckpointRequest) Descriptor() ([]byte, []int) { - return file_atelet_proto_rawDescGZIP(), []int{12} + return file_atelet_proto_rawDescGZIP(), []int{15} } func (x *CheckpointRequest) GetTargetAteomUid() string { @@ -866,6 +1152,13 @@ func (x *CheckpointRequest) GetExternalConfig() *ExternalCheckpointConfiguration return nil } +func (x *CheckpointRequest) GetScope() SnapshotScope { + if x != nil { + return x.Scope + } + return SnapshotScope_SNAPSHOT_SCOPE_UNSPECIFIED +} + type isCheckpointRequest_Config interface { isCheckpointRequest_Config() } @@ -890,7 +1183,7 @@ type CheckpointResponse struct { func (x *CheckpointResponse) Reset() { *x = CheckpointResponse{} - mi := &file_atelet_proto_msgTypes[13] + mi := &file_atelet_proto_msgTypes[16] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -902,7 +1195,7 @@ func (x *CheckpointResponse) String() string { func (*CheckpointResponse) ProtoMessage() {} func (x *CheckpointResponse) ProtoReflect() protoreflect.Message { - mi := &file_atelet_proto_msgTypes[13] + mi := 
&file_atelet_proto_msgTypes[16] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -915,7 +1208,7 @@ func (x *CheckpointResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use CheckpointResponse.ProtoReflect.Descriptor instead. func (*CheckpointResponse) Descriptor() ([]byte, []int) { - return file_atelet_proto_rawDescGZIP(), []int{13} + return file_atelet_proto_rawDescGZIP(), []int{16} } type RestoreRequest struct { @@ -932,14 +1225,16 @@ type RestoreRequest struct { // // *RestoreRequest_LocalConfig // *RestoreRequest_ExternalConfig - Config isRestoreRequest_Config `protobuf_oneof:"config"` + Config isRestoreRequest_Config `protobuf_oneof:"config"` + // What content to restore from the checkpoint. + Scope SnapshotScope `protobuf:"varint,12,opt,name=scope,proto3,enum=atelet.SnapshotScope" json:"scope,omitempty"` unknownFields protoimpl.UnknownFields sizeCache protoimpl.SizeCache } func (x *RestoreRequest) Reset() { *x = RestoreRequest{} - mi := &file_atelet_proto_msgTypes[14] + mi := &file_atelet_proto_msgTypes[17] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -951,7 +1246,7 @@ func (x *RestoreRequest) String() string { func (*RestoreRequest) ProtoMessage() {} func (x *RestoreRequest) ProtoReflect() protoreflect.Message { - mi := &file_atelet_proto_msgTypes[14] + mi := &file_atelet_proto_msgTypes[17] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -964,7 +1259,7 @@ func (x *RestoreRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use RestoreRequest.ProtoReflect.Descriptor instead. 
func (*RestoreRequest) Descriptor() ([]byte, []int) { - return file_atelet_proto_rawDescGZIP(), []int{14} + return file_atelet_proto_rawDescGZIP(), []int{17} } func (x *RestoreRequest) GetTargetAteomUid() string { @@ -1034,6 +1329,13 @@ func (x *RestoreRequest) GetExternalConfig() *ExternalCheckpointConfiguration { return nil } +func (x *RestoreRequest) GetScope() SnapshotScope { + if x != nil { + return x.Scope + } + return SnapshotScope_SNAPSHOT_SCOPE_UNSPECIFIED +} + type isRestoreRequest_Config interface { isRestoreRequest_Config() } @@ -1058,7 +1360,7 @@ type RestoreResponse struct { func (x *RestoreResponse) Reset() { *x = RestoreResponse{} - mi := &file_atelet_proto_msgTypes[15] + mi := &file_atelet_proto_msgTypes[18] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -1070,7 +1372,7 @@ func (x *RestoreResponse) String() string { func (*RestoreResponse) ProtoMessage() {} func (x *RestoreResponse) ProtoReflect() protoreflect.Message { - mi := &file_atelet_proto_msgTypes[15] + mi := &file_atelet_proto_msgTypes[18] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -1083,7 +1385,7 @@ func (x *RestoreResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use RestoreResponse.ProtoReflect.Descriptor instead. 
func (*RestoreResponse) Descriptor() ([]byte, []int) { - return file_atelet_proto_rawDescGZIP(), []int{15} + return file_atelet_proto_rawDescGZIP(), []int{18} } var File_atelet_proto protoreflect.FileDescriptor @@ -1114,19 +1416,31 @@ const file_atelet_proto_rawDesc = "" + "\x06assets\x18\x02 \x03(\v2!.atelet.SandboxAssets.AssetsEntryR\x06assets\x1aM\n" + "\vAssetsEntry\x12\x10\n" + "\x03key\x18\x01 \x01(\tR\x03key\x12(\n" + - "\x05value\x18\x02 \x01(\v2\x12.atelet.ArchAssetsR\x05value:\x028\x01\"b\n" + + "\x05value\x18\x02 \x01(\v2\x12.atelet.ArchAssetsR\x05value:\x028\x01\"\x8c\x01\n" + "\fWorkloadSpec\x121\n" + "\n" + "containers\x18\x01 \x03(\v2\x11.atelet.ContainerR\n" + "containers\x12\x1f\n" + "\vpause_image\x18\x02 \x01(\tR\n" + - "pauseImage\"\x9b\x01\n" + + "pauseImage\x12(\n" + + "\avolumes\x18\x03 \x03(\v2\x0e.atelet.VolumeR\avolumes\"\x0f\n" + + "\rHomedirVolume\"\x82\x01\n" + + "\x06Volume\x12\x12\n" + + "\x04name\x18\x01 \x01(\tR\x04name\x12&\n" + + "\x04type\x18\x02 \x01(\x0e2\x12.atelet.VolumeTypeR\x04type\x122\n" + + "\bhome_dir\x18\x03 \x01(\v2\x15.atelet.HomedirVolumeH\x00R\ahomeDirB\b\n" + + "\x06source\"@\n" + + "\vVolumeMount\x12\x12\n" + + "\x04name\x18\x01 \x01(\tR\x04name\x12\x1d\n" + + "\n" + + "mount_path\x18\x02 \x01(\tR\tmountPath\"\xd5\x01\n" + "\tContainer\x12\x12\n" + "\x04name\x18\x01 \x01(\tR\x04name\x12\x14\n" + "\x05image\x18\x02 \x01(\tR\x05image\x12\x18\n" + "\acommand\x18\x03 \x03(\tR\acommand\x12\"\n" + "\x03env\x18\x04 \x03(\v2\x10.atelet.EnvEntryR\x03env\x12&\n" + - "\x06readyz\x18\x05 \x01(\v2\x0e.atelet.ReadyzR\x06readyz\"4\n" + + "\x06readyz\x18\x05 \x01(\v2\x0e.atelet.ReadyzR\x06readyz\x128\n" + + "\rvolume_mounts\x18\x06 \x03(\v2\x13.atelet.VolumeMountR\fvolumeMounts\"4\n" + "\bEnvEntry\x12\x12\n" + "\x04name\x18\x01 \x01(\tR\x04name\x12\x14\n" + "\x05value\x18\x02 \x01(\tR\x05value\":\n" + @@ -1139,7 +1453,7 @@ const file_atelet_proto_rawDesc = "" + "\x1cLocalCheckpointConfiguration\x12'\n" + 
"\x0fsnapshot_prefix\x18\x01 \x01(\tR\x0esnapshotPrefix\"Q\n" + "\x1fExternalCheckpointConfiguration\x12.\n" + - "\x13snapshot_uri_prefix\x18\x01 \x01(\tR\x11snapshotUriPrefix\"\xcd\x03\n" + + "\x13snapshot_uri_prefix\x18\x01 \x01(\tR\x11snapshotUriPrefix\"\xfa\x03\n" + "\x11CheckpointRequest\x12(\n" + "\x10target_ateom_uid\x18\x01 \x01(\tR\x0etargetAteomUid\x128\n" + "\x18actor_template_namespace\x18\x03 \x01(\tR\x16actorTemplateNamespace\x12.\n" + @@ -1149,9 +1463,10 @@ const file_atelet_proto_rawDesc = "" + "\x04type\x18\t \x01(\x0e2\x16.atelet.CheckpointTypeR\x04type\x12I\n" + "\flocal_config\x18\n" + " \x01(\v2$.atelet.LocalCheckpointConfigurationH\x00R\vlocalConfig\x12R\n" + - "\x0fexternal_config\x18\v \x01(\v2'.atelet.ExternalCheckpointConfigurationH\x00R\x0eexternalConfigB\b\n" + + "\x0fexternal_config\x18\v \x01(\v2'.atelet.ExternalCheckpointConfigurationH\x00R\x0eexternalConfig\x12+\n" + + "\x05scope\x18\f \x01(\x0e2\x15.atelet.SnapshotScopeR\x05scopeB\b\n" + "\x06configJ\x04\b\x06\x10\aJ\x04\b\b\x10\t\"\x14\n" + - "\x12CheckpointResponse\"\xca\x03\n" + + "\x12CheckpointResponse\"\xf7\x03\n" + "\x0eRestoreRequest\x12(\n" + "\x10target_ateom_uid\x18\x01 \x01(\tR\x0etargetAteomUid\x128\n" + "\x18actor_template_namespace\x18\x03 \x01(\tR\x16actorTemplateNamespace\x12.\n" + @@ -1161,13 +1476,22 @@ const file_atelet_proto_rawDesc = "" + "\x04type\x18\t \x01(\x0e2\x16.atelet.CheckpointTypeR\x04type\x12I\n" + "\flocal_config\x18\n" + " \x01(\v2$.atelet.LocalCheckpointConfigurationH\x00R\vlocalConfig\x12R\n" + - "\x0fexternal_config\x18\v \x01(\v2'.atelet.ExternalCheckpointConfigurationH\x00R\x0eexternalConfigB\b\n" + + "\x0fexternal_config\x18\v \x01(\v2'.atelet.ExternalCheckpointConfigurationH\x00R\x0eexternalConfig\x12+\n" + + "\x05scope\x18\f \x01(\x0e2\x15.atelet.SnapshotScopeR\x05scopeB\b\n" + "\x06configJ\x04\b\x06\x10\aJ\x04\b\b\x10\t\"\x11\n" + - "\x0fRestoreResponse*j\n" + + "\x0fRestoreResponse*B\n" + + "\n" + + "VolumeType\x12\x1b\n" + + 
"\x17VOLUME_TYPE_UNSPECIFIED\x10\x00\x12\x17\n" + + "\x13VOLUME_TYPE_HOMEDIR\x10\x01*j\n" + "\x0eCheckpointType\x12\x1f\n" + "\x1bCHECKPOINT_TYPE_UNSPECIFIED\x10\x00\x12\x19\n" + "\x15CHECKPOINT_TYPE_LOCAL\x10\x01\x12\x1c\n" + - "\x18CHECKPOINT_TYPE_EXTERNAL\x10\x022\xc4\x01\n" + + "\x18CHECKPOINT_TYPE_EXTERNAL\x10\x02*g\n" + + "\rSnapshotScope\x12\x1e\n" + + "\x1aSNAPSHOT_SCOPE_UNSPECIFIED\x10\x00\x12\x1a\n" + + "\x16SNAPSHOT_SCOPE_PROCESS\x10\x01\x12\x1a\n" + + "\x16SNAPSHOT_SCOPE_HOMEDIR\x10\x022\xc4\x01\n" + "\vAteomHerder\x120\n" + "\x03Run\x12\x12.atelet.RunRequest\x1a\x13.atelet.RunResponse\"\x00\x12E\n" + "\n" + @@ -1186,59 +1510,70 @@ func file_atelet_proto_rawDescGZIP() []byte { return file_atelet_proto_rawDescData } -var file_atelet_proto_enumTypes = make([]protoimpl.EnumInfo, 1) -var file_atelet_proto_msgTypes = make([]protoimpl.MessageInfo, 18) +var file_atelet_proto_enumTypes = make([]protoimpl.EnumInfo, 3) +var file_atelet_proto_msgTypes = make([]protoimpl.MessageInfo, 21) var file_atelet_proto_goTypes = []any{ - (CheckpointType)(0), // 0: atelet.CheckpointType - (*RunRequest)(nil), // 1: atelet.RunRequest - (*AssetFile)(nil), // 2: atelet.AssetFile - (*ArchAssets)(nil), // 3: atelet.ArchAssets - (*SandboxAssets)(nil), // 4: atelet.SandboxAssets - (*WorkloadSpec)(nil), // 5: atelet.WorkloadSpec - (*Container)(nil), // 6: atelet.Container - (*EnvEntry)(nil), // 7: atelet.EnvEntry - (*Readyz)(nil), // 8: atelet.Readyz - (*HTTPGetAction)(nil), // 9: atelet.HTTPGetAction - (*RunResponse)(nil), // 10: atelet.RunResponse - (*LocalCheckpointConfiguration)(nil), // 11: atelet.LocalCheckpointConfiguration - (*ExternalCheckpointConfiguration)(nil), // 12: atelet.ExternalCheckpointConfiguration - (*CheckpointRequest)(nil), // 13: atelet.CheckpointRequest - (*CheckpointResponse)(nil), // 14: atelet.CheckpointResponse - (*RestoreRequest)(nil), // 15: atelet.RestoreRequest - (*RestoreResponse)(nil), // 16: atelet.RestoreResponse - nil, // 17: 
atelet.ArchAssets.FilesEntry - nil, // 18: atelet.SandboxAssets.AssetsEntry + (VolumeType)(0), // 0: atelet.VolumeType + (CheckpointType)(0), // 1: atelet.CheckpointType + (SnapshotScope)(0), // 2: atelet.SnapshotScope + (*RunRequest)(nil), // 3: atelet.RunRequest + (*AssetFile)(nil), // 4: atelet.AssetFile + (*ArchAssets)(nil), // 5: atelet.ArchAssets + (*SandboxAssets)(nil), // 6: atelet.SandboxAssets + (*WorkloadSpec)(nil), // 7: atelet.WorkloadSpec + (*HomedirVolume)(nil), // 8: atelet.HomedirVolume + (*Volume)(nil), // 9: atelet.Volume + (*VolumeMount)(nil), // 10: atelet.VolumeMount + (*Container)(nil), // 11: atelet.Container + (*EnvEntry)(nil), // 12: atelet.EnvEntry + (*Readyz)(nil), // 13: atelet.Readyz + (*HTTPGetAction)(nil), // 14: atelet.HTTPGetAction + (*RunResponse)(nil), // 15: atelet.RunResponse + (*LocalCheckpointConfiguration)(nil), // 16: atelet.LocalCheckpointConfiguration + (*ExternalCheckpointConfiguration)(nil), // 17: atelet.ExternalCheckpointConfiguration + (*CheckpointRequest)(nil), // 18: atelet.CheckpointRequest + (*CheckpointResponse)(nil), // 19: atelet.CheckpointResponse + (*RestoreRequest)(nil), // 20: atelet.RestoreRequest + (*RestoreResponse)(nil), // 21: atelet.RestoreResponse + nil, // 22: atelet.ArchAssets.FilesEntry + nil, // 23: atelet.SandboxAssets.AssetsEntry } var file_atelet_proto_depIdxs = []int32{ - 5, // 0: atelet.RunRequest.spec:type_name -> atelet.WorkloadSpec - 4, // 1: atelet.RunRequest.sandbox_assets:type_name -> atelet.SandboxAssets - 17, // 2: atelet.ArchAssets.files:type_name -> atelet.ArchAssets.FilesEntry - 18, // 3: atelet.SandboxAssets.assets:type_name -> atelet.SandboxAssets.AssetsEntry - 6, // 4: atelet.WorkloadSpec.containers:type_name -> atelet.Container - 7, // 5: atelet.Container.env:type_name -> atelet.EnvEntry - 8, // 6: atelet.Container.readyz:type_name -> atelet.Readyz - 9, // 7: atelet.Readyz.http_get:type_name -> atelet.HTTPGetAction - 5, // 8: atelet.CheckpointRequest.spec:type_name -> 
atelet.WorkloadSpec - 0, // 9: atelet.CheckpointRequest.type:type_name -> atelet.CheckpointType - 11, // 10: atelet.CheckpointRequest.local_config:type_name -> atelet.LocalCheckpointConfiguration - 12, // 11: atelet.CheckpointRequest.external_config:type_name -> atelet.ExternalCheckpointConfiguration - 5, // 12: atelet.RestoreRequest.spec:type_name -> atelet.WorkloadSpec - 0, // 13: atelet.RestoreRequest.type:type_name -> atelet.CheckpointType - 11, // 14: atelet.RestoreRequest.local_config:type_name -> atelet.LocalCheckpointConfiguration - 12, // 15: atelet.RestoreRequest.external_config:type_name -> atelet.ExternalCheckpointConfiguration - 2, // 16: atelet.ArchAssets.FilesEntry.value:type_name -> atelet.AssetFile - 3, // 17: atelet.SandboxAssets.AssetsEntry.value:type_name -> atelet.ArchAssets - 1, // 18: atelet.AteomHerder.Run:input_type -> atelet.RunRequest - 13, // 19: atelet.AteomHerder.Checkpoint:input_type -> atelet.CheckpointRequest - 15, // 20: atelet.AteomHerder.Restore:input_type -> atelet.RestoreRequest - 10, // 21: atelet.AteomHerder.Run:output_type -> atelet.RunResponse - 14, // 22: atelet.AteomHerder.Checkpoint:output_type -> atelet.CheckpointResponse - 16, // 23: atelet.AteomHerder.Restore:output_type -> atelet.RestoreResponse - 21, // [21:24] is the sub-list for method output_type - 18, // [18:21] is the sub-list for method input_type - 18, // [18:18] is the sub-list for extension type_name - 18, // [18:18] is the sub-list for extension extendee - 0, // [0:18] is the sub-list for field type_name + 7, // 0: atelet.RunRequest.spec:type_name -> atelet.WorkloadSpec + 6, // 1: atelet.RunRequest.sandbox_assets:type_name -> atelet.SandboxAssets + 22, // 2: atelet.ArchAssets.files:type_name -> atelet.ArchAssets.FilesEntry + 23, // 3: atelet.SandboxAssets.assets:type_name -> atelet.SandboxAssets.AssetsEntry + 11, // 4: atelet.WorkloadSpec.containers:type_name -> atelet.Container + 9, // 5: atelet.WorkloadSpec.volumes:type_name -> atelet.Volume + 0, // 6: 
atelet.Volume.type:type_name -> atelet.VolumeType + 8, // 7: atelet.Volume.home_dir:type_name -> atelet.HomedirVolume + 12, // 8: atelet.Container.env:type_name -> atelet.EnvEntry + 13, // 9: atelet.Container.readyz:type_name -> atelet.Readyz + 10, // 10: atelet.Container.volume_mounts:type_name -> atelet.VolumeMount + 14, // 11: atelet.Readyz.http_get:type_name -> atelet.HTTPGetAction + 7, // 12: atelet.CheckpointRequest.spec:type_name -> atelet.WorkloadSpec + 1, // 13: atelet.CheckpointRequest.type:type_name -> atelet.CheckpointType + 16, // 14: atelet.CheckpointRequest.local_config:type_name -> atelet.LocalCheckpointConfiguration + 17, // 15: atelet.CheckpointRequest.external_config:type_name -> atelet.ExternalCheckpointConfiguration + 2, // 16: atelet.CheckpointRequest.scope:type_name -> atelet.SnapshotScope + 7, // 17: atelet.RestoreRequest.spec:type_name -> atelet.WorkloadSpec + 1, // 18: atelet.RestoreRequest.type:type_name -> atelet.CheckpointType + 16, // 19: atelet.RestoreRequest.local_config:type_name -> atelet.LocalCheckpointConfiguration + 17, // 20: atelet.RestoreRequest.external_config:type_name -> atelet.ExternalCheckpointConfiguration + 2, // 21: atelet.RestoreRequest.scope:type_name -> atelet.SnapshotScope + 4, // 22: atelet.ArchAssets.FilesEntry.value:type_name -> atelet.AssetFile + 5, // 23: atelet.SandboxAssets.AssetsEntry.value:type_name -> atelet.ArchAssets + 3, // 24: atelet.AteomHerder.Run:input_type -> atelet.RunRequest + 18, // 25: atelet.AteomHerder.Checkpoint:input_type -> atelet.CheckpointRequest + 20, // 26: atelet.AteomHerder.Restore:input_type -> atelet.RestoreRequest + 15, // 27: atelet.AteomHerder.Run:output_type -> atelet.RunResponse + 19, // 28: atelet.AteomHerder.Checkpoint:output_type -> atelet.CheckpointResponse + 21, // 29: atelet.AteomHerder.Restore:output_type -> atelet.RestoreResponse + 27, // [27:30] is the sub-list for method output_type + 24, // [24:27] is the sub-list for method input_type + 24, // [24:24] is the 
sub-list for extension type_name + 24, // [24:24] is the sub-list for extension extendee + 0, // [0:24] is the sub-list for field type_name } func init() { file_atelet_proto_init() } @@ -1246,11 +1581,14 @@ func file_atelet_proto_init() { if File_atelet_proto != nil { return } - file_atelet_proto_msgTypes[12].OneofWrappers = []any{ + file_atelet_proto_msgTypes[6].OneofWrappers = []any{ + (*Volume_HomeDir)(nil), + } + file_atelet_proto_msgTypes[15].OneofWrappers = []any{ (*CheckpointRequest_LocalConfig)(nil), (*CheckpointRequest_ExternalConfig)(nil), } - file_atelet_proto_msgTypes[14].OneofWrappers = []any{ + file_atelet_proto_msgTypes[17].OneofWrappers = []any{ (*RestoreRequest_LocalConfig)(nil), (*RestoreRequest_ExternalConfig)(nil), } @@ -1259,8 +1597,8 @@ func file_atelet_proto_init() { File: protoimpl.DescBuilder{ GoPackagePath: reflect.TypeOf(x{}).PkgPath(), RawDescriptor: unsafe.Slice(unsafe.StringData(file_atelet_proto_rawDesc), len(file_atelet_proto_rawDesc)), - NumEnums: 1, - NumMessages: 18, + NumEnums: 3, + NumMessages: 21, NumExtensions: 0, NumServices: 1, }, diff --git a/internal/proto/ateletpb/atelet.proto b/internal/proto/ateletpb/atelet.proto index 6c4046fc..8c88f60b 100644 --- a/internal/proto/ateletpb/atelet.proto +++ b/internal/proto/ateletpb/atelet.proto @@ -77,14 +77,39 @@ message SandboxAssets { message WorkloadSpec { repeated Container containers = 1; string pause_image = 2; + repeated Volume volumes = 3; +} + +enum VolumeType { + VOLUME_TYPE_UNSPECIFIED = 0; + VOLUME_TYPE_HOMEDIR = 1; +} + +message HomedirVolume { +} + +message Volume { + string name = 1; + + VolumeType type = 2; + + oneof source{ + HomedirVolume home_dir = 3; + } +} + +message VolumeMount{ + string name = 1; + string mount_path = 2; } message Container { - string name = 1; - string image = 2; - repeated string command = 3; - repeated EnvEntry env = 4; - Readyz readyz = 5; + string name = 1; + string image = 2; + repeated string command = 3; + repeated EnvEntry env = 4; + 
Readyz readyz = 5; + repeated VolumeMount volume_mounts = 6; } message EnvEntry { @@ -138,6 +163,15 @@ enum CheckpointType { CHECKPOINT_TYPE_EXTERNAL = 2; } +enum SnapshotScope { + // Not valid option; should never happen. + SNAPSHOT_SCOPE_UNSPECIFIED = 0; + // Snapshot memory and the full rootfs (including homedir content). + SNAPSHOT_SCOPE_PROCESS = 1; + // Snapshot only the homedir; memory and the rest of rootfs are excluded. + SNAPSHOT_SCOPE_HOMEDIR = 2; +} + message CheckpointRequest { string target_ateom_uid = 1; @@ -162,6 +196,9 @@ message CheckpointRequest { LocalCheckpointConfiguration local_config = 10; ExternalCheckpointConfiguration external_config = 11; } + + // What should be included in the checkpoint. + SnapshotScope scope = 12; } message CheckpointResponse { @@ -192,6 +229,9 @@ message RestoreRequest { LocalCheckpointConfiguration local_config = 10; ExternalCheckpointConfiguration external_config = 11; } + + // What content to restore from the checkpoint. + SnapshotScope scope = 12; } message RestoreResponse { diff --git a/internal/proto/ateompb/ateom.pb.go b/internal/proto/ateompb/ateom.pb.go index 158f277c..8c3a57c0 100644 --- a/internal/proto/ateompb/ateom.pb.go +++ b/internal/proto/ateompb/ateom.pb.go @@ -35,6 +35,58 @@ const ( _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) ) +type SnapshotScope int32 + +const ( + // Not valid option; should never happen. + SnapshotScope_SNAPSHOT_SCOPE_UNSPECIFIED SnapshotScope = 0 + // Snapshot memory and the full rootfs (including homedir content). + SnapshotScope_SNAPSHOT_SCOPE_PROCESS SnapshotScope = 1 + // Snapshot only the homedir; memory and the rest of rootfs are excluded. + SnapshotScope_SNAPSHOT_SCOPE_HOMEDIR SnapshotScope = 2 +) + +// Enum value maps for SnapshotScope. 
+var ( + SnapshotScope_name = map[int32]string{ + 0: "SNAPSHOT_SCOPE_UNSPECIFIED", + 1: "SNAPSHOT_SCOPE_PROCESS", + 2: "SNAPSHOT_SCOPE_HOMEDIR", + } + SnapshotScope_value = map[string]int32{ + "SNAPSHOT_SCOPE_UNSPECIFIED": 0, + "SNAPSHOT_SCOPE_PROCESS": 1, + "SNAPSHOT_SCOPE_HOMEDIR": 2, + } +) + +func (x SnapshotScope) Enum() *SnapshotScope { + p := new(SnapshotScope) + *p = x + return p +} + +func (x SnapshotScope) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (SnapshotScope) Descriptor() protoreflect.EnumDescriptor { + return file_ateom_proto_enumTypes[0].Descriptor() +} + +func (SnapshotScope) Type() protoreflect.EnumType { + return &file_ateom_proto_enumTypes[0] +} + +func (x SnapshotScope) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Use SnapshotScope.Descriptor instead. +func (SnapshotScope) EnumDescriptor() ([]byte, []int) { + return file_ateom_proto_rawDescGZIP(), []int{0} +} + type RunWorkloadRequest struct { state protoimpl.MessageState `protogen:"open.v1"` ActorTemplateNamespace string `protobuf:"bytes,1,opt,name=actor_template_namespace,json=actorTemplateNamespace,proto3" json:"actor_template_namespace,omitempty"` @@ -169,11 +221,12 @@ func (x *WorkloadSpec) GetContainers() []*Container { } type Container struct { - state protoimpl.MessageState `protogen:"open.v1"` - Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` - Readyz *Readyz `protobuf:"bytes,2,opt,name=readyz,proto3" json:"readyz,omitempty"` - unknownFields protoimpl.UnknownFields - sizeCache protoimpl.SizeCache + state protoimpl.MessageState `protogen:"open.v1"` + Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` + Readyz *Readyz `protobuf:"bytes,2,opt,name=readyz,proto3" json:"readyz,omitempty"` + HomeDirVolumes []string `protobuf:"bytes,3,rep,name=home_dir_volumes,json=homeDirVolumes,proto3" json:"home_dir_volumes,omitempty"` + 
unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache } func (x *Container) Reset() { @@ -220,6 +273,13 @@ func (x *Container) GetReadyz() *Readyz { return nil } +func (x *Container) GetHomeDirVolumes() []string { + if x != nil { + return x.HomeDirVolumes + } + return nil +} + // Readyz describes how to check that a container is ready to serve. // Only HTTP is supported today. type Readyz struct { @@ -376,8 +436,10 @@ type CheckpointWorkloadRequest struct { // runtime_asset_paths maps a runtime asset name to the local on-disk path // atelet fetched it to (see RunWorkloadRequest). Empty for gVisor. RuntimeAssetPaths map[string]string `protobuf:"bytes,7,rep,name=runtime_asset_paths,json=runtimeAssetPaths,proto3" json:"runtime_asset_paths,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"bytes,2,opt,name=value"` - unknownFields protoimpl.UnknownFields - sizeCache protoimpl.SizeCache + // What content to include in the checkpoint. + Scope SnapshotScope `protobuf:"varint,8,opt,name=scope,proto3,enum=ateom.SnapshotScope" json:"scope,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache } func (x *CheckpointWorkloadRequest) Reset() { @@ -459,6 +521,13 @@ func (x *CheckpointWorkloadRequest) GetRuntimeAssetPaths() map[string]string { return nil } +func (x *CheckpointWorkloadRequest) GetScope() SnapshotScope { + if x != nil { + return x.Scope + } + return SnapshotScope_SNAPSHOT_SCOPE_UNSPECIFIED +} + type CheckpointWorkloadResponse struct { state protoimpl.MessageState `protogen:"open.v1"` // snapshot_files lists the files ateom wrote into the checkpoint directory @@ -518,8 +587,10 @@ type RestoreWorkloadRequest struct { // runtime_asset_paths maps a runtime asset name to the local on-disk path // atelet fetched it to (see RunWorkloadRequest). Empty for gVisor. 
RuntimeAssetPaths map[string]string `protobuf:"bytes,7,rep,name=runtime_asset_paths,json=runtimeAssetPaths,proto3" json:"runtime_asset_paths,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"bytes,2,opt,name=value"` - unknownFields protoimpl.UnknownFields - sizeCache protoimpl.SizeCache + // What content to restore from the snapshot. + Scope SnapshotScope `protobuf:"varint,8,opt,name=scope,proto3,enum=ateom.SnapshotScope" json:"scope,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache } func (x *RestoreWorkloadRequest) Reset() { @@ -601,6 +672,13 @@ func (x *RestoreWorkloadRequest) GetRuntimeAssetPaths() map[string]string { return nil } +func (x *RestoreWorkloadRequest) GetScope() SnapshotScope { + if x != nil { + return x.Scope + } + return SnapshotScope_SNAPSHOT_SCOPE_UNSPECIFIED +} + type RestoreWorkloadResponse struct { state protoimpl.MessageState `protogen:"open.v1"` unknownFields protoimpl.UnknownFields @@ -656,16 +734,17 @@ const file_ateom_proto_rawDesc = "" + "\fWorkloadSpec\x120\n" + "\n" + "containers\x18\x01 \x03(\v2\x10.ateom.ContainerR\n" + - "containers\"F\n" + + "containers\"p\n" + "\tContainer\x12\x12\n" + "\x04name\x18\x01 \x01(\tR\x04name\x12%\n" + - "\x06readyz\x18\x02 \x01(\v2\r.ateom.ReadyzR\x06readyz\"9\n" + + "\x06readyz\x18\x02 \x01(\v2\r.ateom.ReadyzR\x06readyz\x12(\n" + + "\x10home_dir_volumes\x18\x03 \x03(\tR\x0ehomeDirVolumes\"9\n" + "\x06Readyz\x12/\n" + "\bhttp_get\x18\x01 \x01(\v2\x14.ateom.HTTPGetActionR\ahttpGet\"7\n" + "\rHTTPGetAction\x12\x12\n" + "\x04path\x18\x01 \x01(\tR\x04path\x12\x12\n" + "\x04port\x18\x02 \x01(\x05R\x04port\"\x15\n" + - "\x13RunWorkloadResponse\"\xc7\x03\n" + + "\x13RunWorkloadResponse\"\xf3\x03\n" + "\x19CheckpointWorkloadRequest\x128\n" + "\x18actor_template_namespace\x18\x01 \x01(\tR\x16actorTemplateNamespace\x12.\n" + "\x13actor_template_name\x18\x02 \x01(\tR\x11actorTemplateName\x12\x19\n" + @@ -674,12 +753,13 @@ const file_ateom_proto_rawDesc = "" + 
"runsc_path\x18\x04 \x01(\tR\trunscPath\x12'\n" + "\x04spec\x18\x05 \x01(\v2\x13.ateom.WorkloadSpecR\x04spec\x12.\n" + "\x13snapshot_uri_prefix\x18\x06 \x01(\tR\x11snapshotUriPrefix\x12g\n" + - "\x13runtime_asset_paths\x18\a \x03(\v27.ateom.CheckpointWorkloadRequest.RuntimeAssetPathsEntryR\x11runtimeAssetPaths\x1aD\n" + + "\x13runtime_asset_paths\x18\a \x03(\v27.ateom.CheckpointWorkloadRequest.RuntimeAssetPathsEntryR\x11runtimeAssetPaths\x12*\n" + + "\x05scope\x18\b \x01(\x0e2\x14.ateom.SnapshotScopeR\x05scope\x1aD\n" + "\x16RuntimeAssetPathsEntry\x12\x10\n" + "\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n" + "\x05value\x18\x02 \x01(\tR\x05value:\x028\x01\"C\n" + "\x1aCheckpointWorkloadResponse\x12%\n" + - "\x0esnapshot_files\x18\x01 \x03(\tR\rsnapshotFiles\"\xc1\x03\n" + + "\x0esnapshot_files\x18\x01 \x03(\tR\rsnapshotFiles\"\xed\x03\n" + "\x16RestoreWorkloadRequest\x128\n" + "\x18actor_template_namespace\x18\x01 \x01(\tR\x16actorTemplateNamespace\x12.\n" + "\x13actor_template_name\x18\x02 \x01(\tR\x11actorTemplateName\x12\x19\n" + @@ -688,11 +768,16 @@ const file_ateom_proto_rawDesc = "" + "runsc_path\x18\x04 \x01(\tR\trunscPath\x12'\n" + "\x04spec\x18\x05 \x01(\v2\x13.ateom.WorkloadSpecR\x04spec\x12.\n" + "\x13snapshot_uri_prefix\x18\x06 \x01(\tR\x11snapshotUriPrefix\x12d\n" + - "\x13runtime_asset_paths\x18\a \x03(\v24.ateom.RestoreWorkloadRequest.RuntimeAssetPathsEntryR\x11runtimeAssetPaths\x1aD\n" + + "\x13runtime_asset_paths\x18\a \x03(\v24.ateom.RestoreWorkloadRequest.RuntimeAssetPathsEntryR\x11runtimeAssetPaths\x12*\n" + + "\x05scope\x18\b \x01(\x0e2\x14.ateom.SnapshotScopeR\x05scope\x1aD\n" + "\x16RuntimeAssetPathsEntry\x12\x10\n" + "\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n" + "\x05value\x18\x02 \x01(\tR\x05value:\x028\x01\"\x19\n" + - "\x17RestoreWorkloadResponse2\x80\x02\n" + + "\x17RestoreWorkloadResponse*g\n" + + "\rSnapshotScope\x12\x1e\n" + + "\x1aSNAPSHOT_SCOPE_UNSPECIFIED\x10\x00\x12\x1a\n" + + "\x16SNAPSHOT_SCOPE_PROCESS\x10\x01\x12\x1a\n" + + 
"\x16SNAPSHOT_SCOPE_HOMEDIR\x10\x022\x80\x02\n" + "\x05Ateom\x12F\n" + "\vRunWorkload\x12\x19.ateom.RunWorkloadRequest\x1a\x1a.ateom.RunWorkloadResponse\"\x00\x12[\n" + "\x12CheckpointWorkload\x12 .ateom.CheckpointWorkloadRequest\x1a!.ateom.CheckpointWorkloadResponse\"\x00\x12R\n" + @@ -710,43 +795,47 @@ func file_ateom_proto_rawDescGZIP() []byte { return file_ateom_proto_rawDescData } +var file_ateom_proto_enumTypes = make([]protoimpl.EnumInfo, 1) var file_ateom_proto_msgTypes = make([]protoimpl.MessageInfo, 13) var file_ateom_proto_goTypes = []any{ - (*RunWorkloadRequest)(nil), // 0: ateom.RunWorkloadRequest - (*WorkloadSpec)(nil), // 1: ateom.WorkloadSpec - (*Container)(nil), // 2: ateom.Container - (*Readyz)(nil), // 3: ateom.Readyz - (*HTTPGetAction)(nil), // 4: ateom.HTTPGetAction - (*RunWorkloadResponse)(nil), // 5: ateom.RunWorkloadResponse - (*CheckpointWorkloadRequest)(nil), // 6: ateom.CheckpointWorkloadRequest - (*CheckpointWorkloadResponse)(nil), // 7: ateom.CheckpointWorkloadResponse - (*RestoreWorkloadRequest)(nil), // 8: ateom.RestoreWorkloadRequest - (*RestoreWorkloadResponse)(nil), // 9: ateom.RestoreWorkloadResponse - nil, // 10: ateom.RunWorkloadRequest.RuntimeAssetPathsEntry - nil, // 11: ateom.CheckpointWorkloadRequest.RuntimeAssetPathsEntry - nil, // 12: ateom.RestoreWorkloadRequest.RuntimeAssetPathsEntry + (SnapshotScope)(0), // 0: ateom.SnapshotScope + (*RunWorkloadRequest)(nil), // 1: ateom.RunWorkloadRequest + (*WorkloadSpec)(nil), // 2: ateom.WorkloadSpec + (*Container)(nil), // 3: ateom.Container + (*Readyz)(nil), // 4: ateom.Readyz + (*HTTPGetAction)(nil), // 5: ateom.HTTPGetAction + (*RunWorkloadResponse)(nil), // 6: ateom.RunWorkloadResponse + (*CheckpointWorkloadRequest)(nil), // 7: ateom.CheckpointWorkloadRequest + (*CheckpointWorkloadResponse)(nil), // 8: ateom.CheckpointWorkloadResponse + (*RestoreWorkloadRequest)(nil), // 9: ateom.RestoreWorkloadRequest + (*RestoreWorkloadResponse)(nil), // 10: ateom.RestoreWorkloadResponse + 
nil, // 11: ateom.RunWorkloadRequest.RuntimeAssetPathsEntry + nil, // 12: ateom.CheckpointWorkloadRequest.RuntimeAssetPathsEntry + nil, // 13: ateom.RestoreWorkloadRequest.RuntimeAssetPathsEntry } var file_ateom_proto_depIdxs = []int32{ - 1, // 0: ateom.RunWorkloadRequest.spec:type_name -> ateom.WorkloadSpec - 10, // 1: ateom.RunWorkloadRequest.runtime_asset_paths:type_name -> ateom.RunWorkloadRequest.RuntimeAssetPathsEntry - 2, // 2: ateom.WorkloadSpec.containers:type_name -> ateom.Container - 3, // 3: ateom.Container.readyz:type_name -> ateom.Readyz - 4, // 4: ateom.Readyz.http_get:type_name -> ateom.HTTPGetAction - 1, // 5: ateom.CheckpointWorkloadRequest.spec:type_name -> ateom.WorkloadSpec - 11, // 6: ateom.CheckpointWorkloadRequest.runtime_asset_paths:type_name -> ateom.CheckpointWorkloadRequest.RuntimeAssetPathsEntry - 1, // 7: ateom.RestoreWorkloadRequest.spec:type_name -> ateom.WorkloadSpec - 12, // 8: ateom.RestoreWorkloadRequest.runtime_asset_paths:type_name -> ateom.RestoreWorkloadRequest.RuntimeAssetPathsEntry - 0, // 9: ateom.Ateom.RunWorkload:input_type -> ateom.RunWorkloadRequest - 6, // 10: ateom.Ateom.CheckpointWorkload:input_type -> ateom.CheckpointWorkloadRequest - 8, // 11: ateom.Ateom.RestoreWorkload:input_type -> ateom.RestoreWorkloadRequest - 5, // 12: ateom.Ateom.RunWorkload:output_type -> ateom.RunWorkloadResponse - 7, // 13: ateom.Ateom.CheckpointWorkload:output_type -> ateom.CheckpointWorkloadResponse - 9, // 14: ateom.Ateom.RestoreWorkload:output_type -> ateom.RestoreWorkloadResponse - 12, // [12:15] is the sub-list for method output_type - 9, // [9:12] is the sub-list for method input_type - 9, // [9:9] is the sub-list for extension type_name - 9, // [9:9] is the sub-list for extension extendee - 0, // [0:9] is the sub-list for field type_name + 2, // 0: ateom.RunWorkloadRequest.spec:type_name -> ateom.WorkloadSpec + 11, // 1: ateom.RunWorkloadRequest.runtime_asset_paths:type_name -> ateom.RunWorkloadRequest.RuntimeAssetPathsEntry + 3, 
// 2: ateom.WorkloadSpec.containers:type_name -> ateom.Container + 4, // 3: ateom.Container.readyz:type_name -> ateom.Readyz + 5, // 4: ateom.Readyz.http_get:type_name -> ateom.HTTPGetAction + 2, // 5: ateom.CheckpointWorkloadRequest.spec:type_name -> ateom.WorkloadSpec + 12, // 6: ateom.CheckpointWorkloadRequest.runtime_asset_paths:type_name -> ateom.CheckpointWorkloadRequest.RuntimeAssetPathsEntry + 0, // 7: ateom.CheckpointWorkloadRequest.scope:type_name -> ateom.SnapshotScope + 2, // 8: ateom.RestoreWorkloadRequest.spec:type_name -> ateom.WorkloadSpec + 13, // 9: ateom.RestoreWorkloadRequest.runtime_asset_paths:type_name -> ateom.RestoreWorkloadRequest.RuntimeAssetPathsEntry + 0, // 10: ateom.RestoreWorkloadRequest.scope:type_name -> ateom.SnapshotScope + 1, // 11: ateom.Ateom.RunWorkload:input_type -> ateom.RunWorkloadRequest + 7, // 12: ateom.Ateom.CheckpointWorkload:input_type -> ateom.CheckpointWorkloadRequest + 9, // 13: ateom.Ateom.RestoreWorkload:input_type -> ateom.RestoreWorkloadRequest + 6, // 14: ateom.Ateom.RunWorkload:output_type -> ateom.RunWorkloadResponse + 8, // 15: ateom.Ateom.CheckpointWorkload:output_type -> ateom.CheckpointWorkloadResponse + 10, // 16: ateom.Ateom.RestoreWorkload:output_type -> ateom.RestoreWorkloadResponse + 14, // [14:17] is the sub-list for method output_type + 11, // [11:14] is the sub-list for method input_type + 11, // [11:11] is the sub-list for extension type_name + 11, // [11:11] is the sub-list for extension extendee + 0, // [0:11] is the sub-list for field type_name } func init() { file_ateom_proto_init() } @@ -759,13 +848,14 @@ func file_ateom_proto_init() { File: protoimpl.DescBuilder{ GoPackagePath: reflect.TypeOf(x{}).PkgPath(), RawDescriptor: unsafe.Slice(unsafe.StringData(file_ateom_proto_rawDesc), len(file_ateom_proto_rawDesc)), - NumEnums: 0, + NumEnums: 1, NumMessages: 13, NumExtensions: 0, NumServices: 1, }, GoTypes: file_ateom_proto_goTypes, DependencyIndexes: file_ateom_proto_depIdxs, + EnumInfos: 
file_ateom_proto_enumTypes, MessageInfos: file_ateom_proto_msgTypes, }.Build() File_ateom_proto = out.File diff --git a/internal/proto/ateompb/ateom.proto b/internal/proto/ateompb/ateom.proto index 0260975b..3c582b43 100644 --- a/internal/proto/ateompb/ateom.proto +++ b/internal/proto/ateompb/ateom.proto @@ -69,8 +69,9 @@ message WorkloadSpec { } message Container { - string name = 1; - Readyz readyz = 2; + string name = 1; + Readyz readyz = 2; + repeated string home_dir_volumes = 3; } // Readyz describes how to check that a container is ready to serve. @@ -90,6 +91,15 @@ message HTTPGetAction { message RunWorkloadResponse { } +enum SnapshotScope { + // Not valid option; should never happen. + SNAPSHOT_SCOPE_UNSPECIFIED = 0; + // Snapshot memory and the full rootfs (including homedir content). + SNAPSHOT_SCOPE_PROCESS = 1; + // Snapshot only the homedir; memory and the rest of rootfs are excluded. + SNAPSHOT_SCOPE_HOMEDIR = 2; +} + message CheckpointWorkloadRequest { string actor_template_namespace = 1; string actor_template_name = 2; @@ -112,6 +122,9 @@ message CheckpointWorkloadRequest { // runtime_asset_paths maps a runtime asset name to the local on-disk path // atelet fetched it to (see RunWorkloadRequest). Empty for gVisor. map runtime_asset_paths = 7; + + // What content to include in the checkpoint. + SnapshotScope scope = 8; } message CheckpointWorkloadResponse { @@ -136,6 +149,9 @@ message RestoreWorkloadRequest { // runtime_asset_paths maps a runtime asset name to the local on-disk path // atelet fetched it to (see RunWorkloadRequest). Empty for gVisor. map runtime_asset_paths = 7; + + // What content to restore from the snapshot. 
+ SnapshotScope scope = 8; } message RestoreWorkloadResponse { diff --git a/manifests/ate-install/sandboxconfig-gvisor.yaml b/manifests/ate-install/sandboxconfig-gvisor.yaml index 6900fd9f..7c4ad881 100644 --- a/manifests/ate-install/sandboxconfig-gvisor.yaml +++ b/manifests/ate-install/sandboxconfig-gvisor.yaml @@ -27,9 +27,9 @@ spec: assets: amd64: runsc: - url: "gs://gvisor/releases/release/20260608/x86_64/runsc" - sha256: "4ec073363641a44cc5d171f63f1e23b76016ef632eb3269395c79ac8aecb71bc" + url: "gs://gvisor/releases/release/20260622/x86_64/runsc" + sha256: "f18a948bf9c8bbb54eb998549a3a8d719a1c7de2efbe8fdd2ff0ee5fecd06f19" arm64: runsc: - url: "gs://gvisor/releases/release/20260608/aarch64/runsc" - sha256: "4bbd0e66f61ae770086e5ca7a47a8385f3e876aa8853e1bc6f7793e4dbd51a0f" + url: "gs://gvisor/releases/release/20260622/aarch64/runsc" + sha256: "62eee121f8c188e347c428acc96f111568ede3be37b906046b6f28bbe2cc40c0" From 7d6d1e65a7a1474a9e9110b369bef2d0e46725fd Mon Sep 17 00:00:00 2001 From: dberkov Date: Tue, 23 Jun 2026 19:23:49 -0700 Subject: [PATCH 03/17] e2e test --- internal/e2e/suites/demo/demo_test.go | 187 ++++++++++++++++++++++++-- 1 file changed, 173 insertions(+), 14 deletions(-) diff --git a/internal/e2e/suites/demo/demo_test.go b/internal/e2e/suites/demo/demo_test.go index 06508fc7..8fd581d3 100644 --- a/internal/e2e/suites/demo/demo_test.go +++ b/internal/e2e/suites/demo/demo_test.go @@ -42,7 +42,7 @@ func TestActorLifecycle(t *testing.T) { clients := e2e.GetClients() // Create actor template. - at, err := createActorTemplate(ctx, t, clients, nsObj) + at, err := createActorTemplate(ctx, t, clients, nsObj, v1alpha1.SnapshotScopeProcess, v1alpha1.SnapshotScopeProcess) if err != nil { t.Fatalf("failed to initialize ActorTemplate: %v", err) } @@ -73,7 +73,171 @@ func TestActorLifecycle(t *testing.T) { } }) } +} + +// Verify that file and memory counters behavior after pause and suspend, for different snapshot scopes. +// Test case: +// 1. Create actor. +// 2. 
Call to actor and validate memory and file counters. +// 3. Pause & Resume actor. +// 4. Call to actor and validate memory and file counters. +// 5. Suspend & Resume actor. +// 6. Call to actor and validate memory and file counters. +func TestHomedirLifecycle(t *testing.T) { + tests := []struct { + name string + onCommit v1alpha1.SnapshotScope + onPause v1alpha1.SnapshotScope + wantMemoryAfterPause int + wantFileAfterPause int + wantMemoryAfterSuspend int + wantFileAfterSuspend int + }{ + { + name: "onCommit:process, onPause:process", + onCommit: v1alpha1.SnapshotScopeProcess, + onPause: v1alpha1.SnapshotScopeProcess, + wantMemoryAfterPause: 2, + wantFileAfterPause: 2, + wantMemoryAfterSuspend: 3, + wantFileAfterSuspend: 3, + }, + { + name: "onCommit:homedir, onPause:process", + onCommit: v1alpha1.SnapshotScopeHomedir, + onPause: v1alpha1.SnapshotScopeProcess, + wantMemoryAfterPause: 2, + wantFileAfterPause: 2, + wantMemoryAfterSuspend: 1, + wantFileAfterSuspend: 3, + }, + { + name: "onCommit:homedir, onPause:homedir", + onCommit: v1alpha1.SnapshotScopeHomedir, + onPause: v1alpha1.SnapshotScopeHomedir, + wantMemoryAfterPause: 1, + wantFileAfterPause: 2, + wantMemoryAfterSuspend: 1, + wantFileAfterSuspend: 3, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + // Create namespace + nsObj := e2e.CreateNamespace(t) + + ctx := context.Background() + clients := e2e.GetClients() + + // Create actor template. + at, err := createActorTemplate(ctx, t, clients, nsObj, tc.onCommit, tc.onPause) + if err != nil { + t.Fatalf("failed to initialize ActorTemplate: %v", err) + } + + // + // Create an Actor. 
+ // + actorID := "homedir-lifecycle" + "-" + nsObj.Name + + t.Logf("Creating Actor %q using Substrate API...", actorID) + createResp, err := clients.SubstrateAPI.CreateActor(ctx, &ateapipb.CreateActorRequest{ + ActorId: actorID, + ActorTemplateNamespace: nsObj.Name, + ActorTemplateName: at.Name, + }) + if err != nil { + t.Fatalf("failed to create Actor: %v", err) + } + t.Logf("Successfully created Actor: %s", createResp.GetActor().GetActorId()) + defer func() { + clients.SubstrateAPI.DeleteActor(ctx, &ateapipb.DeleteActorRequest{ + ActorId: actorID, + }) + }() + + // Resuming the actor + t.Logf("Resuming Actor %q...", actorID) + if _, err := clients.SubstrateAPI.ResumeActor(ctx, &ateapipb.ResumeActorRequest{ + ActorId: actorID, + }); err != nil { + t.Fatalf("failed to resume Actor: %v", err) + } + waitForActorStatus(ctx, t, clients, actorID, ateapipb.Actor_STATUS_RUNNING) + resp, err := callActor(t, actorID) + if err != nil { + t.Fatalf("failed to call actor: %v", err) + } + validateCounterResponse(t, resp, "after creation", 1, 1) + + // + // Pausing the actor + // + t.Logf("Pausing Actor %q...", actorID) + if _, err := clients.SubstrateAPI.PauseActor(ctx, &ateapipb.PauseActorRequest{ + ActorId: actorID, + }); err != nil { + t.Fatalf("failed to pause Actor: %v", err) + } + waitForActorStatus(ctx, t, clients, actorID, ateapipb.Actor_STATUS_PAUSED) + + // Resuming the actor + t.Logf("Resuming Actor %q again...", actorID) + if _, err := clients.SubstrateAPI.ResumeActor(ctx, &ateapipb.ResumeActorRequest{ + ActorId: actorID, + }); err != nil { + t.Fatalf("failed to resume Actor again: %v", err) + } + waitForActorStatus(ctx, t, clients, actorID, ateapipb.Actor_STATUS_RUNNING) + + resp, err = callActor(t, actorID) + if err != nil { + t.Fatalf("failed to call actor again: %v", err) + } + validateCounterResponse(t, resp, "after paise", tc.wantMemoryAfterPause, tc.wantFileAfterPause) + + // + // Suspending the actor + // + t.Logf("Suspending Actor %q...", actorID) + if _, 
err := clients.SubstrateAPI.SuspendActor(ctx, &ateapipb.SuspendActorRequest{ + ActorId: actorID, + }); err != nil { + t.Fatalf("failed to suspend Actor: %v", err) + } + waitForActorStatus(ctx, t, clients, actorID, ateapipb.Actor_STATUS_SUSPENDED) + + // Resuming the actor + t.Logf("Resuming Actor %q again...", actorID) + if _, err := clients.SubstrateAPI.ResumeActor(ctx, &ateapipb.ResumeActorRequest{ + ActorId: actorID, + }); err != nil { + t.Fatalf("failed to resume Actor again: %v", err) + } + waitForActorStatus(ctx, t, clients, actorID, ateapipb.Actor_STATUS_RUNNING) + + resp, err = callActor(t, actorID) + if err != nil { + t.Fatalf("failed to call actor again: %v", err) + } + validateCounterResponse(t, resp, "after suspend", tc.wantMemoryAfterSuspend, tc.wantFileAfterSuspend) + }) + } +} + +func validateCounterResponse(t *testing.T, resp string, stage string, wantMemory, wantFile int) { + memoryCounterPrefix := "preserved memory count: " + fileCounterPrefix := "preserved file counter: " + + if !strings.Contains(resp, memoryCounterPrefix+fmt.Sprintf("%d", wantMemory)) { + t.Errorf("[%s] expected memory count %d, got response: %s", stage, wantMemory, resp) + } + if !strings.Contains(resp, fileCounterPrefix+fmt.Sprintf("%d", wantFile)) { + t.Errorf("[%s] expected file count %d, got response: %s", stage, wantFile, resp) + } } func createActor(ctx context.Context, t *testing.T, clients *e2e.Clients, nsObj *e2e.Namespace, at *v1alpha1.ActorTemplate) error { @@ -157,9 +321,7 @@ func pauseActor(ctx context.Context, t *testing.T, clients *e2e.Clients, nsObj * if err != nil { t.Fatalf("failed to call actor: %v", err) } - if !strings.Contains(resp, "preserved memory count: 1") { - t.Fatalf("expected count 1, got response: %s", resp) - } + validateCounterResponse(t, resp, "after creation", 1, 1) // Pausing the actor t.Logf("Pausing Actor %q...", actorID) @@ -183,9 +345,7 @@ func pauseActor(ctx context.Context, t *testing.T, clients *e2e.Clients, nsObj * if err != nil { 
t.Fatalf("failed to call actor again: %v", err) } - if !strings.Contains(resp, "preserved memory count: 2") { - t.Fatalf("expected count 2, got response: %s", resp) - } + validateCounterResponse(t, resp, "after pause", 2, 2) // Suspending the actor before deletion t.Logf("Suspending Actor %q before deletion...", actorID) @@ -240,9 +400,7 @@ func suspendActor(ctx context.Context, t *testing.T, clients *e2e.Clients, nsObj if err != nil { t.Fatalf("failed to call actor: %v", err) } - if !strings.Contains(resp, "preserved memory count: 1") { - t.Fatalf("expected count 1, got response: %s", resp) - } + validateCounterResponse(t, resp, "after creation", 1, 1) // Suspending the actor t.Logf("Suspending Actor %q...", actorID) @@ -266,9 +424,7 @@ func suspendActor(ctx context.Context, t *testing.T, clients *e2e.Clients, nsObj if err != nil { t.Fatalf("failed to call actor again: %v", err) } - if !strings.Contains(resp, "preserved memory count: 2") { - t.Fatalf("expected count 2, got response: %s", resp) - } + validateCounterResponse(t, resp, "after pause", 2, 2) // Suspending the actor before deletion t.Logf("Suspending Actor %q before deletion...", actorID) @@ -296,7 +452,7 @@ func suspendActor(ctx context.Context, t *testing.T, clients *e2e.Clients, nsObj return nil } -func createActorTemplate(ctx context.Context, t *testing.T, clients *e2e.Clients, nsObj *e2e.Namespace) (*v1alpha1.ActorTemplate, error) { +func createActorTemplate(ctx context.Context, t *testing.T, clients *e2e.Clients, nsObj *e2e.Namespace, onCommit, onPause v1alpha1.SnapshotScope) (*v1alpha1.ActorTemplate, error) { env, err := e2e.CheckEnv("BUCKET_NAME", "KO_DOCKER_REPO") if err != nil { t.Fatalf("CheckEnv failed: %v", err) @@ -364,7 +520,10 @@ func createActorTemplate(ctx context.Context, t *testing.T, clients *e2e.Clients Containers: existingAt.Spec.Containers, SnapshotsConfig: v1alpha1.SnapshotsConfig{ Location: "gs://" + env["BUCKET_NAME"] + "/ate-demo-counter", + OnPause: onPause, + OnCommit: 
onCommit, }, + Volumes: existingAt.Spec.Volumes, }, } _, err = clients.SubstrateK8s.ApiV1alpha1().ActorTemplates(nsObj.Name).Create(ctx, at, metav1.CreateOptions{}) From 8e5dbde7634d98ea1867264b5de9cd9060bafeb2 Mon Sep 17 00:00:00 2001 From: dberkov Date: Thu, 25 Jun 2026 07:32:30 -0700 Subject: [PATCH 04/17] adjust validations based on PR comments --- .../generated/ate.dev_actortemplates.yaml | 32 ++++--- pkg/api/v1alpha1/actortemplate_types.go | 10 ++- .../v1alpha1/actortemplate_validation_test.go | 87 ++++++++++++++++--- 3 files changed, 102 insertions(+), 27 deletions(-) diff --git a/manifests/ate-install/generated/ate.dev_actortemplates.yaml b/manifests/ate-install/generated/ate.dev_actortemplates.yaml index bcff9d58..1fb3483b 100644 --- a/manifests/ate-install/generated/ate.dev_actortemplates.yaml +++ b/manifests/ate-install/generated/ate.dev_actortemplates.yaml @@ -188,14 +188,27 @@ spec: properties: mountPath: description: |- - Path within the actor at which the volume should be mounted. Must - not contain ':'. + Path within the actor at which the volume should be mounted. Must be a + clean absolute Unix path: must start with '/', not be '/', and contain + no ':', '..', '.', '//', trailing '/', or control characters. maxLength: 4096 type: string + x-kubernetes-validations: + - message: 'MountPath must be a clean absolute + Unix path: must start with ''/'', not be ''/'', + and contain no '':'', ''..'', ''.'', ''//'', + trailing ''/'', or control characters' + rule: self.startsWith('/') && size(self) > 1 + && !self.endsWith('/') && !self.contains('//') + && !self.contains(':') && !self.matches('[\x00-\x1f\x7f]') + && !self.matches('(^|/)[.][.]?(/|$)') name: description: This must match the Name of a Volume. maxLength: 63 type: string + x-kubernetes-validations: + - message: Name must be a valid DNS label + rule: '!format.dns1123Label().validate(self).hasValue()' required: - mountPath - name @@ -295,6 +308,9 @@ spec: description: name of the volume. 
maxLength: 63 type: string + x-kubernetes-validations: + - message: Name must be a valid DNS label + rule: '!format.dns1123Label().validate(self).hasValue()' required: - name type: object @@ -365,15 +381,9 @@ spec: rule: '!has(self.containers) || self.containers.all(c, !has(c.volumeMounts) || c.volumeMounts.filter(vm, has(self.volumes) && self.volumes.exists(v, v.name == vm.name && has(v.homeDir))).size() <= 1)' - - message: 'MountPath for a HomeDir volume must be a clean absolute Unix - path: must start with ''/'', not be ''/'', and contain no '':'', ''..'', - ''.'', ''//'', trailing ''/'', or control characters' - rule: '!has(self.containers) || self.containers.all(c, !has(c.volumeMounts) - || c.volumeMounts.all(vm, !has(self.volumes) || !self.volumes.exists(v, - v.name == vm.name && has(v.homeDir)) || (vm.mountPath.startsWith(''/'') - && size(vm.mountPath) > 1 && !vm.mountPath.endsWith(''/'') && !vm.mountPath.contains(''//'') - && !vm.mountPath.contains('':'') && !vm.mountPath.matches(''[\x00-\x1f\x7f]'') - && !vm.mountPath.matches(''(^|/)[.][.]?(/|$)''))))' + - message: HomeDir volumes are not supported when sandboxClass is 'microvm' + rule: '!has(self.sandboxClass) || self.sandboxClass != ''microvm'' || + !has(self.volumes) || !self.volumes.exists(v, has(v.homeDir))' status: description: status is the observed state of ActorTemplate properties: diff --git a/pkg/api/v1alpha1/actortemplate_types.go b/pkg/api/v1alpha1/actortemplate_types.go index f0800129..34d746ec 100644 --- a/pkg/api/v1alpha1/actortemplate_types.go +++ b/pkg/api/v1alpha1/actortemplate_types.go @@ -50,6 +50,7 @@ type Volume struct { // // +required // +kubebuilder:validation:MaxLength=63 + // +kubebuilder:validation:XValidation:rule="!format.dns1123Label().validate(self).hasValue()",message="Name must be a valid DNS label" Name string `json:"name" protobuf:"bytes,1,opt,name=name"` // volumeSource represents the location and type of the mounted volume. 
@@ -62,12 +63,15 @@ type VolumeMount struct { // // +required // +kubebuilder:validation:MaxLength=63 + // +kubebuilder:validation:XValidation:rule="!format.dns1123Label().validate(self).hasValue()",message="Name must be a valid DNS label" Name string `json:"name" protobuf:"bytes,1,opt,name=name"` - // Path within the actor at which the volume should be mounted. Must - // not contain ':'. + // Path within the actor at which the volume should be mounted. Must be a + // clean absolute Unix path: must start with '/', not be '/', and contain + // no ':', '..', '.', '//', trailing '/', or control characters. // // +required // +kubebuilder:validation:MaxLength=4096 + // +kubebuilder:validation:XValidation:rule="self.startsWith('/') && size(self) > 1 && !self.endsWith('/') && !self.contains('//') && !self.contains(':') && !self.matches('[\\x00-\\x1f\\x7f]') && !self.matches('(^|/)[.][.]?(/|$)')",message="MountPath must be a clean absolute Unix path: must start with '/', not be '/', and contain no ':', '..', '.', '//', trailing '/', or control characters" MountPath string `json:"mountPath" protobuf:"bytes,3,opt,name=mountPath"` } @@ -251,7 +255,7 @@ type SnapshotsConfig struct { // ActorTemplateSpec defined desired spec of an actor. 
// // +kubebuilder:validation:XValidation:rule="!has(self.containers) || self.containers.all(c, !has(c.volumeMounts) || c.volumeMounts.filter(vm, has(self.volumes) && self.volumes.exists(v, v.name == vm.name && has(v.homeDir))).size() <= 1)",message="A container may mount at most one HomeDir-typed volume" -// +kubebuilder:validation:XValidation:rule="!has(self.containers) || self.containers.all(c, !has(c.volumeMounts) || c.volumeMounts.all(vm, !has(self.volumes) || !self.volumes.exists(v, v.name == vm.name && has(v.homeDir)) || (vm.mountPath.startsWith('/') && size(vm.mountPath) > 1 && !vm.mountPath.endsWith('/') && !vm.mountPath.contains('//') && !vm.mountPath.contains(':') && !vm.mountPath.matches('[\\x00-\\x1f\\x7f]') && !vm.mountPath.matches('(^|/)[.][.]?(/|$)'))))",message="MountPath for a HomeDir volume must be a clean absolute Unix path: must start with '/', not be '/', and contain no ':', '..', '.', '//', trailing '/', or control characters" +// +kubebuilder:validation:XValidation:rule="!has(self.sandboxClass) || self.sandboxClass != 'microvm' || !has(self.volumes) || !self.volumes.exists(v, has(v.homeDir))",message="HomeDir volumes are not supported when sandboxClass is 'microvm'" type ActorTemplateSpec struct { // PauseImage is the container to use as the root sandbox container. 
// diff --git a/pkg/api/v1alpha1/actortemplate_validation_test.go b/pkg/api/v1alpha1/actortemplate_validation_test.go index c793add2..87bddeb4 100644 --- a/pkg/api/v1alpha1/actortemplate_validation_test.go +++ b/pkg/api/v1alpha1/actortemplate_validation_test.go @@ -625,7 +625,7 @@ func TestActorTemplateValidation(t *testing.T) { } }, wantErr: true, - errMsg: "MountPath for a HomeDir volume must be a clean absolute Unix path", + errMsg: "MountPath must be a clean absolute Unix path", }, { name: "Volumes: HomeDir MountPath with relative path is invalid", mutate: func(at *ActorTemplate) { @@ -637,7 +637,7 @@ func TestActorTemplateValidation(t *testing.T) { } }, wantErr: true, - errMsg: "MountPath for a HomeDir volume must be a clean absolute Unix path", + errMsg: "MountPath must be a clean absolute Unix path", }, { name: "Volumes: HomeDir MountPath as empty string is invalid", mutate: func(at *ActorTemplate) { @@ -649,7 +649,7 @@ func TestActorTemplateValidation(t *testing.T) { } }, wantErr: true, - errMsg: "MountPath for a HomeDir volume must be a clean absolute Unix path", + errMsg: "MountPath must be a clean absolute Unix path", }, { name: "Volumes: HomeDir MountPath with leading whitespace is invalid", mutate: func(at *ActorTemplate) { @@ -661,7 +661,7 @@ func TestActorTemplateValidation(t *testing.T) { } }, wantErr: true, - errMsg: "MountPath for a HomeDir volume must be a clean absolute Unix path", + errMsg: "MountPath must be a clean absolute Unix path", }, { name: "Volumes: HomeDir MountPath with trailing slash is invalid", mutate: func(at *ActorTemplate) { @@ -673,7 +673,7 @@ func TestActorTemplateValidation(t *testing.T) { } }, wantErr: true, - errMsg: "MountPath for a HomeDir volume must be a clean absolute Unix path", + errMsg: "MountPath must be a clean absolute Unix path", }, { name: "Volumes: HomeDir MountPath with consecutive slashes is invalid", mutate: func(at *ActorTemplate) { @@ -685,7 +685,7 @@ func TestActorTemplateValidation(t *testing.T) { } }, 
wantErr: true, - errMsg: "MountPath for a HomeDir volume must be a clean absolute Unix path", + errMsg: "MountPath must be a clean absolute Unix path", }, { name: "Volumes: HomeDir MountPath containing ':' is invalid", mutate: func(at *ActorTemplate) { @@ -697,7 +697,7 @@ func TestActorTemplateValidation(t *testing.T) { } }, wantErr: true, - errMsg: "MountPath for a HomeDir volume must be a clean absolute Unix path", + errMsg: "MountPath must be a clean absolute Unix path", }, { name: "Volumes: HomeDir MountPath with '..' component is invalid", mutate: func(at *ActorTemplate) { @@ -709,7 +709,7 @@ func TestActorTemplateValidation(t *testing.T) { } }, wantErr: true, - errMsg: "MountPath for a HomeDir volume must be a clean absolute Unix path", + errMsg: "MountPath must be a clean absolute Unix path", }, { name: "Volumes: HomeDir MountPath with trailing '..' is invalid", mutate: func(at *ActorTemplate) { @@ -721,7 +721,7 @@ func TestActorTemplateValidation(t *testing.T) { } }, wantErr: true, - errMsg: "MountPath for a HomeDir volume must be a clean absolute Unix path", + errMsg: "MountPath must be a clean absolute Unix path", }, { name: "Volumes: HomeDir MountPath with '.' component is invalid", mutate: func(at *ActorTemplate) { @@ -733,7 +733,7 @@ func TestActorTemplateValidation(t *testing.T) { } }, wantErr: true, - errMsg: "MountPath for a HomeDir volume must be a clean absolute Unix path", + errMsg: "MountPath must be a clean absolute Unix path", }, { name: "Volumes: HomeDir MountPath containing dotfile is valid (only bare '.' / '..' 
components are rejected)", mutate: func(at *ActorTemplate) { @@ -756,7 +756,7 @@ func TestActorTemplateValidation(t *testing.T) { } }, wantErr: true, - errMsg: "MountPath for a HomeDir volume must be a clean absolute Unix path", + errMsg: "MountPath must be a clean absolute Unix path", }, { name: "Volumes: HomeDir MountPath with control character is invalid", mutate: func(at *ActorTemplate) { @@ -768,7 +768,7 @@ func TestActorTemplateValidation(t *testing.T) { } }, wantErr: true, - errMsg: "MountPath for a HomeDir volume must be a clean absolute Unix path", + errMsg: "MountPath must be a clean absolute Unix path", }, { name: "Volumes: HomeDir mount with invalid MountPath in second container is rejected", mutate: func(at *ActorTemplate) { @@ -788,7 +788,68 @@ func TestActorTemplateValidation(t *testing.T) { } }, wantErr: true, - errMsg: "MountPath for a HomeDir volume must be a clean absolute Unix path", + errMsg: "MountPath must be a clean absolute Unix path", + }, { + name: "Volumes: Volume Name with uppercase is invalid", + mutate: func(at *ActorTemplate) { + at.Spec.Volumes = []Volume{ + {Name: "Vol1", VolumeSource: VolumeSource{HomeDir: &HomedirVolumeSource{}}}, + } + }, + wantErr: true, + errMsg: "Name must be a valid DNS label", + }, { + name: "Volumes: Volume Name with underscore is invalid", + mutate: func(at *ActorTemplate) { + at.Spec.Volumes = []Volume{ + {Name: "vol_1", VolumeSource: VolumeSource{HomeDir: &HomedirVolumeSource{}}}, + } + }, + wantErr: true, + errMsg: "Name must be a valid DNS label", + }, { + name: "Volumes: VolumeMount Name with uppercase is invalid", + mutate: func(at *ActorTemplate) { + at.Spec.Volumes = []Volume{ + {Name: "vol1", VolumeSource: VolumeSource{HomeDir: &HomedirVolumeSource{}}}, + } + at.Spec.Containers[0].VolumeMounts = []VolumeMount{ + {Name: "Vol1", MountPath: "/home/user"}, + } + }, + wantErr: true, + errMsg: "Name must be a valid DNS label", + }, { + name: "Volumes: HomeDir volume with SandboxClass microvm is 
invalid", + mutate: func(at *ActorTemplate) { + at.Spec.SandboxClass = SandboxClassMicroVM + at.Spec.Volumes = []Volume{ + {Name: "vol1", VolumeSource: VolumeSource{HomeDir: &HomedirVolumeSource{}}}, + } + at.Spec.Containers[0].VolumeMounts = []VolumeMount{ + {Name: "vol1", MountPath: "/home/user"}, + } + }, + wantErr: true, + errMsg: "HomeDir volumes are not supported when sandboxClass is 'microvm'", + }, { + name: "Volumes: HomeDir volume with SandboxClass gvisor is valid", + mutate: func(at *ActorTemplate) { + at.Spec.SandboxClass = SandboxClassGvisor + at.Spec.Volumes = []Volume{ + {Name: "vol1", VolumeSource: VolumeSource{HomeDir: &HomedirVolumeSource{}}}, + } + at.Spec.Containers[0].VolumeMounts = []VolumeMount{ + {Name: "vol1", MountPath: "/home/user"}, + } + }, + wantErr: false, + }, { + name: "Volumes: SandboxClass microvm without HomeDir volumes is valid", + mutate: func(at *ActorTemplate) { + at.Spec.SandboxClass = SandboxClassMicroVM + }, + wantErr: false, }} for _, tt := range tests { From 214acab019355ca6eab12e4bf232728bd2c3ddcd Mon Sep 17 00:00:00 2001 From: dberkov Date: Thu, 25 Jun 2026 17:42:05 -0700 Subject: [PATCH 05/17] refactor: rename Homedir to DurableDir and Process/Homedir snapshot scopes to Full/Data --- cmd/ateapi/internal/controlapi/converter.go | 10 +- .../internal/controlapi/converter_test.go | 16 +- .../internal/controlapi/workload_spec.go | 10 +- .../internal/controlapi/workload_spec_test.go | 22 +-- cmd/atelet/main.go | 131 +++++++++-------- cmd/atelet/main_test.go | 4 +- cmd/atelet/oci.go | 12 +- cmd/atelet/oci_test.go | 20 +-- cmd/ateom-gvisor/main.go | 38 ++--- cmd/ateom-gvisor/runsc.go | 6 +- demos/counter/counter.yaml.tmpl | 10 +- internal/ateompath/ateompath.go | 20 +-- internal/e2e/suites/demo/demo_test.go | 24 +-- internal/proto/ateletpb/atelet.pb.go | 84 ++++++----- internal/proto/ateletpb/atelet.proto | 19 ++- internal/proto/ateompb/ateom.pb.go | 49 ++++--- internal/proto/ateompb/ateom.proto | 17 ++- 
.../generated/ate.dev_actortemplates.yaml | 41 +++--- pkg/api/v1alpha1/actortemplate_types.go | 39 ++--- .../v1alpha1/actortemplate_validation_test.go | 138 +++++++++--------- pkg/api/v1alpha1/zz_generated.deepcopy.go | 36 ++--- 21 files changed, 386 insertions(+), 360 deletions(-) diff --git a/cmd/ateapi/internal/controlapi/converter.go b/cmd/ateapi/internal/controlapi/converter.go index 66679e7a..46351097 100644 --- a/cmd/ateapi/internal/controlapi/converter.go +++ b/cmd/ateapi/internal/controlapi/converter.go @@ -22,11 +22,11 @@ import ( // convert atev1alpha1.SnapshotScope to ateletpb.SnapshotScope func toAteletSnapshotScope(in atev1alpha1.SnapshotScope) ateletpb.SnapshotScope { switch in { - case atev1alpha1.SnapshotScopeProcess: - return ateletpb.SnapshotScope_SNAPSHOT_SCOPE_PROCESS - case atev1alpha1.SnapshotScopeHomedir: - return ateletpb.SnapshotScope_SNAPSHOT_SCOPE_HOMEDIR + case atev1alpha1.SnapshotScopeFull: + return ateletpb.SnapshotScope_SNAPSHOT_SCOPE_FULL + case atev1alpha1.SnapshotScopeData: + return ateletpb.SnapshotScope_SNAPSHOT_SCOPE_DATA default: - return ateletpb.SnapshotScope_SNAPSHOT_SCOPE_PROCESS + return ateletpb.SnapshotScope_SNAPSHOT_SCOPE_FULL } } diff --git a/cmd/ateapi/internal/controlapi/converter_test.go b/cmd/ateapi/internal/controlapi/converter_test.go index 89390028..bdae0c84 100644 --- a/cmd/ateapi/internal/controlapi/converter_test.go +++ b/cmd/ateapi/internal/controlapi/converter_test.go @@ -28,24 +28,24 @@ func TestToAteletSnapshotScope(t *testing.T) { expected ateletpb.SnapshotScope }{ { - name: "Process scope", - in: atev1alpha1.SnapshotScopeProcess, - expected: ateletpb.SnapshotScope_SNAPSHOT_SCOPE_PROCESS, + name: "Full scope", + in: atev1alpha1.SnapshotScopeFull, + expected: ateletpb.SnapshotScope_SNAPSHOT_SCOPE_FULL, }, { - name: "HomeDir scope", - in: atev1alpha1.SnapshotScopeHomedir, - expected: ateletpb.SnapshotScope_SNAPSHOT_SCOPE_HOMEDIR, + name: "Data scope", + in: atev1alpha1.SnapshotScopeData, + expected: 
ateletpb.SnapshotScope_SNAPSHOT_SCOPE_DATA, }, { name: "Default scope (empty)", in: "", - expected: ateletpb.SnapshotScope_SNAPSHOT_SCOPE_PROCESS, + expected: ateletpb.SnapshotScope_SNAPSHOT_SCOPE_FULL, }, { name: "Default scope (unknown)", in: "unknown", - expected: ateletpb.SnapshotScope_SNAPSHOT_SCOPE_PROCESS, + expected: ateletpb.SnapshotScope_SNAPSHOT_SCOPE_FULL, }, } diff --git a/cmd/ateapi/internal/controlapi/workload_spec.go b/cmd/ateapi/internal/controlapi/workload_spec.go index 5d936262..c28a579b 100644 --- a/cmd/ateapi/internal/controlapi/workload_spec.go +++ b/cmd/ateapi/internal/controlapi/workload_spec.go @@ -39,13 +39,13 @@ func workloadSpecFromActorTemplate(ctx context.Context, kubeClient kubernetes.In // add volumes for _, vol := range actorTemplate.Spec.Volumes { - // volume is homedir type - if vol.VolumeSource.HomeDir != nil { + // volume is durable-dir type + if vol.VolumeSource.DurableDir != nil { ateletVol := &ateletpb.Volume{ Name: vol.Name, - Type: ateletpb.VolumeType_VOLUME_TYPE_HOMEDIR, - Source: &ateletpb.Volume_HomeDir{ - HomeDir: &ateletpb.HomedirVolume{}, + Type: ateletpb.VolumeType_VOLUME_TYPE_DURABLE_DIR, + Source: &ateletpb.Volume_DurableDir{ + DurableDir: &ateletpb.DurableDirVolume{}, }, } workloadSpec.Volumes = append(workloadSpec.Volumes, ateletVol) diff --git a/cmd/ateapi/internal/controlapi/workload_spec_test.go b/cmd/ateapi/internal/controlapi/workload_spec_test.go index 9713c937..a19dedc2 100644 --- a/cmd/ateapi/internal/controlapi/workload_spec_test.go +++ b/cmd/ateapi/internal/controlapi/workload_spec_test.go @@ -217,13 +217,13 @@ func TestWorkloadSpecFromActorTemplateOptionalSecretKeyRefSkipsMissingSecret(t * wantErrCode: codes.FailedPrecondition, }, { - name: "converts HomeDir volume and mounts", + name: "converts DurableDir volume and mounts", template: &atev1alpha1.ActorTemplate{ ObjectMeta: metav1.ObjectMeta{Name: "tmpl1", Namespace: "agent-ns"}, Spec: atev1alpha1.ActorTemplateSpec{ PauseImage: "pause", Volumes: 
[]atev1alpha1.Volume{ - {Name: "home", VolumeSource: atev1alpha1.VolumeSource{HomeDir: &atev1alpha1.HomedirVolumeSource{}}}, + {Name: "home", VolumeSource: atev1alpha1.VolumeSource{DurableDir: &atev1alpha1.DurableDirVolumeSource{}}}, }, Containers: []atev1alpha1.Container{ { @@ -242,8 +242,8 @@ func TestWorkloadSpecFromActorTemplateOptionalSecretKeyRefSkipsMissingSecret(t * Volumes: []*ateletpb.Volume{ { Name: "home", - Type: ateletpb.VolumeType_VOLUME_TYPE_HOMEDIR, - Source: &ateletpb.Volume_HomeDir{HomeDir: &ateletpb.HomedirVolume{}}, + Type: ateletpb.VolumeType_VOLUME_TYPE_DURABLE_DIR, + Source: &ateletpb.Volume_DurableDir{DurableDir: &ateletpb.DurableDirVolume{}}, }, }, Containers: []*ateletpb.Container{ @@ -259,13 +259,13 @@ func TestWorkloadSpecFromActorTemplateOptionalSecretKeyRefSkipsMissingSecret(t * }, }, { - name: "skips non-HomeDir volumes", + name: "skips non-DurableDir volumes", template: &atev1alpha1.ActorTemplate{ ObjectMeta: metav1.ObjectMeta{Name: "tmpl1", Namespace: "agent-ns"}, Spec: atev1alpha1.ActorTemplateSpec{ Volumes: []atev1alpha1.Volume{ {Name: "unsupported", VolumeSource: atev1alpha1.VolumeSource{}}, - {Name: "home", VolumeSource: atev1alpha1.VolumeSource{HomeDir: &atev1alpha1.HomedirVolumeSource{}}}, + {Name: "home", VolumeSource: atev1alpha1.VolumeSource{DurableDir: &atev1alpha1.DurableDirVolumeSource{}}}, }, Containers: []atev1alpha1.Container{ { @@ -282,8 +282,8 @@ func TestWorkloadSpecFromActorTemplateOptionalSecretKeyRefSkipsMissingSecret(t * Volumes: []*ateletpb.Volume{ { Name: "home", - Type: ateletpb.VolumeType_VOLUME_TYPE_HOMEDIR, - Source: &ateletpb.Volume_HomeDir{HomeDir: &ateletpb.HomedirVolume{}}, + Type: ateletpb.VolumeType_VOLUME_TYPE_DURABLE_DIR, + Source: &ateletpb.Volume_DurableDir{DurableDir: &ateletpb.DurableDirVolume{}}, }, }, Containers: []*ateletpb.Container{ @@ -303,7 +303,7 @@ func TestWorkloadSpecFromActorTemplateOptionalSecretKeyRefSkipsMissingSecret(t * ObjectMeta: metav1.ObjectMeta{Name: "tmpl1", Namespace: 
"agent-ns"}, Spec: atev1alpha1.ActorTemplateSpec{ Volumes: []atev1alpha1.Volume{ - {Name: "home", VolumeSource: atev1alpha1.VolumeSource{HomeDir: &atev1alpha1.HomedirVolumeSource{}}}, + {Name: "home", VolumeSource: atev1alpha1.VolumeSource{DurableDir: &atev1alpha1.DurableDirVolumeSource{}}}, }, Containers: []atev1alpha1.Container{ {Name: "main", Image: "main"}, @@ -314,8 +314,8 @@ func TestWorkloadSpecFromActorTemplateOptionalSecretKeyRefSkipsMissingSecret(t * Volumes: []*ateletpb.Volume{ { Name: "home", - Type: ateletpb.VolumeType_VOLUME_TYPE_HOMEDIR, - Source: &ateletpb.Volume_HomeDir{HomeDir: &ateletpb.HomedirVolume{}}, + Type: ateletpb.VolumeType_VOLUME_TYPE_DURABLE_DIR, + Source: &ateletpb.Volume_DurableDir{DurableDir: &ateletpb.DurableDirVolume{}}, }, }, Containers: []*ateletpb.Container{{Name: "main", Image: "main"}}, diff --git a/cmd/atelet/main.go b/cmd/atelet/main.go index 14154b2f..8840b753 100644 --- a/cmd/atelet/main.go +++ b/cmd/atelet/main.go @@ -350,11 +350,11 @@ func (s *AteomHerder) Checkpoint(ctx context.Context, req *ateletpb.CheckpointRe switch req.GetType() { case ateletpb.CheckpointType_CHECKPOINT_TYPE_EXTERNAL: - if err := s.uploadExternalCheckpoint(ctx, req, checkpointDir, sandboxRec, req.GetScope() == ateletpb.SnapshotScope_SNAPSHOT_SCOPE_PROCESS, hasHomedirVolumeMount(req.GetSpec())); err != nil { + if err := s.uploadExternalCheckpoint(ctx, req, checkpointDir, sandboxRec, req.GetScope() == ateletpb.SnapshotScope_SNAPSHOT_SCOPE_FULL, hasDurableDirVolumeMount(req.GetSpec())); err != nil { return nil, err } case ateletpb.CheckpointType_CHECKPOINT_TYPE_LOCAL: - if err := s.moveLocalCheckpoint(ctx, req, checkpointDir, sandboxRec, req.GetScope() == ateletpb.SnapshotScope_SNAPSHOT_SCOPE_PROCESS, hasHomedirVolumeMount(req.GetSpec())); err != nil { + if err := s.moveLocalCheckpoint(ctx, req, checkpointDir, sandboxRec, req.GetScope() == ateletpb.SnapshotScope_SNAPSHOT_SCOPE_FULL, hasDurableDirVolumeMount(req.GetSpec())); err != nil { return nil, 
err } default: @@ -368,11 +368,11 @@ func (s *AteomHerder) Checkpoint(ctx context.Context, req *ateletpb.CheckpointRe return &ateletpb.CheckpointResponse{}, nil } -// returns true if at least one of the containers in the workload spec has a homedir volume mount -func hasHomedirVolumeMount(spec *ateletpb.WorkloadSpec) bool { +// returns true if at least one of the containers in the workload spec has a durable-dir volume mount +func hasDurableDirVolumeMount(spec *ateletpb.WorkloadSpec) bool { hdv := make(map[string]bool) for _, v := range spec.GetVolumes() { - if v.GetType() == ateletpb.VolumeType_VOLUME_TYPE_HOMEDIR { + if v.GetType() == ateletpb.VolumeType_VOLUME_TYPE_DURABLE_DIR { hdv[v.GetName()] = true } } @@ -389,14 +389,14 @@ func hasHomedirVolumeMount(spec *ateletpb.WorkloadSpec) bool { func toAteomSnapshotScope(scope ateletpb.SnapshotScope) ateompb.SnapshotScope { // assumption the request already been valdated and scope is in the valid values set switch scope { - case ateletpb.SnapshotScope_SNAPSHOT_SCOPE_HOMEDIR: - return ateompb.SnapshotScope_SNAPSHOT_SCOPE_HOMEDIR + case ateletpb.SnapshotScope_SNAPSHOT_SCOPE_DATA: + return ateompb.SnapshotScope_SNAPSHOT_SCOPE_DATA default: - return ateompb.SnapshotScope_SNAPSHOT_SCOPE_PROCESS + return ateompb.SnapshotScope_SNAPSHOT_SCOPE_FULL } } -func (s *AteomHerder) moveLocalCheckpoint(ctx context.Context, req *ateletpb.CheckpointRequest, checkpointDir string, rec *sandboxAssetsRecord, moveProcessFiles bool, moveHomedirFiles bool) error { +func (s *AteomHerder) moveLocalCheckpoint(ctx context.Context, req *ateletpb.CheckpointRequest, checkpointDir string, rec *sandboxAssetsRecord, moveProcessFiles bool, moveDurableDirFiles bool) error { localCheckpointPath := filepath.Join(ateompath.LocalCheckpointsDir(req.GetActorTemplateNamespace(), req.GetActorTemplateName(), req.GetActorId()), req.GetLocalConfig().GetSnapshotPrefix()) if err := os.MkdirAll(localCheckpointPath, 0o700); err != nil { return fmt.Errorf("while creating 
local checkpoint directory: %w", err) @@ -417,16 +417,16 @@ func (s *AteomHerder) moveLocalCheckpoint(ctx context.Context, req *ateletpb.Che } } - if moveHomedirFiles { - // move homedir files - homedirCheckpointDir := filepath.Join(checkpointDir, ateompath.HomedirSnapshotsSubfoldderName) - homeDirLocaclCheckpointPath := filepath.Join(localCheckpointPath, ateompath.HomedirSnapshotsSubfoldderName) - if err := os.MkdirAll(homeDirLocaclCheckpointPath, 0o700); err != nil { + if moveDurableDirFiles { + // move durable-dir files + durableDirCheckpointDir := filepath.Join(checkpointDir, ateompath.DurableDirSnapshotsSubfoldderName) + durableDirLocalCheckpointPath := filepath.Join(localCheckpointPath, ateompath.DurableDirSnapshotsSubfoldderName) + if err := os.MkdirAll(durableDirLocalCheckpointPath, 0o700); err != nil { return fmt.Errorf("while creating local checkpoint directory: %w", err) } for _, fileName := range []string{"fscheckpoint.json", "multitar.img", "pages.img", "pages_meta.img"} { - src := filepath.Join(homedirCheckpointDir, fileName) - dst := filepath.Join(homeDirLocaclCheckpointPath, fileName) + src := filepath.Join(durableDirCheckpointDir, fileName) + dst := filepath.Join(durableDirLocalCheckpointPath, fileName) if err := os.Rename(src, dst); err != nil { return fmt.Errorf("failed to move %s to %s: %w", src, dst, err) } @@ -446,7 +446,7 @@ func (s *AteomHerder) moveLocalCheckpoint(ctx context.Context, req *ateletpb.Che return nil } -func (s *AteomHerder) uploadExternalCheckpoint(ctx context.Context, req *ateletpb.CheckpointRequest, checkpointDir string, rec *sandboxAssetsRecord, uploadProcessFiles bool, uploadHomedirFiles bool) error { +func (s *AteomHerder) uploadExternalCheckpoint(ctx context.Context, req *ateletpb.CheckpointRequest, checkpointDir string, rec *sandboxAssetsRecord, uploadProcessFiles bool, uploadDurableDirFiles bool) error { ns, tmpl := req.GetActorTemplateNamespace(), req.GetActorTemplateName() prefix := 
strings.TrimSuffix(req.GetExternalConfig().GetSnapshotUriPrefix(), "/") @@ -471,15 +471,15 @@ func (s *AteomHerder) uploadExternalCheckpoint(ctx context.Context, req *ateletp } } - if uploadHomedirFiles { + if uploadDurableDirFiles { for _, fileName := range []string{"fscheckpoint.json", "multitar.img", "pages.img", "pages_meta.img"} { - impPath := filepath.Join(checkpointDir, ateompath.HomedirSnapshotsSubfoldderName, fileName) + impPath := filepath.Join(checkpointDir, ateompath.DurableDirSnapshotsSubfoldderName, fileName) before, _, _ := strings.Cut(fileName, ".") recordSnapshotSize(ctx, before, impPath, ns, tmpl) if err := uploadIfExists(ctx, s.gcsClient, - fmt.Sprintf("%s/homedir/%s.zstd", prefix, fileName), + fmt.Sprintf("%s/%s/%s.zstd", prefix, ateompath.DurableDirSnapshotsSubfoldderName, fileName), impPath, ); err != nil { return err @@ -634,7 +634,7 @@ func (s *AteomHerder) Restore(ctx context.Context, req *ateletpb.RestoreRequest) func (s *AteomHerder) copyLocalCheckpoint(ctx context.Context, snapshotPrefix string, srcDir, dstDir string, files []string, scope ateletpb.SnapshotScope) error { switch scope { - case ateletpb.SnapshotScope_SNAPSHOT_SCOPE_PROCESS: + case ateletpb.SnapshotScope_SNAPSHOT_SCOPE_FULL: for _, fileName := range files { if ctx.Err() != nil { return fmt.Errorf("context cancelled: %w", ctx.Err()) @@ -645,18 +645,18 @@ func (s *AteomHerder) copyLocalCheckpoint(ctx context.Context, snapshotPrefix st return fmt.Errorf("failed to copy %s to %s: %w", src, dst, err) } } - case ateletpb.SnapshotScope_SNAPSHOT_SCOPE_HOMEDIR: - hdDstDir := filepath.Join(dstDir, ateompath.HomedirSnapshotsSubfoldderName) - if err := os.MkdirAll(hdDstDir, 0o700); err != nil { - return fmt.Errorf("while creating homedir directory: %w", err) + case ateletpb.SnapshotScope_SNAPSHOT_SCOPE_DATA: + ddDstDir := filepath.Join(dstDir, ateompath.DurableDirSnapshotsSubfoldderName) + if err := os.MkdirAll(ddDstDir, 0o700); err != nil { + return fmt.Errorf("while creating 
durable-dir directory: %w", err) } for _, fileName := range []string{"fscheckpoint.json", "multitar.img", "pages.img", "pages_meta.img"} { if ctx.Err() != nil { return fmt.Errorf("context cancelled: %w", ctx.Err()) } - src := filepath.Join(srcDir, snapshotPrefix, ateompath.HomedirSnapshotsSubfoldderName, fileName) - dst := filepath.Join(hdDstDir, fileName) + src := filepath.Join(srcDir, snapshotPrefix, ateompath.DurableDirSnapshotsSubfoldderName, fileName) + dst := filepath.Join(ddDstDir, fileName) if _, err := copyFile(src, dst); err != nil { return fmt.Errorf("failed to copy %s to %s: %w", src, dst, err) } @@ -695,7 +695,7 @@ func (s *AteomHerder) downloadExternalCheckpoint(ctx context.Context, snapshotUr g, gCtx := errgroup.WithContext(ctx) switch scope { - case ateletpb.SnapshotScope_SNAPSHOT_SCOPE_PROCESS: + case ateletpb.SnapshotScope_SNAPSHOT_SCOPE_FULL: g, gCtx := errgroup.WithContext(ctx) for _, fileName := range files { fileName := fileName @@ -710,11 +710,11 @@ func (s *AteomHerder) downloadExternalCheckpoint(ctx context.Context, snapshotUr if err := g.Wait(); err != nil { return err } - case ateletpb.SnapshotScope_SNAPSHOT_SCOPE_HOMEDIR: + case ateletpb.SnapshotScope_SNAPSHOT_SCOPE_DATA: for _, fileName := range []string{"fscheckpoint.json", "multitar.img", "pages.img", "pages_meta.img"} { - remote := fmt.Sprintf("%s/%s/%s.zstd", prefix, ateompath.HomedirSnapshotsSubfoldderName, fileName) + remote := fmt.Sprintf("%s/%s/%s.zstd", prefix, ateompath.DurableDirSnapshotsSubfoldderName, fileName) g.Go(func() error { - local := filepath.Join(dstDir, ateompath.HomedirSnapshotsSubfoldderName, fileName) + local := filepath.Join(dstDir, ateompath.DurableDirSnapshotsSubfoldderName, fileName) if err := ategcs.FetchLocalFileFromGCSWithZstd(gCtx, s.gcsClient, remote, local); err != nil { return fmt.Errorf("while downloading %s from GCS: %w", remote, err) } @@ -751,12 +751,12 @@ func (s *AteomHerder) prepareOCIBundles( return fmt.Errorf("while writing actor identity 
file: %w", err) } - hdVolumes := make(map[string]bool) - // make directories for all homedir volumes + ddVolumes := make(map[string]bool) + // make directories for all durable-dir volumes for _, vol := range spec.GetVolumes() { - if vol.GetType() == ateletpb.VolumeType_VOLUME_TYPE_HOMEDIR { - hdVolumes[vol.GetName()] = true - volPath := ateompath.HomedirVolumeMountPoint(actorTemplateNamespace, actorTemplateName, actorID, vol.GetName()) + if vol.GetType() == ateletpb.VolumeType_VOLUME_TYPE_DURABLE_DIR { + ddVolumes[vol.GetName()] = true + volPath := ateompath.DurableDirVolumeMountPoint(actorTemplateNamespace, actorTemplateName, actorID, vol.GetName()) if err := os.MkdirAll(volPath, 0o700); err != nil { return fmt.Errorf("while creating %q: %w", volPath, err) } @@ -771,13 +771,13 @@ func (s *AteomHerder) prepareOCIBundles( "io.kubernetes.cri.container-type": "sandbox", "io.kubernetes.cri.container-name": "pause", } - // add anotation for every homedir volume - // TODO(dberkov) needs to revist this logic once gVisor will support multiple homedir volumes. + // add annotation for every durable-dir volume + // TODO(dberkov) needs to revisit this logic once gVisor supports multiple durable-dir volumes. 
for _, vol := range spec.GetVolumes() { - if vol.GetType() == ateletpb.VolumeType_VOLUME_TYPE_HOMEDIR { - annotations["dev.gvisor.spec.mount.homedir.type"] = "bind" - annotations["dev.gvisor.spec.mount.homedir.share"] = "container" - annotations["dev.gvisor.spec.mount.homedir.source"] = ateompath.HomedirVolumeMountPoint(actorTemplateNamespace, actorTemplateName, actorID, vol.GetName()) + if vol.GetType() == ateletpb.VolumeType_VOLUME_TYPE_DURABLE_DIR { + annotations["dev.gvisor.spec.mount.durabledir.type"] = "bind" + annotations["dev.gvisor.spec.mount.durabledir.share"] = "container" + annotations["dev.gvisor.spec.mount.durabledir.source"] = ateompath.DurableDirVolumeMountPoint(actorTemplateNamespace, actorTemplateName, actorID, vol.GetName()) } } @@ -806,10 +806,10 @@ func (s *AteomHerder) prepareOCIBundles( for _, env := range ctr.GetEnv() { envs = append(envs, fmt.Sprintf("%s=%s", env.GetName(), env.GetValue())) } - var hdMounts []*ateletpb.VolumeMount + var ddMounts []*ateletpb.VolumeMount for _, vm := range ctr.GetVolumeMounts() { - if hdVolumes[vm.GetName()] { - hdMounts = append(hdMounts, vm) + if ddVolumes[vm.GetName()] { + ddMounts = append(ddMounts, vm) } } g.Go(func() error { @@ -828,7 +828,7 @@ func (s *AteomHerder) prepareOCIBundles( }, netnsPath, identityDir, - hdMounts, + ddMounts, ); err != nil { return fmt.Errorf("while creating %q OCI bundle: %w", ctr.GetName(), err) } @@ -852,18 +852,25 @@ func (s *AteomHerder) dialAteom(ctx context.Context, targetAteomUid string) (ate // buildAteomWorkloadSpec projects the atelet-facing workload spec onto // the ateom-facing one. 
func buildAteomWorkloadSpec(spec *ateletpb.WorkloadSpec) *ateompb.WorkloadSpec { + ddVolumes := make(map[string]bool) + for _, vol := range spec.GetVolumes() { + if vol.GetType() == ateletpb.VolumeType_VOLUME_TYPE_DURABLE_DIR { + ddVolumes[vol.GetName()] = true + } + } + out := &ateompb.WorkloadSpec{} for _, ctr := range spec.GetContainers() { - var hdMountPaths []string + var ddMountPaths []string for _, vm := range ctr.GetVolumeMounts() { - if hdVolumes[vm.GetName()] { - hdMountPaths = append(hdMountPaths, vm.GetMountPath()) + if ddVolumes[vm.GetName()] { + ddMountPaths = append(ddMountPaths, vm.GetMountPath()) } } out.Containers = append(out.Containers, &ateompb.Container{ - Name: ctr.GetName(), - HomeDirVolumes: hdMountPaths, - Readyz: toAteomReadyz(ctr.GetReadyz()), + Name: ctr.GetName(), + DurableDirVolumes: ddMountPaths, + Readyz: toAteomReadyz(ctr.GetReadyz()), }) } return out @@ -987,8 +994,8 @@ func validateRestoreRequest(req *ateletpb.RestoreRequest) error { func validateSnapshotScope(scope ateletpb.SnapshotScope) error { switch scope { - case ateletpb.SnapshotScope_SNAPSHOT_SCOPE_PROCESS, - ateletpb.SnapshotScope_SNAPSHOT_SCOPE_HOMEDIR: + case ateletpb.SnapshotScope_SNAPSHOT_SCOPE_FULL, + ateletpb.SnapshotScope_SNAPSHOT_SCOPE_DATA: return nil case ateletpb.SnapshotScope_SNAPSHOT_SCOPE_UNSPECIFIED: return fmt.Errorf("snapshot scope must be non-zero") @@ -1095,9 +1102,9 @@ func resetActorDirs(actorTemplateNamespace, actorTemplateName, actorID string) e return fmt.Errorf("while creating restore-state dir: %w", err) } - restoreStateHomeDir := filepath.Join(restoreStateDir, ateompath.HomedirSnapshotsSubfoldderName) - if err := os.MkdirAll(restoreStateHomeDir, 0o700); err != nil { - return fmt.Errorf("while creating restore-state homedir dir: %w", err) + restoreStateDurableDir := filepath.Join(restoreStateDir, ateompath.DurableDirSnapshotsSubfoldderName) + if err := os.MkdirAll(restoreStateDurableDir, 0o700); err != nil { + return fmt.Errorf("while creating 
restore-state durable-dir dir: %w", err) } // World-readable (0o755): bind-mounted into the actor, whose workload @@ -1110,12 +1117,12 @@ func resetActorDirs(actorTemplateNamespace, actorTemplateName, actorID string) e return fmt.Errorf("while creating actor identity dir: %w", err) } - homedirVolumesMountDir := ateompath.HommedirVolumeMountsDir(actorTemplateNamespace, actorTemplateName, actorID) - if err := os.RemoveAll(homedirVolumesMountDir); err != nil { - return fmt.Errorf("while deleting homedir volumes mount dir: %w", err) + durableDirVolumesMountDir := ateompath.DurableDirVolumeMountsDir(actorTemplateNamespace, actorTemplateName, actorID) + if err := os.RemoveAll(durableDirVolumesMountDir); err != nil { + return fmt.Errorf("while deleting durable-dir volumes mount dir: %w", err) } - if err := os.MkdirAll(homedirVolumesMountDir, 0o755); err != nil { - return fmt.Errorf("while creating homedir volumes mount dir: %w", err) + if err := os.MkdirAll(durableDirVolumesMountDir, 0o755); err != nil { + return fmt.Errorf("while creating durable-dir volumes mount dir: %w", err) } return nil diff --git a/cmd/atelet/main_test.go b/cmd/atelet/main_test.go index 18fbedf1..fbdce282 100644 --- a/cmd/atelet/main_test.go +++ b/cmd/atelet/main_test.go @@ -135,7 +135,7 @@ func validCheckpointRequest() *ateletpb.CheckpointRequest { SnapshotUriPrefix: "gs://bucket/actors/1/snapshots/2/", }, }, - Scope: ateletpb.SnapshotScope_SNAPSHOT_SCOPE_PROCESS, + Scope: ateletpb.SnapshotScope_SNAPSHOT_SCOPE_FULL, } } @@ -152,7 +152,7 @@ func validRestoreRequest() *ateletpb.RestoreRequest { SnapshotUriPrefix: "gs://bucket/actors/1/snapshots/2/", }, }, - Scope: ateletpb.SnapshotScope_SNAPSHOT_SCOPE_PROCESS, + Scope: ateletpb.SnapshotScope_SNAPSHOT_SCOPE_FULL, } } diff --git a/cmd/atelet/oci.go b/cmd/atelet/oci.go index fb0fe8f1..a00df99e 100644 --- a/cmd/atelet/oci.go +++ b/cmd/atelet/oci.go @@ -53,7 +53,7 @@ const ( ActorIDFileName = "actor-id" ) -func prepareOCIDirectory(ctx context.Context, 
pullCache *memorypullcache.MemoryPullCache, actorTemplateNamespace, actorTemplateName, actorID, containerName, ref string, args []string, env []string, annotations map[string]string, netns string, identityDir string, homedirVolumeMounts []*ateletpb.VolumeMount) error { +func prepareOCIDirectory(ctx context.Context, pullCache *memorypullcache.MemoryPullCache, actorTemplateNamespace, actorTemplateName, actorID, containerName, ref string, args []string, env []string, annotations map[string]string, netns string, identityDir string, durableDirVolumeMounts []*ateletpb.VolumeMount) error { tracer := otel.Tracer("prepareOCIDirectory") ctx, span := tracer.Start(ctx, "prepareOCIDirectory") @@ -90,7 +90,7 @@ func prepareOCIDirectory(ctx context.Context, pullCache *memorypullcache.MemoryP } } - ociSpec := buildActorOCISpec(actorTemplateNamespace, actorTemplateName, actorID, args, env, annotations, netns, identityDir, homedirVolumeMounts) + ociSpec := buildActorOCISpec(actorTemplateNamespace, actorTemplateName, actorID, args, env, annotations, netns, identityDir, durableDirVolumeMounts) ociSpecBytes, err := json.MarshalIndent(ociSpec, "", " ") if err != nil { return fmt.Errorf("while marshaling OCI spec: %w", err) @@ -107,7 +107,7 @@ func prepareOCIDirectory(ctx context.Context, pullCache *memorypullcache.MemoryP // When identityDir is non-empty it adds a read-only bind mount of that host // directory at IdentityMountPath so the actor can read its own ID (see // IdentityMountPath for why this is a bind mount rather than env vars). 
-func buildActorOCISpec(actorTemplateNamespace string, actorTemplateName string, actorID string, args []string, env []string, annotations map[string]string, netns string, identityDir string, homedirVolumeMounts []*ateletpb.VolumeMount) *specs.Spec { +func buildActorOCISpec(actorTemplateNamespace string, actorTemplateName string, actorID string, args []string, env []string, annotations map[string]string, netns string, identityDir string, durableDirVolumeMounts []*ateletpb.VolumeMount) *specs.Spec { envVars := []string{ "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", } @@ -220,12 +220,12 @@ func buildActorOCISpec(actorTemplateNamespace string, actorTemplateName string, Annotations: annotations, } - // Prepare and mount homedirs. - for _, vm := range homedirVolumeMounts { + // Prepare and mount durable-dir volumes. + for _, vm := range durableDirVolumeMounts { spec.Mounts = append(spec.Mounts, specs.Mount{ Destination: vm.GetMountPath(), Type: "bind", - Source: ateompath.HomedirVolumeMountPoint(actorTemplateNamespace, actorTemplateName, actorID, vm.GetName()), + Source: ateompath.DurableDirVolumeMountPoint(actorTemplateNamespace, actorTemplateName, actorID, vm.GetName()), }) } diff --git a/cmd/atelet/oci_test.go b/cmd/atelet/oci_test.go index 442336f5..a12c0e63 100644 --- a/cmd/atelet/oci_test.go +++ b/cmd/atelet/oci_test.go @@ -126,11 +126,11 @@ func TestBuildActorOCISpec_NoIdentityMountForPause(t *testing.T) { } } -// Each homedir volume mount becomes a bind mount whose source is the -// per-actor on-host HomeDirMountPoint for that volume name. -func TestBuildActorOCISpec_HomedirVolumeMounts(t *testing.T) { +// Each durable-dir volume mount becomes a bind mount whose source is the +// per-actor on-host DurableDirVolumeMountPoint for that volume name. 
+func TestBuildActorOCISpec_DurableDirVolumeMounts(t *testing.T) { const ns, tmpl, id = "ns", "tmpl", "id" - homedirs := []*ateletpb.VolumeMount{ + durableDirs := []*ateletpb.VolumeMount{ {Name: "data", MountPath: "/var/data"}, {Name: "cache", MountPath: "/var/cache"}, } @@ -139,11 +139,11 @@ func TestBuildActorOCISpec_HomedirVolumeMounts(t *testing.T) { []string{"/app"}, nil, nil, "/run/netns/x", "", - homedirs, + durableDirs, ) - for _, vm := range homedirs { - wantSrc := ateompath.HomedirVolumeMountPoint(ns, tmpl, id, vm.Name) + for _, vm := range durableDirs { + wantSrc := ateompath.DurableDirVolumeMountPoint(ns, tmpl, id, vm.Name) found := false for _, m := range spec.Mounts { if m.Destination != vm.MountPath { @@ -151,14 +151,14 @@ func TestBuildActorOCISpec_HomedirVolumeMounts(t *testing.T) { } found = true if m.Source != wantSrc { - t.Errorf("homedir %q source = %q, want %q", vm.Name, m.Source, wantSrc) + t.Errorf("durable-dir %q source = %q, want %q", vm.Name, m.Source, wantSrc) } if m.Type != "bind" { - t.Errorf("homedir %q type = %q, want bind", vm.Name, m.Type) + t.Errorf("durable-dir %q type = %q, want bind", vm.Name, m.Type) } } if !found { - t.Fatalf("homedir mount for %q missing; mounts=%v", vm.MountPath, spec.Mounts) + t.Fatalf("durable-dir mount for %q missing; mounts=%v", vm.MountPath, spec.Mounts) } } } diff --git a/cmd/ateom-gvisor/main.go b/cmd/ateom-gvisor/main.go index 23d6fae3..81b43d3c 100644 --- a/cmd/ateom-gvisor/main.go +++ b/cmd/ateom-gvisor/main.go @@ -262,31 +262,31 @@ func (s *AteomService) CheckpointWorkload(ctx context.Context, req *ateompb.Chec return nil, fmt.Errorf("while creating checkpoint directory: %w", err) } - // Always take homedir snapshot if at least one container has homedir volume mount - // TODO(dberkov): this is a temporary workaround until gVisor will support taking homedir snapshots in a single request with process snapshot. 
- var hdv []string + // Always take durable-dir snapshot if at least one container has a durable-dir volume mount. + // TODO(dberkov): this is a temporary workaround until gVisor supports taking durable-dir snapshots in a single request with the process snapshot. + var ddv []string for _, ctr := range req.GetSpec().GetContainers() { - hdv = append(hdv, ctr.GetHomeDirVolumes()...) + ddv = append(ddv, ctr.GetDurableDirVolumes()...) } - if len(hdv) > 0 { + if len(ddv) > 0 { // TODO(dberkov) add control for "resume=true" flag - // Checkpoint each homedir volume + // Checkpoint each durable-dir volume - // prepare homedir checkpoint folder - fsCheckpointPath := filepath.Join(checkpointPath, ateompath.HomedirSnapshotsSubfoldderName) + // prepare durable-dir checkpoint folder + fsCheckpointPath := filepath.Join(checkpointPath, ateompath.DurableDirSnapshotsSubfoldderName) if err := os.MkdirAll(fsCheckpointPath, 0o700); err != nil { return nil, fmt.Errorf("while creating fscheckpoint directory: %w", err) } - // keep gVisor running if process snapshot is requested. - leaveRunning := req.GetScope() == ateompb.SnapshotScope_SNAPSHOT_SCOPE_PROCESS - if err := rcmd.cmdFsCheckpoint(ctx, "pause", fsCheckpointPath, hdv, leaveRunning); err != nil { - return nil, fmt.Errorf("while fscheckpointing homedir %q: %w", hdv[0], err) + // keep gVisor running if full snapshot is requested. 
+ leaveRunning := req.GetScope() == ateompb.SnapshotScope_SNAPSHOT_SCOPE_FULL + if err := rcmd.cmdFsCheckpoint(ctx, "pause", fsCheckpointPath, ddv, leaveRunning); err != nil { + return nil, fmt.Errorf("while fscheckpointing durable-dir %q: %w", ddv[0], err) } } - // take process snapshot if requested - if req.GetScope() == ateompb.SnapshotScope_SNAPSHOT_SCOPE_PROCESS { + // take full snapshot (memory + rootfs delta) if requested + if req.GetScope() == ateompb.SnapshotScope_SNAPSHOT_SCOPE_FULL { // Checkpoint pause container (root of the sandbox) if err := rcmd.cmdCheckpoint(ctx, "pause", checkpointPath); err != nil { return nil, fmt.Errorf("while checkpointing pause: %w", err) @@ -393,15 +393,15 @@ func (s *AteomService) RestoreWorkload(ctx context.Context, req *ateompb.Restore checkpointDir := ateompath.RestoreStateDir(req.GetActorTemplateNamespace(), req.GetActorTemplateName(), req.GetActorId()) switch req.GetScope() { - case ateompb.SnapshotScope_SNAPSHOT_SCOPE_HOMEDIR: + case ateompb.SnapshotScope_SNAPSHOT_SCOPE_DATA: // Create and restore pause container - if err := rcmd.cmdCreate(ctx, os.Stdout, "pause", []string{"--fs-restore-image-path", filepath.Join(checkpointDir, ateompath.HomedirSnapshotsSubfoldderName)}); err != nil { + if err := rcmd.cmdCreate(ctx, os.Stdout, "pause", []string{"--fs-restore-image-path", filepath.Join(checkpointDir, ateompath.DurableDirSnapshotsSubfoldderName)}); err != nil { return nil, fmt.Errorf("while creating pause container: %w", err) } if err := rcmd.cmdStart(ctx, os.Stdout, "pause"); err != nil { return nil, fmt.Errorf("while starting pause container: %w", err) } - case ateompb.SnapshotScope_SNAPSHOT_SCOPE_PROCESS: + case ateompb.SnapshotScope_SNAPSHOT_SCOPE_FULL: // Create and restore pause container if err := rcmd.cmdCreate(ctx, os.Stdout, "pause", nil); err != nil { return nil, fmt.Errorf("while creating pause container: %w", err) @@ -422,14 +422,14 @@ func (s *AteomService) RestoreWorkload(ctx context.Context, req 
*ateompb.Restore } defer pw.Close() switch req.GetScope() { - case ateompb.SnapshotScope_SNAPSHOT_SCOPE_HOMEDIR: + case ateompb.SnapshotScope_SNAPSHOT_SCOPE_DATA: if err := rcmd.cmdCreate(ctx, pw, ac.GetName(), nil); err != nil { return nil, fmt.Errorf("while creating %q application container: %w", ac.GetName(), err) } if err := rcmd.cmdStart(ctx, pw, ac.GetName()); err != nil { return nil, fmt.Errorf("while starting %q application container: %w", ac.GetName(), err) } - case ateompb.SnapshotScope_SNAPSHOT_SCOPE_PROCESS: + case ateompb.SnapshotScope_SNAPSHOT_SCOPE_FULL: if err := rcmd.cmdCreate(ctx, pw, ac.GetName(), nil); err != nil { return nil, fmt.Errorf("while creating %q application container: %w", ac.GetName(), err) } diff --git a/cmd/ateom-gvisor/runsc.go b/cmd/ateom-gvisor/runsc.go index 430e4e9b..ae35fc69 100644 --- a/cmd/ateom-gvisor/runsc.go +++ b/cmd/ateom-gvisor/runsc.go @@ -134,7 +134,7 @@ func (r *runsc) cmdCheckpoint(ctx context.Context, containerName, checkpointPath return nil } -func (r *runsc) cmdFsCheckpoint(ctx context.Context, containerName, checkpointPath string, homedirMounts []string, leaveRunning bool) error { +func (r *runsc) cmdFsCheckpoint(ctx context.Context, containerName, checkpointPath string, durableDirMounts []string, leaveRunning bool) error { reapLock.RLock() defer reapLock.RUnlock() @@ -152,8 +152,8 @@ func (r *runsc) cmdFsCheckpoint(ctx context.Context, containerName, checkpointPa "fscheckpoint", "-image-path", checkpointPath, } - for _, hdv := range homedirMounts { - args = append(args, "-path", hdv) + for _, ddv := range durableDirMounts { + args = append(args, "-path", ddv) } if leaveRunning { args = append(args, "-leave-running") diff --git a/demos/counter/counter.yaml.tmpl b/demos/counter/counter.yaml.tmpl index aeba0846..5ca0edda 100644 --- a/demos/counter/counter.yaml.tmpl +++ b/demos/counter/counter.yaml.tmpl @@ -48,15 +48,15 @@ spec: path: /readyz port: 80 volumeMounts: - - name: myhomedir + - name: data mountPath: 
/home/counter workerSelector: matchLabels: workload: counter snapshotsConfig: - onPause: process - onCommit: homedir + onPause: full + onCommit: data location: gs://${BUCKET_NAME}/ate-demo-counter/ volumes: - - name: myhomedir - homeDir: {} + - name: data + durableDir: {} diff --git a/internal/ateompath/ateompath.go b/internal/ateompath/ateompath.go index 16c0153f..2ad0ebf8 100644 --- a/internal/ateompath/ateompath.go +++ b/internal/ateompath/ateompath.go @@ -24,9 +24,9 @@ const ( // host filesystem, and when it is mounted into ateom and atelet containers. BasePath = "/var/lib/ateom-gvisor" - // Homedir snapshots are temporarily stored in subfolder relative to process checkpoint path. - // This is because gVisor missing capability to separete homedir content from rest of rootfs upon checkpointing. - HomedirSnapshotsSubfoldderName = "homedir" + // DurableDir snapshots are temporarily stored in a subfolder relative to the process checkpoint path. + // This is because gVisor is missing the capability to separate durable-dir content from the rest of rootfs upon checkpointing. + DurableDirSnapshotsSubfoldderName = "durabledir" ) var ( @@ -140,19 +140,19 @@ func LocalCheckpointsDir(actorTemplateNamespace, actorTemplateName, actorID stri ) } -// HommedirVolumeMountsDir is the directory where individual home directory volumes -// are mounted. -func HommedirVolumeMountsDir(actorTemplateNamespace, actorTemplateName, actorID string) string { +// DurableDirVolumeMountsDir is the directory where individual durable-dir +// volumes are mounted. +func DurableDirVolumeMountsDir(actorTemplateNamespace, actorTemplateName, actorID string) string { return filepath.Join( ActorPath(actorTemplateNamespace, actorTemplateName, actorID), - HomedirSnapshotsSubfoldderName, + DurableDirSnapshotsSubfoldderName, ) } -// HomedirVolumeMountPoint returns the path where a specific home directory volume is mounted on the nodeVM. 
-func HomedirVolumeMountPoint(actorTemplateNamespace, actorTemplateName, actorID, volumeName string) string { +// DurableDirVolumeMountPoint returns the path where a specific durable-dir volume is mounted on the nodeVM. +func DurableDirVolumeMountPoint(actorTemplateNamespace, actorTemplateName, actorID, volumeName string) string { return filepath.Join( - HommedirVolumeMountsDir(actorTemplateNamespace, actorTemplateName, actorID), + DurableDirVolumeMountsDir(actorTemplateNamespace, actorTemplateName, actorID), volumeName, ) } diff --git a/internal/e2e/suites/demo/demo_test.go b/internal/e2e/suites/demo/demo_test.go index 8fd581d3..bf9304b0 100644 --- a/internal/e2e/suites/demo/demo_test.go +++ b/internal/e2e/suites/demo/demo_test.go @@ -42,7 +42,7 @@ func TestActorLifecycle(t *testing.T) { clients := e2e.GetClients() // Create actor template. - at, err := createActorTemplate(ctx, t, clients, nsObj, v1alpha1.SnapshotScopeProcess, v1alpha1.SnapshotScopeProcess) + at, err := createActorTemplate(ctx, t, clients, nsObj, v1alpha1.SnapshotScopeFull, v1alpha1.SnapshotScopeFull) if err != nil { t.Fatalf("failed to initialize ActorTemplate: %v", err) } @@ -83,7 +83,7 @@ func TestActorLifecycle(t *testing.T) { // 4. Call to actor and validate memory and file counters. // 5. Suspend & Resume actor. // 6. Call to actor and validate memory and file counters. 
-func TestHomedirLifecycle(t *testing.T) { +func TestDurableDirLifecycle(t *testing.T) { tests := []struct { name string onCommit v1alpha1.SnapshotScope @@ -94,27 +94,27 @@ func TestHomedirLifecycle(t *testing.T) { wantFileAfterSuspend int }{ { - name: "onCommit:process, onPause:process", - onCommit: v1alpha1.SnapshotScopeProcess, - onPause: v1alpha1.SnapshotScopeProcess, + name: "onCommit:full, onPause:full", + onCommit: v1alpha1.SnapshotScopeFull, + onPause: v1alpha1.SnapshotScopeFull, wantMemoryAfterPause: 2, wantFileAfterPause: 2, wantMemoryAfterSuspend: 3, wantFileAfterSuspend: 3, }, { - name: "onCommit:homedir, onPause:process", - onCommit: v1alpha1.SnapshotScopeHomedir, - onPause: v1alpha1.SnapshotScopeProcess, + name: "onCommit:data, onPause:full", + onCommit: v1alpha1.SnapshotScopeData, + onPause: v1alpha1.SnapshotScopeFull, wantMemoryAfterPause: 2, wantFileAfterPause: 2, wantMemoryAfterSuspend: 1, wantFileAfterSuspend: 3, }, { - name: "onCommit:homedir, onPause:homedir", - onCommit: v1alpha1.SnapshotScopeHomedir, - onPause: v1alpha1.SnapshotScopeHomedir, + name: "onCommit:data, onPause:data", + onCommit: v1alpha1.SnapshotScopeData, + onPause: v1alpha1.SnapshotScopeData, wantMemoryAfterPause: 1, wantFileAfterPause: 2, wantMemoryAfterSuspend: 1, @@ -140,7 +140,7 @@ func TestHomedirLifecycle(t *testing.T) { // // Create an Actor. 
// - actorID := "homedir-lifecycle" + "-" + nsObj.Name + actorID := "durabledir-lifecycle" + "-" + nsObj.Name t.Logf("Creating Actor %q using Substrate API...", actorID) createResp, err := clients.SubstrateAPI.CreateActor(ctx, &ateapipb.CreateActorRequest{ diff --git a/internal/proto/ateletpb/atelet.pb.go b/internal/proto/ateletpb/atelet.pb.go index af5e9210..840ef4bc 100644 --- a/internal/proto/ateletpb/atelet.pb.go +++ b/internal/proto/ateletpb/atelet.pb.go @@ -39,18 +39,18 @@ type VolumeType int32 const ( VolumeType_VOLUME_TYPE_UNSPECIFIED VolumeType = 0 - VolumeType_VOLUME_TYPE_HOMEDIR VolumeType = 1 + VolumeType_VOLUME_TYPE_DURABLE_DIR VolumeType = 1 ) // Enum value maps for VolumeType. var ( VolumeType_name = map[int32]string{ 0: "VOLUME_TYPE_UNSPECIFIED", - 1: "VOLUME_TYPE_HOMEDIR", + 1: "VOLUME_TYPE_DURABLE_DIR", } VolumeType_value = map[string]int32{ "VOLUME_TYPE_UNSPECIFIED": 0, - "VOLUME_TYPE_HOMEDIR": 1, + "VOLUME_TYPE_DURABLE_DIR": 1, } ) @@ -138,23 +138,26 @@ type SnapshotScope int32 const ( // Not valid option; should never happen. SnapshotScope_SNAPSHOT_SCOPE_UNSPECIFIED SnapshotScope = 0 - // Snapshot memory and the full rootfs (including homedir content). - SnapshotScope_SNAPSHOT_SCOPE_PROCESS SnapshotScope = 1 - // Snapshot only the homedir; memory and the rest of rootfs are excluded. - SnapshotScope_SNAPSHOT_SCOPE_HOMEDIR SnapshotScope = 2 + // Capture process memory plus the full filesystem delta on top of the OCI + // image (including any attached DurableDir volumes). + SnapshotScope_SNAPSHOT_SCOPE_FULL SnapshotScope = 1 + // Capture only the contents of attached volumes that support snapshots + // (currently DurableDir-typed volumes). Memory and the rest of rootfs are + // excluded. + SnapshotScope_SNAPSHOT_SCOPE_DATA SnapshotScope = 2 ) // Enum value maps for SnapshotScope. 
var ( SnapshotScope_name = map[int32]string{ 0: "SNAPSHOT_SCOPE_UNSPECIFIED", - 1: "SNAPSHOT_SCOPE_PROCESS", - 2: "SNAPSHOT_SCOPE_HOMEDIR", + 1: "SNAPSHOT_SCOPE_FULL", + 2: "SNAPSHOT_SCOPE_DATA", } SnapshotScope_value = map[string]int32{ "SNAPSHOT_SCOPE_UNSPECIFIED": 0, - "SNAPSHOT_SCOPE_PROCESS": 1, - "SNAPSHOT_SCOPE_HOMEDIR": 2, + "SNAPSHOT_SCOPE_FULL": 1, + "SNAPSHOT_SCOPE_DATA": 2, } ) @@ -491,26 +494,26 @@ func (x *WorkloadSpec) GetVolumes() []*Volume { return nil } -type HomedirVolume struct { +type DurableDirVolume struct { state protoimpl.MessageState `protogen:"open.v1"` unknownFields protoimpl.UnknownFields sizeCache protoimpl.SizeCache } -func (x *HomedirVolume) Reset() { - *x = HomedirVolume{} +func (x *DurableDirVolume) Reset() { + *x = DurableDirVolume{} mi := &file_atelet_proto_msgTypes[5] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } -func (x *HomedirVolume) String() string { +func (x *DurableDirVolume) String() string { return protoimpl.X.MessageStringOf(x) } -func (*HomedirVolume) ProtoMessage() {} +func (*DurableDirVolume) ProtoMessage() {} -func (x *HomedirVolume) ProtoReflect() protoreflect.Message { +func (x *DurableDirVolume) ProtoReflect() protoreflect.Message { mi := &file_atelet_proto_msgTypes[5] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) @@ -522,8 +525,8 @@ func (x *HomedirVolume) ProtoReflect() protoreflect.Message { return mi.MessageOf(x) } -// Deprecated: Use HomedirVolume.ProtoReflect.Descriptor instead. -func (*HomedirVolume) Descriptor() ([]byte, []int) { +// Deprecated: Use DurableDirVolume.ProtoReflect.Descriptor instead. 
+func (*DurableDirVolume) Descriptor() ([]byte, []int) { return file_atelet_proto_rawDescGZIP(), []int{5} } @@ -533,7 +536,7 @@ type Volume struct { Type VolumeType `protobuf:"varint,2,opt,name=type,proto3,enum=atelet.VolumeType" json:"type,omitempty"` // Types that are valid to be assigned to Source: // - // *Volume_HomeDir + // *Volume_DurableDir Source isVolume_Source `protobuf_oneof:"source"` unknownFields protoimpl.UnknownFields sizeCache protoimpl.SizeCache @@ -590,10 +593,10 @@ func (x *Volume) GetSource() isVolume_Source { return nil } -func (x *Volume) GetHomeDir() *HomedirVolume { +func (x *Volume) GetDurableDir() *DurableDirVolume { if x != nil { - if x, ok := x.Source.(*Volume_HomeDir); ok { - return x.HomeDir + if x, ok := x.Source.(*Volume_DurableDir); ok { + return x.DurableDir } } return nil @@ -603,11 +606,11 @@ type isVolume_Source interface { isVolume_Source() } -type Volume_HomeDir struct { - HomeDir *HomedirVolume `protobuf:"bytes,3,opt,name=home_dir,json=homeDir,proto3,oneof"` +type Volume_DurableDir struct { + DurableDir *DurableDirVolume `protobuf:"bytes,3,opt,name=durable_dir,json=durableDir,proto3,oneof"` } -func (*Volume_HomeDir) isVolume_Source() {} +func (*Volume_DurableDir) isVolume_Source() {} type VolumeMount struct { state protoimpl.MessageState `protogen:"open.v1"` @@ -1423,12 +1426,13 @@ const file_atelet_proto_rawDesc = "" + "containers\x12\x1f\n" + "\vpause_image\x18\x02 \x01(\tR\n" + "pauseImage\x12(\n" + - "\avolumes\x18\x03 \x03(\v2\x0e.atelet.VolumeR\avolumes\"\x0f\n" + - "\rHomedirVolume\"\x82\x01\n" + + "\avolumes\x18\x03 \x03(\v2\x0e.atelet.VolumeR\avolumes\"\x12\n" + + "\x10DurableDirVolume\"\x8b\x01\n" + "\x06Volume\x12\x12\n" + "\x04name\x18\x01 \x01(\tR\x04name\x12&\n" + - "\x04type\x18\x02 \x01(\x0e2\x12.atelet.VolumeTypeR\x04type\x122\n" + - "\bhome_dir\x18\x03 \x01(\v2\x15.atelet.HomedirVolumeH\x00R\ahomeDirB\b\n" + + "\x04type\x18\x02 \x01(\x0e2\x12.atelet.VolumeTypeR\x04type\x12;\n" + + "\vdurable_dir\x18\x03 
\x01(\v2\x18.atelet.DurableDirVolumeH\x00R\n" + + "durableDirB\b\n" + "\x06source\"@\n" + "\vVolumeMount\x12\x12\n" + "\x04name\x18\x01 \x01(\tR\x04name\x12\x1d\n" + @@ -1479,19 +1483,19 @@ const file_atelet_proto_rawDesc = "" + "\x0fexternal_config\x18\v \x01(\v2'.atelet.ExternalCheckpointConfigurationH\x00R\x0eexternalConfig\x12+\n" + "\x05scope\x18\f \x01(\x0e2\x15.atelet.SnapshotScopeR\x05scopeB\b\n" + "\x06configJ\x04\b\x06\x10\aJ\x04\b\b\x10\t\"\x11\n" + - "\x0fRestoreResponse*B\n" + + "\x0fRestoreResponse*F\n" + "\n" + "VolumeType\x12\x1b\n" + - "\x17VOLUME_TYPE_UNSPECIFIED\x10\x00\x12\x17\n" + - "\x13VOLUME_TYPE_HOMEDIR\x10\x01*j\n" + + "\x17VOLUME_TYPE_UNSPECIFIED\x10\x00\x12\x1b\n" + + "\x17VOLUME_TYPE_DURABLE_DIR\x10\x01*j\n" + "\x0eCheckpointType\x12\x1f\n" + "\x1bCHECKPOINT_TYPE_UNSPECIFIED\x10\x00\x12\x19\n" + "\x15CHECKPOINT_TYPE_LOCAL\x10\x01\x12\x1c\n" + - "\x18CHECKPOINT_TYPE_EXTERNAL\x10\x02*g\n" + + "\x18CHECKPOINT_TYPE_EXTERNAL\x10\x02*a\n" + "\rSnapshotScope\x12\x1e\n" + - "\x1aSNAPSHOT_SCOPE_UNSPECIFIED\x10\x00\x12\x1a\n" + - "\x16SNAPSHOT_SCOPE_PROCESS\x10\x01\x12\x1a\n" + - "\x16SNAPSHOT_SCOPE_HOMEDIR\x10\x022\xc4\x01\n" + + "\x1aSNAPSHOT_SCOPE_UNSPECIFIED\x10\x00\x12\x17\n" + + "\x13SNAPSHOT_SCOPE_FULL\x10\x01\x12\x17\n" + + "\x13SNAPSHOT_SCOPE_DATA\x10\x022\xc4\x01\n" + "\vAteomHerder\x120\n" + "\x03Run\x12\x12.atelet.RunRequest\x1a\x13.atelet.RunResponse\"\x00\x12E\n" + "\n" + @@ -1521,7 +1525,7 @@ var file_atelet_proto_goTypes = []any{ (*ArchAssets)(nil), // 5: atelet.ArchAssets (*SandboxAssets)(nil), // 6: atelet.SandboxAssets (*WorkloadSpec)(nil), // 7: atelet.WorkloadSpec - (*HomedirVolume)(nil), // 8: atelet.HomedirVolume + (*DurableDirVolume)(nil), // 8: atelet.DurableDirVolume (*Volume)(nil), // 9: atelet.Volume (*VolumeMount)(nil), // 10: atelet.VolumeMount (*Container)(nil), // 11: atelet.Container @@ -1546,7 +1550,7 @@ var file_atelet_proto_depIdxs = []int32{ 11, // 4: atelet.WorkloadSpec.containers:type_name -> 
atelet.Container 9, // 5: atelet.WorkloadSpec.volumes:type_name -> atelet.Volume 0, // 6: atelet.Volume.type:type_name -> atelet.VolumeType - 8, // 7: atelet.Volume.home_dir:type_name -> atelet.HomedirVolume + 8, // 7: atelet.Volume.durable_dir:type_name -> atelet.DurableDirVolume 12, // 8: atelet.Container.env:type_name -> atelet.EnvEntry 13, // 9: atelet.Container.readyz:type_name -> atelet.Readyz 10, // 10: atelet.Container.volume_mounts:type_name -> atelet.VolumeMount @@ -1582,7 +1586,7 @@ func file_atelet_proto_init() { return } file_atelet_proto_msgTypes[6].OneofWrappers = []any{ - (*Volume_HomeDir)(nil), + (*Volume_DurableDir)(nil), } file_atelet_proto_msgTypes[15].OneofWrappers = []any{ (*CheckpointRequest_LocalConfig)(nil), diff --git a/internal/proto/ateletpb/atelet.proto b/internal/proto/ateletpb/atelet.proto index 8c88f60b..994e0aed 100644 --- a/internal/proto/ateletpb/atelet.proto +++ b/internal/proto/ateletpb/atelet.proto @@ -81,11 +81,11 @@ message WorkloadSpec { } enum VolumeType { - VOLUME_TYPE_UNSPECIFIED = 0; - VOLUME_TYPE_HOMEDIR = 1; + VOLUME_TYPE_UNSPECIFIED = 0; + VOLUME_TYPE_DURABLE_DIR = 1; } -message HomedirVolume { +message DurableDirVolume { } message Volume { @@ -94,7 +94,7 @@ message Volume { VolumeType type = 2; oneof source{ - HomedirVolume home_dir = 3; + DurableDirVolume durable_dir = 3; } } @@ -166,10 +166,13 @@ enum CheckpointType { enum SnapshotScope { // Not valid option; should never happen. SNAPSHOT_SCOPE_UNSPECIFIED = 0; - // Snapshot memory and the full rootfs (including homedir content). - SNAPSHOT_SCOPE_PROCESS = 1; - // Snapshot only the homedir; memory and the rest of rootfs are excluded. - SNAPSHOT_SCOPE_HOMEDIR = 2; + // Capture process memory plus the full filesystem delta on top of the OCI + // image (including any attached DurableDir volumes). + SNAPSHOT_SCOPE_FULL = 1; + // Capture only the contents of attached volumes that support snapshots + // (currently DurableDir-typed volumes). 
Memory and the rest of rootfs are + // excluded. + SNAPSHOT_SCOPE_DATA = 2; } message CheckpointRequest { diff --git a/internal/proto/ateompb/ateom.pb.go b/internal/proto/ateompb/ateom.pb.go index 8c3a57c0..34fb6a54 100644 --- a/internal/proto/ateompb/ateom.pb.go +++ b/internal/proto/ateompb/ateom.pb.go @@ -40,23 +40,26 @@ type SnapshotScope int32 const ( // Not valid option; should never happen. SnapshotScope_SNAPSHOT_SCOPE_UNSPECIFIED SnapshotScope = 0 - // Snapshot memory and the full rootfs (including homedir content). - SnapshotScope_SNAPSHOT_SCOPE_PROCESS SnapshotScope = 1 - // Snapshot only the homedir; memory and the rest of rootfs are excluded. - SnapshotScope_SNAPSHOT_SCOPE_HOMEDIR SnapshotScope = 2 + // Capture process memory plus the full filesystem delta on top of the OCI + // image (including any attached DurableDir volumes). + SnapshotScope_SNAPSHOT_SCOPE_FULL SnapshotScope = 1 + // Capture only the contents of attached volumes that support snapshots + // (currently DurableDir-typed volumes). Memory and the rest of rootfs are + // excluded. + SnapshotScope_SNAPSHOT_SCOPE_DATA SnapshotScope = 2 ) // Enum value maps for SnapshotScope. 
var ( SnapshotScope_name = map[int32]string{ 0: "SNAPSHOT_SCOPE_UNSPECIFIED", - 1: "SNAPSHOT_SCOPE_PROCESS", - 2: "SNAPSHOT_SCOPE_HOMEDIR", + 1: "SNAPSHOT_SCOPE_FULL", + 2: "SNAPSHOT_SCOPE_DATA", } SnapshotScope_value = map[string]int32{ "SNAPSHOT_SCOPE_UNSPECIFIED": 0, - "SNAPSHOT_SCOPE_PROCESS": 1, - "SNAPSHOT_SCOPE_HOMEDIR": 2, + "SNAPSHOT_SCOPE_FULL": 1, + "SNAPSHOT_SCOPE_DATA": 2, } ) @@ -221,12 +224,12 @@ func (x *WorkloadSpec) GetContainers() []*Container { } type Container struct { - state protoimpl.MessageState `protogen:"open.v1"` - Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` - Readyz *Readyz `protobuf:"bytes,2,opt,name=readyz,proto3" json:"readyz,omitempty"` - HomeDirVolumes []string `protobuf:"bytes,3,rep,name=home_dir_volumes,json=homeDirVolumes,proto3" json:"home_dir_volumes,omitempty"` - unknownFields protoimpl.UnknownFields - sizeCache protoimpl.SizeCache + state protoimpl.MessageState `protogen:"open.v1"` + Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` + Readyz *Readyz `protobuf:"bytes,2,opt,name=readyz,proto3" json:"readyz,omitempty"` + DurableDirVolumes []string `protobuf:"bytes,3,rep,name=durable_dir_volumes,json=durableDirVolumes,proto3" json:"durable_dir_volumes,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache } func (x *Container) Reset() { @@ -273,9 +276,9 @@ func (x *Container) GetReadyz() *Readyz { return nil } -func (x *Container) GetHomeDirVolumes() []string { +func (x *Container) GetDurableDirVolumes() []string { if x != nil { - return x.HomeDirVolumes + return x.DurableDirVolumes } return nil } @@ -734,11 +737,11 @@ const file_ateom_proto_rawDesc = "" + "\fWorkloadSpec\x120\n" + "\n" + "containers\x18\x01 \x03(\v2\x10.ateom.ContainerR\n" + - "containers\"p\n" + + "containers\"v\n" + "\tContainer\x12\x12\n" + "\x04name\x18\x01 \x01(\tR\x04name\x12%\n" + - "\x06readyz\x18\x02 \x01(\v2\r.ateom.ReadyzR\x06readyz\x12(\n" + - 
"\x10home_dir_volumes\x18\x03 \x03(\tR\x0ehomeDirVolumes\"9\n" + + "\x06readyz\x18\x02 \x01(\v2\r.ateom.ReadyzR\x06readyz\x12.\n" + + "\x13durable_dir_volumes\x18\x03 \x03(\tR\x11durableDirVolumes\"9\n" + "\x06Readyz\x12/\n" + "\bhttp_get\x18\x01 \x01(\v2\x14.ateom.HTTPGetActionR\ahttpGet\"7\n" + "\rHTTPGetAction\x12\x12\n" + @@ -773,11 +776,11 @@ const file_ateom_proto_rawDesc = "" + "\x16RuntimeAssetPathsEntry\x12\x10\n" + "\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n" + "\x05value\x18\x02 \x01(\tR\x05value:\x028\x01\"\x19\n" + - "\x17RestoreWorkloadResponse*g\n" + + "\x17RestoreWorkloadResponse*a\n" + "\rSnapshotScope\x12\x1e\n" + - "\x1aSNAPSHOT_SCOPE_UNSPECIFIED\x10\x00\x12\x1a\n" + - "\x16SNAPSHOT_SCOPE_PROCESS\x10\x01\x12\x1a\n" + - "\x16SNAPSHOT_SCOPE_HOMEDIR\x10\x022\x80\x02\n" + + "\x1aSNAPSHOT_SCOPE_UNSPECIFIED\x10\x00\x12\x17\n" + + "\x13SNAPSHOT_SCOPE_FULL\x10\x01\x12\x17\n" + + "\x13SNAPSHOT_SCOPE_DATA\x10\x022\x80\x02\n" + "\x05Ateom\x12F\n" + "\vRunWorkload\x12\x19.ateom.RunWorkloadRequest\x1a\x1a.ateom.RunWorkloadResponse\"\x00\x12[\n" + "\x12CheckpointWorkload\x12 .ateom.CheckpointWorkloadRequest\x1a!.ateom.CheckpointWorkloadResponse\"\x00\x12R\n" + diff --git a/internal/proto/ateompb/ateom.proto b/internal/proto/ateompb/ateom.proto index 3c582b43..30fa467b 100644 --- a/internal/proto/ateompb/ateom.proto +++ b/internal/proto/ateompb/ateom.proto @@ -69,9 +69,9 @@ message WorkloadSpec { } message Container { - string name = 1; - Readyz readyz = 2; - repeated string home_dir_volumes = 3; + string name = 1; + Readyz readyz = 2; + repeated string durable_dir_volumes = 3; } // Readyz describes how to check that a container is ready to serve. @@ -94,10 +94,13 @@ message RunWorkloadResponse { enum SnapshotScope { // Not valid option; should never happen. SNAPSHOT_SCOPE_UNSPECIFIED = 0; - // Snapshot memory and the full rootfs (including homedir content). - SNAPSHOT_SCOPE_PROCESS = 1; - // Snapshot only the homedir; memory and the rest of rootfs are excluded. 
- SNAPSHOT_SCOPE_HOMEDIR = 2; + // Capture process memory plus the full filesystem delta on top of the OCI + // image (including any attached DurableDir volumes). + SNAPSHOT_SCOPE_FULL = 1; + // Capture only the contents of attached volumes that support snapshots + // (currently DurableDir-typed volumes). Memory and the rest of rootfs are + // excluded. + SNAPSHOT_SCOPE_DATA = 2; } message CheckpointWorkloadRequest { diff --git a/manifests/ate-install/generated/ate.dev_actortemplates.yaml b/manifests/ate-install/generated/ate.dev_actortemplates.yaml index 1fb3483b..51e9ca95 100644 --- a/manifests/ate-install/generated/ate.dev_actortemplates.yaml +++ b/manifests/ate-install/generated/ate.dev_actortemplates.yaml @@ -269,40 +269,41 @@ spec: onCommit: description: |- OnCommit specifies what to include in the snapshot when a commit is requested. - If not provided, the "process" behavior is used by default. + If not provided, the "full" behavior is used by default. The OnCommit must be a subset of the OnPause content. For example: - - if OnPause is "process", then OnCommit can be "process" or "homedir". - - if OnPause is "homedir", then OnCommit must be "homedir". + - if OnPause is "full", then OnCommit can be "full" or "data". + - if OnPause is "data", then OnCommit must be "data". enum: - - process - - homedir + - full + - data type: string onPause: description: |- OnPause specifies what to include in the snapshot when the actor is paused. - If not provided, the "process" behavior is used by default. + If not provided, the "full" behavior is used by default. enum: - - process - - homedir + - full + - data type: string required: - location type: object x-kubernetes-validations: - message: OnCommit must be a subset of OnPause - rule: '(has(self.onPause) ? self.onPause : ''process'') == ''process'' - || (has(self.onCommit) ? self.onCommit : ''process'') == (has(self.onPause) - ? self.onPause : ''process'')' + rule: '(has(self.onPause) ? 
self.onPause : ''full'') == ''full'' + || (has(self.onCommit) ? self.onCommit : ''full'') == (has(self.onPause) + ? self.onPause : ''full'')' volumes: description: Volumes defines the volumes to mount into all containers in the actor. items: properties: - homeDir: - description: homeDir represents a directory on rootfs that will - participate in snapshots. + durableDir: + description: |- + durableDir represents a durable directory on rootfs that persists across + resumes and participates in snapshots. type: object name: description: name of the volume. @@ -315,8 +316,8 @@ spec: - name type: object x-kubernetes-validations: - - message: exactly one of the fields in [homeDir] must be set - rule: '[has(self.homeDir)].filter(x,x==true).size() == 1' + - message: exactly one of the fields in [durableDir] must be set + rule: '[has(self.durableDir)].filter(x,x==true).size() == 1' maxItems: 32 type: array workerSelector: @@ -377,13 +378,13 @@ spec: x-kubernetes-validations: - message: Spec is immutable rule: self == oldSelf - - message: A container may mount at most one HomeDir-typed volume + - message: A container may mount at most one DurableDir-typed volume rule: '!has(self.containers) || self.containers.all(c, !has(c.volumeMounts) || c.volumeMounts.filter(vm, has(self.volumes) && self.volumes.exists(v, - v.name == vm.name && has(v.homeDir))).size() <= 1)' - - message: HomeDir volumes are not supported when sandboxClass is 'microvm' + v.name == vm.name && has(v.durableDir))).size() <= 1)' + - message: DurableDir volumes are not supported when sandboxClass is 'microvm' rule: '!has(self.sandboxClass) || self.sandboxClass != ''microvm'' || - !has(self.volumes) || !self.volumes.exists(v, has(v.homeDir))' + !has(self.volumes) || !self.volumes.exists(v, has(v.durableDir))' status: description: status is the observed state of ActorTemplate properties: diff --git a/pkg/api/v1alpha1/actortemplate_types.go b/pkg/api/v1alpha1/actortemplate_types.go index 34d746ec..37c067d6 100644 
--- a/pkg/api/v1alpha1/actortemplate_types.go +++ b/pkg/api/v1alpha1/actortemplate_types.go @@ -29,8 +29,9 @@ const ( PhaseFailed PhaseType = "Failed" ) -// Represents a directory on rootfs that will participate in snapshots. -type HomedirVolumeSource struct { +// Represents a durable directory on rootfs that persists across resumes and +// participates in snapshots. +type DurableDirVolumeSource struct { } // Represents the source of a volume to mount. @@ -38,11 +39,12 @@ type HomedirVolumeSource struct { // // When adding a new source type, list it in the ExactlyOneOf marker below. // -// +kubebuilder:validation:ExactlyOneOf={homeDir} +// +kubebuilder:validation:ExactlyOneOf={durableDir} type VolumeSource struct { - // homeDir represents a directory on rootfs that will participate in snapshots. + // durableDir represents a durable directory on rootfs that persists across + // resumes and participates in snapshots. // +optional - HomeDir *HomedirVolumeSource `json:"homeDir,omitempty" protobuf:"bytes,2,opt,name=homeDir"` + DurableDir *DurableDirVolumeSource `json:"durableDir,omitempty" protobuf:"bytes,2,opt,name=durableDir"` } type Volume struct { @@ -216,17 +218,20 @@ type SecretKeySelector struct { } // SnapshotScope defines what components to include in a snapshot. -// +kubebuilder:validation:Enum=process;homedir +// +kubebuilder:validation:Enum=full;data type SnapshotScope string const ( - // Process memory plus the full rootfs (homedir included). - SnapshotScopeProcess SnapshotScope = "process" - // Only the homedir; memory and the rest of rootfs are excluded. - SnapshotScopeHomedir SnapshotScope = "homedir" + // Full captures process memory plus the entire filesystem delta on top of + // the OCI image (including any attached DurableDir volumes). + SnapshotScopeFull SnapshotScope = "full" + // Data captures only the contents of attached volumes that support + // snapshots (currently DurableDir-typed volumes). 
Process memory and + // the rest of rootfs are excluded. + SnapshotScopeData SnapshotScope = "data" ) -// +kubebuilder:validation:XValidation:rule="(has(self.onPause) ? self.onPause : 'process') == 'process' || (has(self.onCommit) ? self.onCommit : 'process') == (has(self.onPause) ? self.onPause : 'process')",message="OnCommit must be a subset of OnPause" +// +kubebuilder:validation:XValidation:rule="(has(self.onPause) ? self.onPause : 'full') == 'full' || (has(self.onCommit) ? self.onCommit : 'full') == (has(self.onPause) ? self.onPause : 'full')",message="OnCommit must be a subset of OnPause" type SnapshotsConfig struct { // Location to store snapshots in. // @@ -235,18 +240,18 @@ type SnapshotsConfig struct { Location string `json:"location"` // OnPause specifies what to include in the snapshot when the actor is paused. - // If not provided, the "process" behavior is used by default. + // If not provided, the "full" behavior is used by default. // // +optional OnPause SnapshotScope `json:"onPause,omitempty"` // OnCommit specifies what to include in the snapshot when a commit is requested. - // If not provided, the "process" behavior is used by default. + // If not provided, the "full" behavior is used by default. // The OnCommit must be a subset of the OnPause content. // // For example: - // - if OnPause is "process", then OnCommit can be "process" or "homedir". - // - if OnPause is "homedir", then OnCommit must be "homedir". + // - if OnPause is "full", then OnCommit can be "full" or "data". + // - if OnPause is "data", then OnCommit must be "data". // // +optional OnCommit SnapshotScope `json:"onCommit,omitempty"` @@ -254,8 +259,8 @@ type SnapshotsConfig struct { // ActorTemplateSpec defined desired spec of an actor. 
// -// +kubebuilder:validation:XValidation:rule="!has(self.containers) || self.containers.all(c, !has(c.volumeMounts) || c.volumeMounts.filter(vm, has(self.volumes) && self.volumes.exists(v, v.name == vm.name && has(v.homeDir))).size() <= 1)",message="A container may mount at most one HomeDir-typed volume" -// +kubebuilder:validation:XValidation:rule="!has(self.sandboxClass) || self.sandboxClass != 'microvm' || !has(self.volumes) || !self.volumes.exists(v, has(v.homeDir))",message="HomeDir volumes are not supported when sandboxClass is 'microvm'" +// +kubebuilder:validation:XValidation:rule="!has(self.containers) || self.containers.all(c, !has(c.volumeMounts) || c.volumeMounts.filter(vm, has(self.volumes) && self.volumes.exists(v, v.name == vm.name && has(v.durableDir))).size() <= 1)",message="A container may mount at most one DurableDir-typed volume" +// +kubebuilder:validation:XValidation:rule="!has(self.sandboxClass) || self.sandboxClass != 'microvm' || !has(self.volumes) || !self.volumes.exists(v, has(v.durableDir))",message="DurableDir volumes are not supported when sandboxClass is 'microvm'" type ActorTemplateSpec struct { // PauseImage is the container to use as the root sandbox container. 
// diff --git a/pkg/api/v1alpha1/actortemplate_validation_test.go b/pkg/api/v1alpha1/actortemplate_validation_test.go index 87bddeb4..5a0da7bc 100644 --- a/pkg/api/v1alpha1/actortemplate_validation_test.go +++ b/pkg/api/v1alpha1/actortemplate_validation_test.go @@ -481,45 +481,45 @@ func TestActorTemplateValidation(t *testing.T) { wantErr: true, errMsg: "Unsupported value", }, { - name: "SnapshotsConfig: OnPause=process, OnCommit=process", + name: "SnapshotsConfig: OnPause=full, OnCommit=full", mutate: func(at *ActorTemplate) { - at.Spec.SnapshotsConfig.OnPause = SnapshotScopeProcess - at.Spec.SnapshotsConfig.OnCommit = SnapshotScopeProcess + at.Spec.SnapshotsConfig.OnPause = SnapshotScopeFull + at.Spec.SnapshotsConfig.OnCommit = SnapshotScopeFull }, wantErr: false, }, { - name: "SnapshotsConfig: OnPause=process, OnCommit=homedir", + name: "SnapshotsConfig: OnPause=full, OnCommit=data", mutate: func(at *ActorTemplate) { - at.Spec.SnapshotsConfig.OnPause = SnapshotScopeProcess - at.Spec.SnapshotsConfig.OnCommit = SnapshotScopeHomedir + at.Spec.SnapshotsConfig.OnPause = SnapshotScopeFull + at.Spec.SnapshotsConfig.OnCommit = SnapshotScopeData }, wantErr: false, }, { - name: "SnapshotsConfig: OnPause=homedir, OnCommit=homedir", + name: "SnapshotsConfig: OnPause=data, OnCommit=data", mutate: func(at *ActorTemplate) { - at.Spec.SnapshotsConfig.OnPause = SnapshotScopeHomedir - at.Spec.SnapshotsConfig.OnCommit = SnapshotScopeHomedir + at.Spec.SnapshotsConfig.OnPause = SnapshotScopeData + at.Spec.SnapshotsConfig.OnCommit = SnapshotScopeData }, wantErr: false, }, { - name: "SnapshotsConfig: OnPause=homedir, OnCommit=process (invalid)", + name: "SnapshotsConfig: OnPause=data, OnCommit=full (invalid)", mutate: func(at *ActorTemplate) { - at.Spec.SnapshotsConfig.OnPause = SnapshotScopeHomedir - at.Spec.SnapshotsConfig.OnCommit = SnapshotScopeProcess + at.Spec.SnapshotsConfig.OnPause = SnapshotScopeData + at.Spec.SnapshotsConfig.OnCommit = SnapshotScopeFull }, wantErr: true, 
errMsg: "OnCommit must be a subset of OnPause", }, { - name: "SnapshotsConfig: OnPause=homedir, OnCommit unset (defaults to process, invalid)", + name: "SnapshotsConfig: OnPause=data, OnCommit unset (defaults to full, invalid)", mutate: func(at *ActorTemplate) { - at.Spec.SnapshotsConfig.OnPause = SnapshotScopeHomedir + at.Spec.SnapshotsConfig.OnPause = SnapshotScopeData }, wantErr: true, errMsg: "OnCommit must be a subset of OnPause", }, { - name: "SnapshotsConfig: OnPause unset (defaults to process), OnCommit=homedir", + name: "SnapshotsConfig: OnPause unset (defaults to full), OnCommit=data", mutate: func(at *ActorTemplate) { - at.Spec.SnapshotsConfig.OnCommit = SnapshotScopeHomedir + at.Spec.SnapshotsConfig.OnCommit = SnapshotScopeData }, wantErr: false, }, { @@ -537,10 +537,10 @@ func TestActorTemplateValidation(t *testing.T) { wantErr: true, errMsg: "Unsupported value", }, { - name: "Volumes: 1 HomeDir mount is valid", + name: "Volumes: 1 DurableDir mount is valid", mutate: func(at *ActorTemplate) { at.Spec.Volumes = []Volume{ - {Name: "vol1", VolumeSource: VolumeSource{HomeDir: &HomedirVolumeSource{}}}, + {Name: "vol1", VolumeSource: VolumeSource{DurableDir: &DurableDirVolumeSource{}}}, } at.Spec.Containers[0].VolumeMounts = []VolumeMount{ {Name: "vol1", MountPath: "/home"}, @@ -548,11 +548,11 @@ func TestActorTemplateValidation(t *testing.T) { }, wantErr: false, }, { - name: "Volumes: 2 HomeDir mounts in same container is invalid", + name: "Volumes: 2 DurableDir mounts in same container is invalid", mutate: func(at *ActorTemplate) { at.Spec.Volumes = []Volume{ - {Name: "vol1", VolumeSource: VolumeSource{HomeDir: &HomedirVolumeSource{}}}, - {Name: "vol2", VolumeSource: VolumeSource{HomeDir: &HomedirVolumeSource{}}}, + {Name: "vol1", VolumeSource: VolumeSource{DurableDir: &DurableDirVolumeSource{}}}, + {Name: "vol2", VolumeSource: VolumeSource{DurableDir: &DurableDirVolumeSource{}}}, } at.Spec.Containers[0].VolumeMounts = []VolumeMount{ {Name: "vol1", 
MountPath: "/home1"}, @@ -560,13 +560,13 @@ func TestActorTemplateValidation(t *testing.T) { } }, wantErr: true, - errMsg: "A container may mount at most one HomeDir-typed volume", + errMsg: "A container may mount at most one DurableDir-typed volume", }, { - name: "Volumes: 2 HomeDir mounts in different containers is valid", + name: "Volumes: 2 DurableDir mounts in different containers is valid", mutate: func(at *ActorTemplate) { at.Spec.Volumes = []Volume{ - {Name: "vol1", VolumeSource: VolumeSource{HomeDir: &HomedirVolumeSource{}}}, - {Name: "vol2", VolumeSource: VolumeSource{HomeDir: &HomedirVolumeSource{}}}, + {Name: "vol1", VolumeSource: VolumeSource{DurableDir: &DurableDirVolumeSource{}}}, + {Name: "vol2", VolumeSource: VolumeSource{DurableDir: &DurableDirVolumeSource{}}}, } at.Spec.Containers = append(at.Spec.Containers, Container{ Name: "sidecar", @@ -588,12 +588,12 @@ func TestActorTemplateValidation(t *testing.T) { } }, wantErr: true, - errMsg: "exactly one of the fields in [homeDir] must be set", + errMsg: "exactly one of the fields in [durableDir] must be set", }, { - name: "Volumes: VolumeSource with no source set is invalid (mixed with a valid HomeDir volume)", + name: "Volumes: VolumeSource with no source set is invalid (mixed with a valid DurableDir volume)", mutate: func(at *ActorTemplate) { at.Spec.Volumes = []Volume{ - {Name: "vol1", VolumeSource: VolumeSource{HomeDir: &HomedirVolumeSource{}}}, + {Name: "vol1", VolumeSource: VolumeSource{DurableDir: &DurableDirVolumeSource{}}}, {Name: "vol2", VolumeSource: VolumeSource{}}, } at.Spec.Containers[0].VolumeMounts = []VolumeMount{ @@ -602,12 +602,12 @@ func TestActorTemplateValidation(t *testing.T) { } }, wantErr: true, - errMsg: "exactly one of the fields in [homeDir] must be set", + errMsg: "exactly one of the fields in [durableDir] must be set", }, { - name: "Volumes: HomeDir MountPath with nested absolute path is valid", + name: "Volumes: DurableDir MountPath with nested absolute path is valid", 
mutate: func(at *ActorTemplate) { at.Spec.Volumes = []Volume{ - {Name: "vol1", VolumeSource: VolumeSource{HomeDir: &HomedirVolumeSource{}}}, + {Name: "vol1", VolumeSource: VolumeSource{DurableDir: &DurableDirVolumeSource{}}}, } at.Spec.Containers[0].VolumeMounts = []VolumeMount{ {Name: "vol1", MountPath: "/home/user/data"}, @@ -615,10 +615,10 @@ func TestActorTemplateValidation(t *testing.T) { }, wantErr: false, }, { - name: "Volumes: HomeDir MountPath as bare root is invalid", + name: "Volumes: DurableDir MountPath as bare root is invalid", mutate: func(at *ActorTemplate) { at.Spec.Volumes = []Volume{ - {Name: "vol1", VolumeSource: VolumeSource{HomeDir: &HomedirVolumeSource{}}}, + {Name: "vol1", VolumeSource: VolumeSource{DurableDir: &DurableDirVolumeSource{}}}, } at.Spec.Containers[0].VolumeMounts = []VolumeMount{ {Name: "vol1", MountPath: "/"}, @@ -627,10 +627,10 @@ func TestActorTemplateValidation(t *testing.T) { wantErr: true, errMsg: "MountPath must be a clean absolute Unix path", }, { - name: "Volumes: HomeDir MountPath with relative path is invalid", + name: "Volumes: DurableDir MountPath with relative path is invalid", mutate: func(at *ActorTemplate) { at.Spec.Volumes = []Volume{ - {Name: "vol1", VolumeSource: VolumeSource{HomeDir: &HomedirVolumeSource{}}}, + {Name: "vol1", VolumeSource: VolumeSource{DurableDir: &DurableDirVolumeSource{}}}, } at.Spec.Containers[0].VolumeMounts = []VolumeMount{ {Name: "vol1", MountPath: "home/user"}, @@ -639,10 +639,10 @@ func TestActorTemplateValidation(t *testing.T) { wantErr: true, errMsg: "MountPath must be a clean absolute Unix path", }, { - name: "Volumes: HomeDir MountPath as empty string is invalid", + name: "Volumes: DurableDir MountPath as empty string is invalid", mutate: func(at *ActorTemplate) { at.Spec.Volumes = []Volume{ - {Name: "vol1", VolumeSource: VolumeSource{HomeDir: &HomedirVolumeSource{}}}, + {Name: "vol1", VolumeSource: VolumeSource{DurableDir: &DurableDirVolumeSource{}}}, } 
at.Spec.Containers[0].VolumeMounts = []VolumeMount{ {Name: "vol1", MountPath: ""}, @@ -651,10 +651,10 @@ func TestActorTemplateValidation(t *testing.T) { wantErr: true, errMsg: "MountPath must be a clean absolute Unix path", }, { - name: "Volumes: HomeDir MountPath with leading whitespace is invalid", + name: "Volumes: DurableDir MountPath with leading whitespace is invalid", mutate: func(at *ActorTemplate) { at.Spec.Volumes = []Volume{ - {Name: "vol1", VolumeSource: VolumeSource{HomeDir: &HomedirVolumeSource{}}}, + {Name: "vol1", VolumeSource: VolumeSource{DurableDir: &DurableDirVolumeSource{}}}, } at.Spec.Containers[0].VolumeMounts = []VolumeMount{ {Name: "vol1", MountPath: " /home"}, @@ -663,10 +663,10 @@ func TestActorTemplateValidation(t *testing.T) { wantErr: true, errMsg: "MountPath must be a clean absolute Unix path", }, { - name: "Volumes: HomeDir MountPath with trailing slash is invalid", + name: "Volumes: DurableDir MountPath with trailing slash is invalid", mutate: func(at *ActorTemplate) { at.Spec.Volumes = []Volume{ - {Name: "vol1", VolumeSource: VolumeSource{HomeDir: &HomedirVolumeSource{}}}, + {Name: "vol1", VolumeSource: VolumeSource{DurableDir: &DurableDirVolumeSource{}}}, } at.Spec.Containers[0].VolumeMounts = []VolumeMount{ {Name: "vol1", MountPath: "/home/"}, @@ -675,10 +675,10 @@ func TestActorTemplateValidation(t *testing.T) { wantErr: true, errMsg: "MountPath must be a clean absolute Unix path", }, { - name: "Volumes: HomeDir MountPath with consecutive slashes is invalid", + name: "Volumes: DurableDir MountPath with consecutive slashes is invalid", mutate: func(at *ActorTemplate) { at.Spec.Volumes = []Volume{ - {Name: "vol1", VolumeSource: VolumeSource{HomeDir: &HomedirVolumeSource{}}}, + {Name: "vol1", VolumeSource: VolumeSource{DurableDir: &DurableDirVolumeSource{}}}, } at.Spec.Containers[0].VolumeMounts = []VolumeMount{ {Name: "vol1", MountPath: "/home//user"}, @@ -687,10 +687,10 @@ func TestActorTemplateValidation(t *testing.T) { 
wantErr: true, errMsg: "MountPath must be a clean absolute Unix path", }, { - name: "Volumes: HomeDir MountPath containing ':' is invalid", + name: "Volumes: DurableDir MountPath containing ':' is invalid", mutate: func(at *ActorTemplate) { at.Spec.Volumes = []Volume{ - {Name: "vol1", VolumeSource: VolumeSource{HomeDir: &HomedirVolumeSource{}}}, + {Name: "vol1", VolumeSource: VolumeSource{DurableDir: &DurableDirVolumeSource{}}}, } at.Spec.Containers[0].VolumeMounts = []VolumeMount{ {Name: "vol1", MountPath: "/ho:me"}, @@ -699,10 +699,10 @@ func TestActorTemplateValidation(t *testing.T) { wantErr: true, errMsg: "MountPath must be a clean absolute Unix path", }, { - name: "Volumes: HomeDir MountPath with '..' component is invalid", + name: "Volumes: DurableDir MountPath with '..' component is invalid", mutate: func(at *ActorTemplate) { at.Spec.Volumes = []Volume{ - {Name: "vol1", VolumeSource: VolumeSource{HomeDir: &HomedirVolumeSource{}}}, + {Name: "vol1", VolumeSource: VolumeSource{DurableDir: &DurableDirVolumeSource{}}}, } at.Spec.Containers[0].VolumeMounts = []VolumeMount{ {Name: "vol1", MountPath: "/home/../etc"}, @@ -711,10 +711,10 @@ func TestActorTemplateValidation(t *testing.T) { wantErr: true, errMsg: "MountPath must be a clean absolute Unix path", }, { - name: "Volumes: HomeDir MountPath with trailing '..' is invalid", + name: "Volumes: DurableDir MountPath with trailing '..' is invalid", mutate: func(at *ActorTemplate) { at.Spec.Volumes = []Volume{ - {Name: "vol1", VolumeSource: VolumeSource{HomeDir: &HomedirVolumeSource{}}}, + {Name: "vol1", VolumeSource: VolumeSource{DurableDir: &DurableDirVolumeSource{}}}, } at.Spec.Containers[0].VolumeMounts = []VolumeMount{ {Name: "vol1", MountPath: "/home/.."}, @@ -723,10 +723,10 @@ func TestActorTemplateValidation(t *testing.T) { wantErr: true, errMsg: "MountPath must be a clean absolute Unix path", }, { - name: "Volumes: HomeDir MountPath with '.' 
component is invalid", + name: "Volumes: DurableDir MountPath with '.' component is invalid", mutate: func(at *ActorTemplate) { at.Spec.Volumes = []Volume{ - {Name: "vol1", VolumeSource: VolumeSource{HomeDir: &HomedirVolumeSource{}}}, + {Name: "vol1", VolumeSource: VolumeSource{DurableDir: &DurableDirVolumeSource{}}}, } at.Spec.Containers[0].VolumeMounts = []VolumeMount{ {Name: "vol1", MountPath: "/home/./user"}, @@ -735,10 +735,10 @@ func TestActorTemplateValidation(t *testing.T) { wantErr: true, errMsg: "MountPath must be a clean absolute Unix path", }, { - name: "Volumes: HomeDir MountPath containing dotfile is valid (only bare '.' / '..' components are rejected)", + name: "Volumes: DurableDir MountPath containing dotfile is valid (only bare '.' / '..' components are rejected)", mutate: func(at *ActorTemplate) { at.Spec.Volumes = []Volume{ - {Name: "vol1", VolumeSource: VolumeSource{HomeDir: &HomedirVolumeSource{}}}, + {Name: "vol1", VolumeSource: VolumeSource{DurableDir: &DurableDirVolumeSource{}}}, } at.Spec.Containers[0].VolumeMounts = []VolumeMount{ {Name: "vol1", MountPath: "/home/.config"}, @@ -746,10 +746,10 @@ func TestActorTemplateValidation(t *testing.T) { }, wantErr: false, }, { - name: "Volumes: HomeDir MountPath with NUL byte is invalid", + name: "Volumes: DurableDir MountPath with NUL byte is invalid", mutate: func(at *ActorTemplate) { at.Spec.Volumes = []Volume{ - {Name: "vol1", VolumeSource: VolumeSource{HomeDir: &HomedirVolumeSource{}}}, + {Name: "vol1", VolumeSource: VolumeSource{DurableDir: &DurableDirVolumeSource{}}}, } at.Spec.Containers[0].VolumeMounts = []VolumeMount{ {Name: "vol1", MountPath: "/home\x00/user"}, @@ -758,10 +758,10 @@ func TestActorTemplateValidation(t *testing.T) { wantErr: true, errMsg: "MountPath must be a clean absolute Unix path", }, { - name: "Volumes: HomeDir MountPath with control character is invalid", + name: "Volumes: DurableDir MountPath with control character is invalid", mutate: func(at *ActorTemplate) { 
at.Spec.Volumes = []Volume{ - {Name: "vol1", VolumeSource: VolumeSource{HomeDir: &HomedirVolumeSource{}}}, + {Name: "vol1", VolumeSource: VolumeSource{DurableDir: &DurableDirVolumeSource{}}}, } at.Spec.Containers[0].VolumeMounts = []VolumeMount{ {Name: "vol1", MountPath: "/home\t/user"}, @@ -770,11 +770,11 @@ func TestActorTemplateValidation(t *testing.T) { wantErr: true, errMsg: "MountPath must be a clean absolute Unix path", }, { - name: "Volumes: HomeDir mount with invalid MountPath in second container is rejected", + name: "Volumes: DurableDir mount with invalid MountPath in second container is rejected", mutate: func(at *ActorTemplate) { at.Spec.Volumes = []Volume{ - {Name: "vol1", VolumeSource: VolumeSource{HomeDir: &HomedirVolumeSource{}}}, - {Name: "vol2", VolumeSource: VolumeSource{HomeDir: &HomedirVolumeSource{}}}, + {Name: "vol1", VolumeSource: VolumeSource{DurableDir: &DurableDirVolumeSource{}}}, + {Name: "vol2", VolumeSource: VolumeSource{DurableDir: &DurableDirVolumeSource{}}}, } at.Spec.Containers = append(at.Spec.Containers, Container{ Name: "sidecar", @@ -793,7 +793,7 @@ func TestActorTemplateValidation(t *testing.T) { name: "Volumes: Volume Name with uppercase is invalid", mutate: func(at *ActorTemplate) { at.Spec.Volumes = []Volume{ - {Name: "Vol1", VolumeSource: VolumeSource{HomeDir: &HomedirVolumeSource{}}}, + {Name: "Vol1", VolumeSource: VolumeSource{DurableDir: &DurableDirVolumeSource{}}}, } }, wantErr: true, @@ -802,7 +802,7 @@ func TestActorTemplateValidation(t *testing.T) { name: "Volumes: Volume Name with underscore is invalid", mutate: func(at *ActorTemplate) { at.Spec.Volumes = []Volume{ - {Name: "vol_1", VolumeSource: VolumeSource{HomeDir: &HomedirVolumeSource{}}}, + {Name: "vol_1", VolumeSource: VolumeSource{DurableDir: &DurableDirVolumeSource{}}}, } }, wantErr: true, @@ -811,7 +811,7 @@ func TestActorTemplateValidation(t *testing.T) { name: "Volumes: VolumeMount Name with uppercase is invalid", mutate: func(at *ActorTemplate) { 
at.Spec.Volumes = []Volume{ - {Name: "vol1", VolumeSource: VolumeSource{HomeDir: &HomedirVolumeSource{}}}, + {Name: "vol1", VolumeSource: VolumeSource{DurableDir: &DurableDirVolumeSource{}}}, } at.Spec.Containers[0].VolumeMounts = []VolumeMount{ {Name: "Vol1", MountPath: "/home/user"}, @@ -820,24 +820,24 @@ func TestActorTemplateValidation(t *testing.T) { wantErr: true, errMsg: "Name must be a valid DNS label", }, { - name: "Volumes: HomeDir volume with SandboxClass microvm is invalid", + name: "Volumes: DurableDir volume with SandboxClass microvm is invalid", mutate: func(at *ActorTemplate) { at.Spec.SandboxClass = SandboxClassMicroVM at.Spec.Volumes = []Volume{ - {Name: "vol1", VolumeSource: VolumeSource{HomeDir: &HomedirVolumeSource{}}}, + {Name: "vol1", VolumeSource: VolumeSource{DurableDir: &DurableDirVolumeSource{}}}, } at.Spec.Containers[0].VolumeMounts = []VolumeMount{ {Name: "vol1", MountPath: "/home/user"}, } }, wantErr: true, - errMsg: "HomeDir volumes are not supported when sandboxClass is 'microvm'", + errMsg: "DurableDir volumes are not supported when sandboxClass is 'microvm'", }, { - name: "Volumes: HomeDir volume with SandboxClass gvisor is valid", + name: "Volumes: DurableDir volume with SandboxClass gvisor is valid", mutate: func(at *ActorTemplate) { at.Spec.SandboxClass = SandboxClassGvisor at.Spec.Volumes = []Volume{ - {Name: "vol1", VolumeSource: VolumeSource{HomeDir: &HomedirVolumeSource{}}}, + {Name: "vol1", VolumeSource: VolumeSource{DurableDir: &DurableDirVolumeSource{}}}, } at.Spec.Containers[0].VolumeMounts = []VolumeMount{ {Name: "vol1", MountPath: "/home/user"}, @@ -845,7 +845,7 @@ func TestActorTemplateValidation(t *testing.T) { }, wantErr: false, }, { - name: "Volumes: SandboxClass microvm without HomeDir volumes is valid", + name: "Volumes: SandboxClass microvm without DurableDir volumes is valid", mutate: func(at *ActorTemplate) { at.Spec.SandboxClass = SandboxClassMicroVM }, diff --git a/pkg/api/v1alpha1/zz_generated.deepcopy.go 
b/pkg/api/v1alpha1/zz_generated.deepcopy.go index 483b4bb9..f6a678ca 100644 --- a/pkg/api/v1alpha1/zz_generated.deepcopy.go +++ b/pkg/api/v1alpha1/zz_generated.deepcopy.go @@ -208,6 +208,21 @@ func (in *ContainerReadyz) DeepCopy() *ContainerReadyz { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *DurableDirVolumeSource) DeepCopyInto(out *DurableDirVolumeSource) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DurableDirVolumeSource. +func (in *DurableDirVolumeSource) DeepCopy() *DurableDirVolumeSource { + if in == nil { + return nil + } + out := new(DurableDirVolumeSource) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *EnvVar) DeepCopyInto(out *EnvVar) { *out = *in @@ -273,21 +288,6 @@ func (in *HTTPGetAction) DeepCopy() *HTTPGetAction { return out } -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *HomedirVolumeSource) DeepCopyInto(out *HomedirVolumeSource) { - *out = *in -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HomedirVolumeSource. -func (in *HomedirVolumeSource) DeepCopy() *HomedirVolumeSource { - if in == nil { - return nil - } - out := new(HomedirVolumeSource) - in.DeepCopyInto(out) - return out -} - // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *SandboxConfig) DeepCopyInto(out *SandboxConfig) { *out = *in @@ -448,9 +448,9 @@ func (in *VolumeMount) DeepCopy() *VolumeMount { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *VolumeSource) DeepCopyInto(out *VolumeSource) { *out = *in - if in.HomeDir != nil { - in, out := &in.HomeDir, &out.HomeDir - *out = new(HomedirVolumeSource) + if in.DurableDir != nil { + in, out := &in.DurableDir, &out.DurableDir + *out = new(DurableDirVolumeSource) **out = **in } } From 8ce291d358b7f9cd5cc27d2a51ff932d14dfdb7f Mon Sep 17 00:00:00 2001 From: dberkov Date: Thu, 25 Jun 2026 17:52:33 -0700 Subject: [PATCH 06/17] docs: add Pause lifecycle state, DurableDir volumes, and Snapshot scope definitions to glossary --- docs/glossary.md | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/docs/glossary.md b/docs/glossary.md index 29ebfd95..301c9ac9 100644 --- a/docs/glossary.md +++ b/docs/glossary.md @@ -62,13 +62,44 @@ because they change too frequently for etcd. ## Lifecycle - **Suspend**: hibernate a running Actor by checkpointing it to a snapshot and - freeing its Worker. + freeing its Worker. The requested snapshots are uploaded to external storage. -- **Resume**: activate a suspended Actor by restoring it onto a Worker. The +- **Pause**: a short-term checkpoint of a running Actor. Snapshot files remain + on the node VM, and the following Resume is prioritized onto the node VM + where the snapshots are persisted. + +- **Resume**: activate a suspended/paused Actor by restoring it onto a Worker. The common path restores from a snapshot rather than cold-booting. +## Volumes + +- **DurableDir volume**: a directory mounted into one or more containers + whose contents are preserved by the [`Data` snapshot scope](#snapshots) + and therefore survive across Suspend/Resume independently of process + memory or other rootfs writes. A single `ActorTemplate` may declare + multiple `DurableDir` volumes, and the same volume may be mounted into + multiple containers (potentially at different paths). This is the + per-Actor application-data surface. 
+ ## Snapshots +- **Snapshot scope**: what an `ActorTemplate`'s `SnapshotsConfig` includes + in a given snapshot. Two scopes exist today: + - **`full`**: process memory plus the rootfs delta on top of the OCI + image (which also includes any attached `DurableDir` volumes, + since they live inside rootfs). Used to capture everything needed + to resume hot. + - **`data`**: only the contents of attached volumes that support + snapshots — currently `DurableDir` volumes. Process memory and the + rest of rootfs are discarded; on Resume the Actor cold-boots from + the OCI image with `DurableDir` contents restored. Used to persist + application data cheaply without the cost of a full memory image. + + Configured per-trigger via `onPause` and `onCommit`: `onPause` selects + what is captured during a [Pause](#lifecycle) (kept on the node), and + `onCommit` selects what is captured during a [Suspend](#lifecycle) + (uploaded to snapshot storage). `onCommit` must be a subset of `onPause`. + - **Golden Snapshot**: the initial checkpoint captured once, when an `ActorTemplate` is created, from a temporary "golden" boot of the workload. 
By default an Actor of that template is first restored from this shared From d484c296aa72f5e7355aeb8513da637aaf3c4889 Mon Sep 17 00:00:00 2001 From: dberkov Date: Thu, 25 Jun 2026 19:32:51 -0700 Subject: [PATCH 07/17] fix code after rebase --- cmd/atelet/main.go | 208 +++++++------------------------- cmd/ateom-gvisor/main.go | 35 ++---- cmd/ateom-gvisor/runsc.go | 5 +- internal/ateompath/ateompath.go | 6 +- 4 files changed, 59 insertions(+), 195 deletions(-) diff --git a/cmd/atelet/main.go b/cmd/atelet/main.go index 8840b753..ca4d93e2 100644 --- a/cmd/atelet/main.go +++ b/cmd/atelet/main.go @@ -350,11 +350,11 @@ func (s *AteomHerder) Checkpoint(ctx context.Context, req *ateletpb.CheckpointRe switch req.GetType() { case ateletpb.CheckpointType_CHECKPOINT_TYPE_EXTERNAL: - if err := s.uploadExternalCheckpoint(ctx, req, checkpointDir, sandboxRec, req.GetScope() == ateletpb.SnapshotScope_SNAPSHOT_SCOPE_FULL, hasDurableDirVolumeMount(req.GetSpec())); err != nil { + if err := s.uploadExternalCheckpoint(ctx, req, checkpointDir, sandboxRec); err != nil { return nil, err } case ateletpb.CheckpointType_CHECKPOINT_TYPE_LOCAL: - if err := s.moveLocalCheckpoint(ctx, req, checkpointDir, sandboxRec, req.GetScope() == ateletpb.SnapshotScope_SNAPSHOT_SCOPE_FULL, hasDurableDirVolumeMount(req.GetSpec())); err != nil { + if err := s.moveLocalCheckpoint(ctx, req, checkpointDir, sandboxRec); err != nil { return nil, err } default: @@ -368,24 +368,6 @@ func (s *AteomHerder) Checkpoint(ctx context.Context, req *ateletpb.CheckpointRe return &ateletpb.CheckpointResponse{}, nil } -// returns true if at least one of the containers in the workload spec has a durable-dir volume mount -func hasDurableDirVolumeMount(spec *ateletpb.WorkloadSpec) bool { - hdv := make(map[string]bool) - for _, v := range spec.GetVolumes() { - if v.GetType() == ateletpb.VolumeType_VOLUME_TYPE_DURABLE_DIR { - hdv[v.GetName()] = true - } - } - for _, ctr := range spec.GetContainers() { - for _, vm := range 
ctr.GetVolumeMounts() { - if hdv[vm.GetName()] { - return true - } - } - } - return false -} - func toAteomSnapshotScope(scope ateletpb.SnapshotScope) ateompb.SnapshotScope { // assumption the request already been valdated and scope is in the valid values set switch scope { @@ -396,7 +378,7 @@ func toAteomSnapshotScope(scope ateletpb.SnapshotScope) ateompb.SnapshotScope { } } -func (s *AteomHerder) moveLocalCheckpoint(ctx context.Context, req *ateletpb.CheckpointRequest, checkpointDir string, rec *sandboxAssetsRecord, moveProcessFiles bool, moveDurableDirFiles bool) error { +func (s *AteomHerder) moveLocalCheckpoint(ctx context.Context, req *ateletpb.CheckpointRequest, checkpointDir string, rec *sandboxAssetsRecord) error { localCheckpointPath := filepath.Join(ateompath.LocalCheckpointsDir(req.GetActorTemplateNamespace(), req.GetActorTemplateName(), req.GetActorId()), req.GetLocalConfig().GetSnapshotPrefix()) if err := os.MkdirAll(localCheckpointPath, 0o700); err != nil { return fmt.Errorf("while creating local checkpoint directory: %w", err) @@ -404,32 +386,14 @@ func (s *AteomHerder) moveLocalCheckpoint(ctx context.Context, req *ateletpb.Che ns, tmpl := req.GetActorTemplateNamespace(), req.GetActorTemplateName() - if moveProcessFiles { - // Move exactly the files ateom reported. - for _, fileName := range rec.SnapshotFiles { - src := filepath.Join(checkpointDir, fileName) - dst := filepath.Join(localCheckpointPath, fileName) - recordSnapshotSize(ctx, strings.TrimSuffix(fileName, ".img"), src, ns, tmpl) + // Move exactly the files ateom reported. 
+ for _, fileName := range rec.SnapshotFiles { + src := filepath.Join(checkpointDir, fileName) + dst := filepath.Join(localCheckpointPath, fileName) + recordSnapshotSize(ctx, strings.TrimSuffix(fileName, ".img"), src, ns, tmpl) - if err := os.Rename(src, dst); err != nil { - return fmt.Errorf("failed to move %s to %s: %w", src, dst, err) - } - } - } - - if moveDurableDirFiles { - // move durable-dir files - durableDirCheckpointDir := filepath.Join(checkpointDir, ateompath.DurableDirSnapshotsSubfoldderName) - durableDirLocalCheckpointPath := filepath.Join(localCheckpointPath, ateompath.DurableDirSnapshotsSubfoldderName) - if err := os.MkdirAll(durableDirLocalCheckpointPath, 0o700); err != nil { - return fmt.Errorf("while creating local checkpoint directory: %w", err) - } - for _, fileName := range []string{"fscheckpoint.json", "multitar.img", "pages.img", "pages_meta.img"} { - src := filepath.Join(durableDirCheckpointDir, fileName) - dst := filepath.Join(durableDirLocalCheckpointPath, fileName) - if err := os.Rename(src, dst); err != nil { - return fmt.Errorf("failed to move %s to %s: %w", src, dst, err) - } + if err := os.Rename(src, dst); err != nil { + return fmt.Errorf("failed to move %s to %s: %w", src, dst, err) } } @@ -446,45 +410,25 @@ func (s *AteomHerder) moveLocalCheckpoint(ctx context.Context, req *ateletpb.Che return nil } -func (s *AteomHerder) uploadExternalCheckpoint(ctx context.Context, req *ateletpb.CheckpointRequest, checkpointDir string, rec *sandboxAssetsRecord, uploadProcessFiles bool, uploadDurableDirFiles bool) error { +func (s *AteomHerder) uploadExternalCheckpoint(ctx context.Context, req *ateletpb.CheckpointRequest, checkpointDir string, rec *sandboxAssetsRecord) error { ns, tmpl := req.GetActorTemplateNamespace(), req.GetActorTemplateName() prefix := strings.TrimSuffix(req.GetExternalConfig().GetSnapshotUriPrefix(), "/") // Upload exactly the files ateom reported (each zstd-compressed). 
g, gCtx := errgroup.WithContext(ctx) - - if uploadProcessFiles { - - for _, fileName := range rec.SnapshotFiles { - fileName := fileName - local := filepath.Join(checkpointDir, fileName) - recordSnapshotSize(ctx, strings.TrimSuffix(fileName, ".img"), local, ns, tmpl) - g.Go(func() error { - if err := ategcs.SendLocalFileToGCSWithZstd(gCtx, s.gcsClient, prefix+"/"+fileName+".zstd", local); err != nil { - return fmt.Errorf("while uploading %s to GCS: %w", fileName, err) - } - return nil - }) - } - if err := g.Wait(); err != nil { - return err - } - } - - if uploadDurableDirFiles { - for _, fileName := range []string{"fscheckpoint.json", "multitar.img", "pages.img", "pages_meta.img"} { - impPath := filepath.Join(checkpointDir, ateompath.DurableDirSnapshotsSubfoldderName, fileName) - - before, _, _ := strings.Cut(fileName, ".") - recordSnapshotSize(ctx, before, impPath, ns, tmpl) - - if err := uploadIfExists(ctx, s.gcsClient, - fmt.Sprintf("%s/%s/%s.zstd", prefix, ateompath.DurableDirSnapshotsSubfoldderName, fileName), - impPath, - ); err != nil { - return err + for _, fileName := range rec.SnapshotFiles { + fileName := fileName + local := filepath.Join(checkpointDir, fileName) + recordSnapshotSize(ctx, strings.TrimSuffix(fileName, ".img"), local, ns, tmpl) + g.Go(func() error { + if err := ategcs.SendLocalFileToGCSWithZstd(gCtx, s.gcsClient, prefix+"/"+fileName+".zstd", local); err != nil { + return fmt.Errorf("while uploading %s to GCS: %w", fileName, err) } - } + return nil + }) + } + if err := g.Wait(); err != nil { + return err } // Pin the sandbox binaries + snapshot file list into a manifest beside the @@ -532,11 +476,7 @@ func (s *AteomHerder) Restore(ctx context.Context, req *ateletpb.RestoreRequest) if err != nil { return nil, fmt.Errorf("while fetching snapshot manifest: %w", err) } - sandboxRec, err = unmarshalSandboxRecord(manifest) - if err != nil { - return nil, err - } - if err := s.downloadExternalCheckpoint(ctx, prefix, checkpointDir, req.GetScope()); 
err != nil { + if sandboxRec, err = unmarshalSandboxRecord(manifest); err != nil { return nil, err } case ateletpb.CheckpointType_CHECKPOINT_TYPE_LOCAL: @@ -546,11 +486,7 @@ func (s *AteomHerder) Restore(ctx context.Context, req *ateletpb.RestoreRequest) if err != nil { return nil, fmt.Errorf("while reading local snapshot manifest: %w", err) } - sandboxRec, err = unmarshalSandboxRecord(manifest) - if err != nil { - return nil, err - } - if err := s.copyLocalCheckpoint(ctx, snapshotPrefix, localCheckpointDir, checkpointDir, req.GetScope()); err != nil { + if sandboxRec, err = unmarshalSandboxRecord(manifest); err != nil { return nil, err } default: @@ -632,36 +568,18 @@ func (s *AteomHerder) Restore(ctx context.Context, req *ateletpb.RestoreRequest) return &ateletpb.RestoreResponse{}, nil } -func (s *AteomHerder) copyLocalCheckpoint(ctx context.Context, snapshotPrefix string, srcDir, dstDir string, files []string, scope ateletpb.SnapshotScope) error { - switch scope { - case ateletpb.SnapshotScope_SNAPSHOT_SCOPE_FULL: - for _, fileName := range files { - if ctx.Err() != nil { - return fmt.Errorf("context cancelled: %w", ctx.Err()) - } - src := filepath.Join(srcDir, snapshotPrefix, fileName) - dst := filepath.Join(dstDir, fileName) - if _, err := copyFile(src, dst); err != nil { - return fmt.Errorf("failed to copy %s to %s: %w", src, dst, err) - } +func (s *AteomHerder) copyLocalCheckpoint(ctx context.Context, snapshotPrefix string, srcDir, dstDir string, files []string) error { + for _, fileName := range files { + if ctx.Err() != nil { + return fmt.Errorf("context cancelled: %w", ctx.Err()) } - case ateletpb.SnapshotScope_SNAPSHOT_SCOPE_DATA: - ddDstDir := filepath.Join(dstDir, ateompath.DurableDirSnapshotsSubfoldderName) - if err := os.MkdirAll(ddDstDir, 0o700); err != nil { - return fmt.Errorf("while creating durable-dir directory: %w", err) - } - - for _, fileName := range []string{"fscheckpoint.json", "multitar.img", "pages.img", "pages_meta.img"} { - if 
ctx.Err() != nil { - return fmt.Errorf("context cancelled: %w", ctx.Err()) - } - src := filepath.Join(srcDir, snapshotPrefix, ateompath.DurableDirSnapshotsSubfoldderName, fileName) - dst := filepath.Join(ddDstDir, fileName) - if _, err := copyFile(src, dst); err != nil { - return fmt.Errorf("failed to copy %s to %s: %w", src, dst, err) - } + src := filepath.Join(srcDir, snapshotPrefix, fileName) + dst := filepath.Join(dstDir, fileName) + if _, err := copyFile(src, dst); err != nil { + return fmt.Errorf("failed to copy %s to %s: %w", src, dst, err) } } + return nil } @@ -690,39 +608,19 @@ func copyFile(src, dst string) (int64, error) { return nBytes, err } -func (s *AteomHerder) downloadExternalCheckpoint(ctx context.Context, snapshotUriPrefix string, dstDir string, files []string, scope ateletpb.SnapshotScope) error { +func (s *AteomHerder) downloadExternalCheckpoint(ctx context.Context, snapshotUriPrefix string, dstDir string, files []string) error { prefix := strings.TrimSuffix(snapshotUriPrefix, "/") g, gCtx := errgroup.WithContext(ctx) - - switch scope { - case ateletpb.SnapshotScope_SNAPSHOT_SCOPE_FULL: - g, gCtx := errgroup.WithContext(ctx) - for _, fileName := range files { - fileName := fileName - local := filepath.Join(dstDir, fileName) - g.Go(func() error { - if err := ategcs.FetchLocalFileFromGCSWithZstd(gCtx, s.gcsClient, prefix+"/"+fileName+".zstd", local); err != nil { - return fmt.Errorf("while downloading %s from GCS: %w", fileName, err) - } - return nil - }) - } - if err := g.Wait(); err != nil { - return err - } - case ateletpb.SnapshotScope_SNAPSHOT_SCOPE_DATA: - for _, fileName := range []string{"fscheckpoint.json", "multitar.img", "pages.img", "pages_meta.img"} { - remote := fmt.Sprintf("%s/%s/%s.zstd", prefix, ateompath.DurableDirSnapshotsSubfoldderName, fileName) - g.Go(func() error { - local := filepath.Join(dstDir, ateompath.DurableDirSnapshotsSubfoldderName, fileName) - if err := ategcs.FetchLocalFileFromGCSWithZstd(gCtx, s.gcsClient, 
remote, local); err != nil { - return fmt.Errorf("while downloading %s from GCS: %w", remote, err) - } - return nil - }) - } + for _, fileName := range files { + fileName := fileName + local := filepath.Join(dstDir, fileName) + g.Go(func() error { + if err := ategcs.FetchLocalFileFromGCSWithZstd(gCtx, s.gcsClient, prefix+"/"+fileName+".zstd", local); err != nil { + return fmt.Errorf("while downloading %s from GCS: %w", fileName, err) + } + return nil + }) } - if err := g.Wait(); err != nil { return err } @@ -893,19 +791,6 @@ func toAteomReadyz(in *ateletpb.Readyz) *ateompb.Readyz { return out } -// uploadIfExists uploads a local file to GCS (zstd-compressed) only if -// the file is present. Missing files are silently skipped — used for -// optional checkpoint side-files (pages.img, pages_meta.img). -func uploadIfExists(ctx context.Context, gcs ategcs.ObjectStorage, remoteURI, localPath string) error { - if _, err := os.Stat(localPath); err != nil { - return nil - } - if err := ategcs.SendLocalFileToGCSWithZstd(ctx, gcs, remoteURI, localPath); err != nil { - return fmt.Errorf("while uploading %s to GCS: %w", filepath.Base(localPath), err) - } - return out -} - type AteomDialer struct { conns *lru.Cache } @@ -1102,11 +987,6 @@ func resetActorDirs(actorTemplateNamespace, actorTemplateName, actorID string) e return fmt.Errorf("while creating restore-state dir: %w", err) } - restoreStateDurableDir := filepath.Join(restoreStateDir, ateompath.DurableDirSnapshotsSubfoldderName) - if err := os.MkdirAll(restoreStateDurableDir, 0o700); err != nil { - return fmt.Errorf("while creating restore-state durable-dir dir: %w", err) - } - // World-readable (0o755): bind-mounted into the actor, whose workload // reads it through the gofer. 
identityDir := ateompath.ActorIdentityDirPath(actorTemplateNamespace, actorTemplateName, actorID) diff --git a/cmd/ateom-gvisor/main.go b/cmd/ateom-gvisor/main.go index 81b43d3c..30010eac 100644 --- a/cmd/ateom-gvisor/main.go +++ b/cmd/ateom-gvisor/main.go @@ -23,7 +23,6 @@ import ( "log/slog" "net" "os" - "path/filepath" "runtime" "sort" "sync" @@ -264,33 +263,25 @@ func (s *AteomService) CheckpointWorkload(ctx context.Context, req *ateompb.Chec // Always take durable-dir snapshot if at least one container has a durable-dir volume mount. // TODO(dberkov): this is a temporary workaround until gVisor supports taking durable-dir snapshots in a single request with the process snapshot. - var ddv []string - for _, ctr := range req.GetSpec().GetContainers() { - ddv = append(ddv, ctr.GetDurableDirVolumes()...) - } - if len(ddv) > 0 { - // TODO(dberkov) add control for "resume=true" flag - // Checkpoint each durable-dir volume - - // prepare durable-dir checkpoint folder - fsCheckpointPath := filepath.Join(checkpointPath, ateompath.DurableDirSnapshotsSubfoldderName) - if err := os.MkdirAll(fsCheckpointPath, 0o700); err != nil { - return nil, fmt.Errorf("while creating fscheckpoint directory: %w", err) + switch req.GetScope() { + case ateompb.SnapshotScope_SNAPSHOT_SCOPE_DATA: + var ddv []string + for _, ctr := range req.GetSpec().GetContainers() { + ddv = append(ddv, ctr.GetDurableDirVolumes()...) } - - // keep gVisor running if full snapshot is requested. 
- leaveRunning := req.GetScope() == ateompb.SnapshotScope_SNAPSHOT_SCOPE_FULL - if err := rcmd.cmdFsCheckpoint(ctx, "pause", fsCheckpointPath, ddv, leaveRunning); err != nil { + if len(ddv) == 0 { + return nil, fmt.Errorf("no durable-dir volumes found for DATA snapshot") + } + if err := rcmd.cmdFsCheckpoint(ctx, "pause", checkpointPath, ddv); err != nil { return nil, fmt.Errorf("while fscheckpointing durable-dir %q: %w", ddv[0], err) } - } - - // take full snapshot (memory + rootfs delta) if requested - if req.GetScope() == ateompb.SnapshotScope_SNAPSHOT_SCOPE_FULL { + case ateompb.SnapshotScope_SNAPSHOT_SCOPE_FULL: // Checkpoint pause container (root of the sandbox) if err := rcmd.cmdCheckpoint(ctx, "pause", checkpointPath); err != nil { return nil, fmt.Errorf("while checkpointing pause: %w", err) } + default: + return nil, fmt.Errorf("unsupported snapshot scope: %v", req.GetScope()) } // After checkpointing the sandbox root, runsc may no longer have a usable @@ -395,7 +386,7 @@ func (s *AteomService) RestoreWorkload(ctx context.Context, req *ateompb.Restore switch req.GetScope() { case ateompb.SnapshotScope_SNAPSHOT_SCOPE_DATA: // Create and restore pause container - if err := rcmd.cmdCreate(ctx, os.Stdout, "pause", []string{"--fs-restore-image-path", filepath.Join(checkpointDir, ateompath.DurableDirSnapshotsSubfoldderName)}); err != nil { + if err := rcmd.cmdCreate(ctx, os.Stdout, "pause", []string{"--fs-restore-image-path", checkpointDir}); err != nil { return nil, fmt.Errorf("while creating pause container: %w", err) } if err := rcmd.cmdStart(ctx, os.Stdout, "pause"); err != nil { diff --git a/cmd/ateom-gvisor/runsc.go b/cmd/ateom-gvisor/runsc.go index ae35fc69..2bbc5f2f 100644 --- a/cmd/ateom-gvisor/runsc.go +++ b/cmd/ateom-gvisor/runsc.go @@ -134,7 +134,7 @@ func (r *runsc) cmdCheckpoint(ctx context.Context, containerName, checkpointPath return nil } -func (r *runsc) cmdFsCheckpoint(ctx context.Context, containerName, checkpointPath string, durableDirMounts 
[]string, leaveRunning bool) error { +func (r *runsc) cmdFsCheckpoint(ctx context.Context, containerName, checkpointPath string, durableDirMounts []string) error { reapLock.RLock() defer reapLock.RUnlock() @@ -155,9 +155,6 @@ func (r *runsc) cmdFsCheckpoint(ctx context.Context, containerName, checkpointPa for _, ddv := range durableDirMounts { args = append(args, "-path", ddv) } - if leaveRunning { - args = append(args, "-leave-running") - } // name of the container must be the last paramter. args = append(args, containerName) diff --git a/internal/ateompath/ateompath.go b/internal/ateompath/ateompath.go index 2ad0ebf8..d9beb2d6 100644 --- a/internal/ateompath/ateompath.go +++ b/internal/ateompath/ateompath.go @@ -23,10 +23,6 @@ const ( // The base path. This is both the path of the root shared folder on the // host filesystem, and when it is mounted into ateom and atelet containers. BasePath = "/var/lib/ateom-gvisor" - - // DurableDir snapshots are temporarily stored in a subfolder relative to the process checkpoint path. - // This is because gVisor is missing the capability to separate durable-dir content from the rest of rootfs upon checkpointing. 
- DurableDirSnapshotsSubfoldderName = "durabledir" ) var ( @@ -145,7 +141,7 @@ func LocalCheckpointsDir(actorTemplateNamespace, actorTemplateName, actorID stri func DurableDirVolumeMountsDir(actorTemplateNamespace, actorTemplateName, actorID string) string { return filepath.Join( ActorPath(actorTemplateNamespace, actorTemplateName, actorID), - DurableDirSnapshotsSubfoldderName, + "durable-dir", ) } From 217614dbff0fd4d32f6b79743b8aeded04acce0c Mon Sep 17 00:00:00 2001 From: dberkov Date: Thu, 25 Jun 2026 19:37:25 -0700 Subject: [PATCH 08/17] docs: fix typo in comment within runsc.go --- cmd/ateom-gvisor/runsc.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/ateom-gvisor/runsc.go b/cmd/ateom-gvisor/runsc.go index 2bbc5f2f..04f26e40 100644 --- a/cmd/ateom-gvisor/runsc.go +++ b/cmd/ateom-gvisor/runsc.go @@ -156,7 +156,7 @@ func (r *runsc) cmdFsCheckpoint(ctx context.Context, containerName, checkpointPa args = append(args, "-path", ddv) } - // name of the container must be the last paramter. + // name of the container must be the last parameter. 
args = append(args, containerName) cmd := exec.CommandContext( From dabc59026118ef7ceae5f2b94648dd4a711490ee Mon Sep 17 00:00:00 2001 From: dberkov Date: Fri, 26 Jun 2026 15:00:05 -0700 Subject: [PATCH 09/17] address BenTheElder's comment from 06/25 --- .../internal/controlapi/workflow_pause.go | 7 +--- .../internal/controlapi/workflow_suspend.go | 7 +--- cmd/atelet/main.go | 2 +- demos/counter/counter.go | 2 +- internal/e2e/suites/demo/demo_test.go | 4 +- .../generated/ate.dev_actortemplates.yaml | 3 ++ pkg/api/v1alpha1/actortemplate_types.go | 1 + .../v1alpha1/actortemplate_validation_test.go | 41 ++++++++++++++++--- 8 files changed, 48 insertions(+), 19 deletions(-) diff --git a/cmd/ateapi/internal/controlapi/workflow_pause.go b/cmd/ateapi/internal/controlapi/workflow_pause.go index 56ad4771..c1137bb0 100644 --- a/cmd/ateapi/internal/controlapi/workflow_pause.go +++ b/cmd/ateapi/internal/controlapi/workflow_pause.go @@ -28,7 +28,6 @@ import ( listersv1alpha1 "github.com/agent-substrate/substrate/pkg/client/listers/api/v1alpha1" "github.com/agent-substrate/substrate/pkg/proto/ateapipb" "k8s.io/apimachinery/pkg/util/wait" - "k8s.io/client-go/kubernetes" ) // PauseInput holds the immutable parameters requested by the client. 
@@ -92,9 +91,7 @@ func (s *MarkPausingStep) Execute(ctx context.Context, input *PauseInput, state func (s *MarkPausingStep) RetryBackoff() *wait.Backoff { return nil } type CallAteletPauseStep struct { - dialer *AteletDialer - kubeClient kubernetes.Interface - secretCache *envSecretCache + dialer *AteletDialer } func (s *CallAteletPauseStep) Name() string { return "CallAteletPause" } @@ -117,7 +114,7 @@ func (s *CallAteletPauseStep) Execute(ctx context.Context, input *PauseInput, st } client := ateletpb.NewAteomHerderClient(ateletConn) - workloadSpec, err := workloadSpecFromActorTemplate(ctx, s.kubeClient, s.secretCache, state.ActorTemplate) + workloadSpec, err := workloadSpecFromActorTemplate(ctx, nil, nil, state.ActorTemplate) if err != nil { return err } diff --git a/cmd/ateapi/internal/controlapi/workflow_suspend.go b/cmd/ateapi/internal/controlapi/workflow_suspend.go index 51df1ade..50a77734 100644 --- a/cmd/ateapi/internal/controlapi/workflow_suspend.go +++ b/cmd/ateapi/internal/controlapi/workflow_suspend.go @@ -29,7 +29,6 @@ import ( listersv1alpha1 "github.com/agent-substrate/substrate/pkg/client/listers/api/v1alpha1" "github.com/agent-substrate/substrate/pkg/proto/ateapipb" "k8s.io/apimachinery/pkg/util/wait" - "k8s.io/client-go/kubernetes" ) // SuspendInput holds the immutable parameters requested by the client. 
@@ -94,9 +93,7 @@ func (s *MarkSuspendingStep) Execute(ctx context.Context, input *SuspendInput, s func (s *MarkSuspendingStep) RetryBackoff() *wait.Backoff { return nil } type CallAteletSuspendStep struct { - dialer *AteletDialer - kubeClient kubernetes.Interface - secretCache *envSecretCache + dialer *AteletDialer } func (s *CallAteletSuspendStep) Name() string { return "CallAteletSuspend" } @@ -119,7 +116,7 @@ func (s *CallAteletSuspendStep) Execute(ctx context.Context, input *SuspendInput } client := ateletpb.NewAteomHerderClient(ateletConn) - workloadSpec, err := workloadSpecFromActorTemplate(ctx, s.kubeClient, s.secretCache, state.ActorTemplate) + workloadSpec, err := workloadSpecFromActorTemplate(ctx, nil, nil, state.ActorTemplate) if err != nil { return err } diff --git a/cmd/atelet/main.go b/cmd/atelet/main.go index ca4d93e2..801454c6 100644 --- a/cmd/atelet/main.go +++ b/cmd/atelet/main.go @@ -369,7 +369,7 @@ func (s *AteomHerder) Checkpoint(ctx context.Context, req *ateletpb.CheckpointRe } func toAteomSnapshotScope(scope ateletpb.SnapshotScope) ateompb.SnapshotScope { - // assumption the request already been valdated and scope is in the valid values set + // assumption the request already been validated and scope is in the valid values set switch scope { case ateletpb.SnapshotScope_SNAPSHOT_SCOPE_DATA: return ateompb.SnapshotScope_SNAPSHOT_SCOPE_DATA diff --git a/demos/counter/counter.go b/demos/counter/counter.go index 69c77920..bd5b9084 100644 --- a/demos/counter/counter.go +++ b/demos/counter/counter.go @@ -54,7 +54,7 @@ func incrementFileCounter() int { } } counter++ - err = os.WriteFile(fileCounterPath, []byte(strconv.Itoa(counter)), 0644) + err = os.WriteFile(fileCounterPath, []byte(strconv.Itoa(counter)), 0o644) if err != nil { return -1 } diff --git a/internal/e2e/suites/demo/demo_test.go b/internal/e2e/suites/demo/demo_test.go index bf9304b0..74f10952 100644 --- a/internal/e2e/suites/demo/demo_test.go +++ b/internal/e2e/suites/demo/demo_test.go 
@@ -197,7 +197,7 @@ func TestDurableDirLifecycle(t *testing.T) { if err != nil { t.Fatalf("failed to call actor again: %v", err) } - validateCounterResponse(t, resp, "after paise", tc.wantMemoryAfterPause, tc.wantFileAfterPause) + validateCounterResponse(t, resp, "after pause", tc.wantMemoryAfterPause, tc.wantFileAfterPause) // // Suspending the actor @@ -424,7 +424,7 @@ func suspendActor(ctx context.Context, t *testing.T, clients *e2e.Clients, nsObj if err != nil { t.Fatalf("failed to call actor again: %v", err) } - validateCounterResponse(t, resp, "after pause", 2, 2) + validateCounterResponse(t, resp, "after suspend", 2, 2) // Suspending the actor before deletion t.Logf("Suspending Actor %q before deletion...", actorID) diff --git a/manifests/ate-install/generated/ate.dev_actortemplates.yaml b/manifests/ate-install/generated/ate.dev_actortemplates.yaml index 51e9ca95..d74b2d98 100644 --- a/manifests/ate-install/generated/ate.dev_actortemplates.yaml +++ b/manifests/ate-install/generated/ate.dev_actortemplates.yaml @@ -378,6 +378,9 @@ spec: x-kubernetes-validations: - message: Spec is immutable rule: self == oldSelf + - message: At most one DurableDir-typed volume is supported per ActorTemplate + rule: '!has(self.volumes) || self.volumes.filter(v, has(v.durableDir)).size() + <= 1' - message: A container may mount at most one DurableDir-typed volume rule: '!has(self.containers) || self.containers.all(c, !has(c.volumeMounts) || c.volumeMounts.filter(vm, has(self.volumes) && self.volumes.exists(v, diff --git a/pkg/api/v1alpha1/actortemplate_types.go b/pkg/api/v1alpha1/actortemplate_types.go index 37c067d6..5d1fc996 100644 --- a/pkg/api/v1alpha1/actortemplate_types.go +++ b/pkg/api/v1alpha1/actortemplate_types.go @@ -259,6 +259,7 @@ type SnapshotsConfig struct { // ActorTemplateSpec defined desired spec of an actor. 
// +// +kubebuilder:validation:XValidation:rule="!has(self.volumes) || self.volumes.filter(v, has(v.durableDir)).size() <= 1",message="At most one DurableDir-typed volume is supported per ActorTemplate" // +kubebuilder:validation:XValidation:rule="!has(self.containers) || self.containers.all(c, !has(c.volumeMounts) || c.volumeMounts.filter(vm, has(self.volumes) && self.volumes.exists(v, v.name == vm.name && has(v.durableDir))).size() <= 1)",message="A container may mount at most one DurableDir-typed volume" // +kubebuilder:validation:XValidation:rule="!has(self.sandboxClass) || self.sandboxClass != 'microvm' || !has(self.volumes) || !self.volumes.exists(v, has(v.durableDir))",message="DurableDir volumes are not supported when sandboxClass is 'microvm'" type ActorTemplateSpec struct { diff --git a/pkg/api/v1alpha1/actortemplate_validation_test.go b/pkg/api/v1alpha1/actortemplate_validation_test.go index 5a0da7bc..c9d5f771 100644 --- a/pkg/api/v1alpha1/actortemplate_validation_test.go +++ b/pkg/api/v1alpha1/actortemplate_validation_test.go @@ -548,7 +548,7 @@ func TestActorTemplateValidation(t *testing.T) { }, wantErr: false, }, { - name: "Volumes: 2 DurableDir mounts in same container is invalid", + name: "Volumes: 2 DurableDir volumes in template is invalid", mutate: func(at *ActorTemplate) { at.Spec.Volumes = []Volume{ {Name: "vol1", VolumeSource: VolumeSource{DurableDir: &DurableDirVolumeSource{}}}, @@ -560,9 +560,9 @@ func TestActorTemplateValidation(t *testing.T) { } }, wantErr: true, - errMsg: "A container may mount at most one DurableDir-typed volume", + errMsg: "At most one DurableDir-typed volume is supported per ActorTemplate", }, { - name: "Volumes: 2 DurableDir mounts in different containers is valid", + name: "Volumes: 2 DurableDir volumes spread across containers is invalid", mutate: func(at *ActorTemplate) { at.Spec.Volumes = []Volume{ {Name: "vol1", VolumeSource: VolumeSource{DurableDir: &DurableDirVolumeSource{}}}, @@ -579,6 +579,38 @@ func 
TestActorTemplateValidation(t *testing.T) { {Name: "vol1", MountPath: "/home1"}, } }, + wantErr: true, + errMsg: "At most one DurableDir-typed volume is supported per ActorTemplate", + }, { + name: "Volumes: same DurableDir volume mounted twice in one container is invalid", + mutate: func(at *ActorTemplate) { + at.Spec.Volumes = []Volume{ + {Name: "vol1", VolumeSource: VolumeSource{DurableDir: &DurableDirVolumeSource{}}}, + } + at.Spec.Containers[0].VolumeMounts = []VolumeMount{ + {Name: "vol1", MountPath: "/home1"}, + {Name: "vol1", MountPath: "/home2"}, + } + }, + wantErr: true, + errMsg: "A container may mount at most one DurableDir-typed volume", + }, { + name: "Volumes: same DurableDir volume mounted across two containers is valid", + mutate: func(at *ActorTemplate) { + at.Spec.Volumes = []Volume{ + {Name: "vol1", VolumeSource: VolumeSource{DurableDir: &DurableDirVolumeSource{}}}, + } + at.Spec.Containers = append(at.Spec.Containers, Container{ + Name: "sidecar", + Image: "busybox@sha256:326e0e090a9a4057e62a1b94236e7a2df2f2f76722f67232e0e47854e4df9c53", + VolumeMounts: []VolumeMount{ + {Name: "vol1", MountPath: "/home-sidecar"}, + }, + }) + at.Spec.Containers[0].VolumeMounts = []VolumeMount{ + {Name: "vol1", MountPath: "/home-main"}, + } + }, wantErr: false, }, { name: "Volumes: VolumeSource with no source set is invalid", @@ -774,13 +806,12 @@ func TestActorTemplateValidation(t *testing.T) { mutate: func(at *ActorTemplate) { at.Spec.Volumes = []Volume{ {Name: "vol1", VolumeSource: VolumeSource{DurableDir: &DurableDirVolumeSource{}}}, - {Name: "vol2", VolumeSource: VolumeSource{DurableDir: &DurableDirVolumeSource{}}}, } at.Spec.Containers = append(at.Spec.Containers, Container{ Name: "sidecar", Image: "busybox@sha256:326e0e090a9a4057e62a1b94236e7a2df2f2f76722f67232e0e47854e4df9c53", VolumeMounts: []VolumeMount{ - {Name: "vol2", MountPath: "home2"}, + {Name: "vol1", MountPath: "home1"}, }, }) at.Spec.Containers[0].VolumeMounts = []VolumeMount{ From 
7c722b58a8cec5e478c3163a395c803d0ec79cbd Mon Sep 17 00:00:00 2001 From: dberkov Date: Fri, 26 Jun 2026 15:09:39 -0700 Subject: [PATCH 10/17] addressed thockin's comments --- docs/glossary.md | 4 +- internal/e2e/suites/demo/demo_test.go | 6 +-- .../generated/ate.dev_actortemplates.yaml | 26 +++++----- pkg/api/v1alpha1/actortemplate_types.go | 18 +++---- .../v1alpha1/actortemplate_validation_test.go | 49 ++++++++++++++++--- 5 files changed, 68 insertions(+), 35 deletions(-) diff --git a/docs/glossary.md b/docs/glossary.md index 301c9ac9..2b935821 100644 --- a/docs/glossary.md +++ b/docs/glossary.md @@ -85,11 +85,11 @@ because they change too frequently for etcd. - **Snapshot scope**: what an `ActorTemplate`'s `SnapshotsConfig` includes in a given snapshot. Two scopes exist today: - - **`full`**: process memory plus the rootfs delta on top of the OCI + - **`Full`**: process memory plus the rootfs delta on top of the OCI image (which also includes any attached `DurableDir` volumes, since they live inside rootfs). Used to capture everything needed to resume hot. - - **`data`**: only the contents of attached volumes that support + - **`Data`**: only the contents of attached volumes that support snapshots — currently `DurableDir` volumes. Process memory and the rest of rootfs are discarded; on Resume the Actor cold-boots from the OCI image with `DurableDir` contents restored. 
Used to persist diff --git a/internal/e2e/suites/demo/demo_test.go b/internal/e2e/suites/demo/demo_test.go index 74f10952..f864a283 100644 --- a/internal/e2e/suites/demo/demo_test.go +++ b/internal/e2e/suites/demo/demo_test.go @@ -94,7 +94,7 @@ func TestDurableDirLifecycle(t *testing.T) { wantFileAfterSuspend int }{ { - name: "onCommit:full, onPause:full", + name: "onCommit:Full, onPause:Full", onCommit: v1alpha1.SnapshotScopeFull, onPause: v1alpha1.SnapshotScopeFull, wantMemoryAfterPause: 2, @@ -103,7 +103,7 @@ func TestDurableDirLifecycle(t *testing.T) { wantFileAfterSuspend: 3, }, { - name: "onCommit:data, onPause:full", + name: "onCommit:Data, onPause:Full", onCommit: v1alpha1.SnapshotScopeData, onPause: v1alpha1.SnapshotScopeFull, wantMemoryAfterPause: 2, @@ -112,7 +112,7 @@ func TestDurableDirLifecycle(t *testing.T) { wantFileAfterSuspend: 3, }, { - name: "onCommit:data, onPause:data", + name: "onCommit:Data, onPause:Data", onCommit: v1alpha1.SnapshotScopeData, onPause: v1alpha1.SnapshotScopeData, wantMemoryAfterPause: 1, diff --git a/manifests/ate-install/generated/ate.dev_actortemplates.yaml b/manifests/ate-install/generated/ate.dev_actortemplates.yaml index d74b2d98..5aac0173 100644 --- a/manifests/ate-install/generated/ate.dev_actortemplates.yaml +++ b/manifests/ate-install/generated/ate.dev_actortemplates.yaml @@ -269,32 +269,32 @@ spec: onCommit: description: |- OnCommit specifies what to include in the snapshot when a commit is requested. - If not provided, the "full" behavior is used by default. - The OnCommit must be a subset of the OnPause content. + If not provided, the "Full" behavior is used by default. + onCommit must be a subset of the onPause content. For example: - - if OnPause is "full", then OnCommit can be "full" or "data". - - if OnPause is "data", then OnCommit must be "data". + - if onPause is "Full", then onCommit can be "Full" or "Data". + - if onPause is "Data", then onCommit must be "Data". 
enum: - - full - - data + - Full + - Data type: string onPause: description: |- OnPause specifies what to include in the snapshot when the actor is paused. - If not provided, the "full" behavior is used by default. + If not provided, the "Full" behavior is used by default. enum: - - full - - data + - Full + - Data type: string required: - location type: object x-kubernetes-validations: - - message: OnCommit must be a subset of OnPause - rule: '(has(self.onPause) ? self.onPause : ''full'') == ''full'' - || (has(self.onCommit) ? self.onCommit : ''full'') == (has(self.onPause) - ? self.onPause : ''full'')' + - message: onCommit must be a subset of onPause + rule: '(has(self.onPause) ? self.onPause : ''Full'') == ''Full'' + || (has(self.onCommit) ? self.onCommit : ''Full'') == (has(self.onPause) + ? self.onPause : ''Full'')' volumes: description: Volumes defines the volumes to mount into all containers in the actor. diff --git a/pkg/api/v1alpha1/actortemplate_types.go b/pkg/api/v1alpha1/actortemplate_types.go index 5d1fc996..99ebd665 100644 --- a/pkg/api/v1alpha1/actortemplate_types.go +++ b/pkg/api/v1alpha1/actortemplate_types.go @@ -218,20 +218,20 @@ type SecretKeySelector struct { } // SnapshotScope defines what components to include in a snapshot. -// +kubebuilder:validation:Enum=full;data +// +kubebuilder:validation:Enum=Full;Data type SnapshotScope string const ( // Full captures process memory plus the entire filesystem delta on top of // the OCI image (including any attached DurableDir volumes). - SnapshotScopeFull SnapshotScope = "full" + SnapshotScopeFull SnapshotScope = "Full" // Data captures only the contents of attached volumes that support // snapshots (currently DurableDir-typed volumes). Process memory and // the rest of rootfs are excluded. - SnapshotScopeData SnapshotScope = "data" + SnapshotScopeData SnapshotScope = "Data" ) -// +kubebuilder:validation:XValidation:rule="(has(self.onPause) ? self.onPause : 'full') == 'full' || (has(self.onCommit) ? 
self.onCommit : 'full') == (has(self.onPause) ? self.onPause : 'full')",message="OnCommit must be a subset of OnPause" +// +kubebuilder:validation:XValidation:rule="(has(self.onPause) ? self.onPause : 'Full') == 'Full' || (has(self.onCommit) ? self.onCommit : 'Full') == (has(self.onPause) ? self.onPause : 'Full')",message="onCommit must be a subset of onPause" type SnapshotsConfig struct { // Location to store snapshots in. // @@ -240,18 +240,18 @@ type SnapshotsConfig struct { Location string `json:"location"` // OnPause specifies what to include in the snapshot when the actor is paused. - // If not provided, the "full" behavior is used by default. + // If not provided, the "Full" behavior is used by default. // // +optional OnPause SnapshotScope `json:"onPause,omitempty"` // OnCommit specifies what to include in the snapshot when a commit is requested. - // If not provided, the "full" behavior is used by default. - // The OnCommit must be a subset of the OnPause content. + // If not provided, the "Full" behavior is used by default. + // onCommit must be a subset of the onPause content. // // For example: - // - if OnPause is "full", then OnCommit can be "full" or "data". - // - if OnPause is "data", then OnCommit must be "data". + // - if onPause is "Full", then onCommit can be "Full" or "Data". + // - if onPause is "Data", then onCommit must be "Data". 
// // +optional OnCommit SnapshotScope `json:"onCommit,omitempty"` diff --git a/pkg/api/v1alpha1/actortemplate_validation_test.go b/pkg/api/v1alpha1/actortemplate_validation_test.go index c9d5f771..86557ef9 100644 --- a/pkg/api/v1alpha1/actortemplate_validation_test.go +++ b/pkg/api/v1alpha1/actortemplate_validation_test.go @@ -481,43 +481,43 @@ func TestActorTemplateValidation(t *testing.T) { wantErr: true, errMsg: "Unsupported value", }, { - name: "SnapshotsConfig: OnPause=full, OnCommit=full", + name: "SnapshotsConfig: OnPause=Full, OnCommit=Full", mutate: func(at *ActorTemplate) { at.Spec.SnapshotsConfig.OnPause = SnapshotScopeFull at.Spec.SnapshotsConfig.OnCommit = SnapshotScopeFull }, wantErr: false, }, { - name: "SnapshotsConfig: OnPause=full, OnCommit=data", + name: "SnapshotsConfig: OnPause=Full, OnCommit=Data", mutate: func(at *ActorTemplate) { at.Spec.SnapshotsConfig.OnPause = SnapshotScopeFull at.Spec.SnapshotsConfig.OnCommit = SnapshotScopeData }, wantErr: false, }, { - name: "SnapshotsConfig: OnPause=data, OnCommit=data", + name: "SnapshotsConfig: OnPause=Data, OnCommit=Data", mutate: func(at *ActorTemplate) { at.Spec.SnapshotsConfig.OnPause = SnapshotScopeData at.Spec.SnapshotsConfig.OnCommit = SnapshotScopeData }, wantErr: false, }, { - name: "SnapshotsConfig: OnPause=data, OnCommit=full (invalid)", + name: "SnapshotsConfig: OnPause=Data, OnCommit=Full (invalid)", mutate: func(at *ActorTemplate) { at.Spec.SnapshotsConfig.OnPause = SnapshotScopeData at.Spec.SnapshotsConfig.OnCommit = SnapshotScopeFull }, wantErr: true, - errMsg: "OnCommit must be a subset of OnPause", + errMsg: "onCommit must be a subset of onPause", }, { - name: "SnapshotsConfig: OnPause=data, OnCommit unset (defaults to full, invalid)", + name: "SnapshotsConfig: OnPause=Data, OnCommit unset (defaults to Full, invalid)", mutate: func(at *ActorTemplate) { at.Spec.SnapshotsConfig.OnPause = SnapshotScopeData }, wantErr: true, - errMsg: "OnCommit must be a subset of OnPause", + errMsg: 
"onCommit must be a subset of onPause", }, { - name: "SnapshotsConfig: OnPause unset (defaults to full), OnCommit=data", + name: "SnapshotsConfig: OnPause unset (defaults to Full), OnCommit=Data", mutate: func(at *ActorTemplate) { at.Spec.SnapshotsConfig.OnCommit = SnapshotScopeData }, @@ -777,6 +777,39 @@ func TestActorTemplateValidation(t *testing.T) { } }, wantErr: false, + }, { + name: "Volumes: DurableDir MountPath with segment starting with '..' is valid", + mutate: func(at *ActorTemplate) { + at.Spec.Volumes = []Volume{ + {Name: "vol1", VolumeSource: VolumeSource{DurableDir: &DurableDirVolumeSource{}}}, + } + at.Spec.Containers[0].VolumeMounts = []VolumeMount{ + {Name: "vol1", MountPath: "/home/..config"}, + } + }, + wantErr: false, + }, { + name: "Volumes: DurableDir MountPath with embedded dots inside a segment is valid", + mutate: func(at *ActorTemplate) { + at.Spec.Volumes = []Volume{ + {Name: "vol1", VolumeSource: VolumeSource{DurableDir: &DurableDirVolumeSource{}}}, + } + at.Spec.Containers[0].VolumeMounts = []VolumeMount{ + {Name: "vol1", MountPath: "/home/x..y"}, + } + }, + wantErr: false, + }, { + name: "Volumes: DurableDir MountPath with spaces is valid", + mutate: func(at *ActorTemplate) { + at.Spec.Volumes = []Volume{ + {Name: "vol1", VolumeSource: VolumeSource{DurableDir: &DurableDirVolumeSource{}}}, + } + at.Spec.Containers[0].VolumeMounts = []VolumeMount{ + {Name: "vol1", MountPath: "/my home directory"}, + } + }, + wantErr: false, }, { name: "Volumes: DurableDir MountPath with NUL byte is invalid", mutate: func(at *ActorTemplate) { From b3e36ff499bb8d0c357995fa56387d3d757a2c66 Mon Sep 17 00:00:00 2001 From: dberkov Date: Fri, 26 Jun 2026 15:31:43 -0700 Subject: [PATCH 11/17] additional comments --- cmd/ateapi/internal/controlapi/converter.go | 3 +++ manifests/ate-install/generated/ate.dev_actortemplates.yaml | 2 ++ pkg/api/v1alpha1/actortemplate_types.go | 2 ++ 3 files changed, 7 insertions(+) diff --git 
a/cmd/ateapi/internal/controlapi/converter.go b/cmd/ateapi/internal/controlapi/converter.go index 46351097..b851aa42 100644 --- a/cmd/ateapi/internal/controlapi/converter.go +++ b/cmd/ateapi/internal/controlapi/converter.go @@ -15,6 +15,8 @@ package controlapi import ( + "log/slog" + "github.com/agent-substrate/substrate/internal/proto/ateletpb" atev1alpha1 "github.com/agent-substrate/substrate/pkg/api/v1alpha1" ) @@ -27,6 +29,7 @@ func toAteletSnapshotScope(in atev1alpha1.SnapshotScope) ateletpb.SnapshotScope case atev1alpha1.SnapshotScopeData: return ateletpb.SnapshotScope_SNAPSHOT_SCOPE_DATA default: + slog.Warn("unknown SnapshotScope; falling back to Full", "scope", string(in)) return ateletpb.SnapshotScope_SNAPSHOT_SCOPE_FULL } } diff --git a/manifests/ate-install/generated/ate.dev_actortemplates.yaml b/manifests/ate-install/generated/ate.dev_actortemplates.yaml index 5aac0173..def153ed 100644 --- a/manifests/ate-install/generated/ate.dev_actortemplates.yaml +++ b/manifests/ate-install/generated/ate.dev_actortemplates.yaml @@ -267,6 +267,7 @@ spec: minLength: 1 type: string onCommit: + default: Full description: |- OnCommit specifies what to include in the snapshot when a commit is requested. If not provided, the "Full" behavior is used by default. @@ -280,6 +281,7 @@ spec: - Data type: string onPause: + default: Full description: |- OnPause specifies what to include in the snapshot when the actor is paused. If not provided, the "Full" behavior is used by default. diff --git a/pkg/api/v1alpha1/actortemplate_types.go b/pkg/api/v1alpha1/actortemplate_types.go index 99ebd665..5e222886 100644 --- a/pkg/api/v1alpha1/actortemplate_types.go +++ b/pkg/api/v1alpha1/actortemplate_types.go @@ -243,6 +243,7 @@ type SnapshotsConfig struct { // If not provided, the "Full" behavior is used by default. 
// // +optional + // +kubebuilder:default=Full OnPause SnapshotScope `json:"onPause,omitempty"` // OnCommit specifies what to include in the snapshot when a commit is requested. @@ -254,6 +255,7 @@ type SnapshotsConfig struct { // - if onPause is "Data", then onCommit must be "Data". // // +optional + // +kubebuilder:default=Full OnCommit SnapshotScope `json:"onCommit,omitempty"` } From d9d3832e2dcaaf700a10e3308b931b9e323a67cd Mon Sep 17 00:00:00 2001 From: dberkov Date: Fri, 26 Jun 2026 15:41:24 -0700 Subject: [PATCH 12/17] fix yaml file --- demos/counter/counter.yaml.tmpl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/demos/counter/counter.yaml.tmpl b/demos/counter/counter.yaml.tmpl index 5ca0edda..f1dc416b 100644 --- a/demos/counter/counter.yaml.tmpl +++ b/demos/counter/counter.yaml.tmpl @@ -54,8 +54,8 @@ spec: matchLabels: workload: counter snapshotsConfig: - onPause: full - onCommit: data + onPause: Full + onCommit: Data location: gs://${BUCKET_NAME}/ate-demo-counter/ volumes: - name: data From deb4506154327bf83cc07ff0f8e681ad4bf9b16c Mon Sep 17 00:00:00 2001 From: dberkov Date: Fri, 26 Jun 2026 16:01:33 -0700 Subject: [PATCH 13/17] test: skip TestDurableDirLifecycle unless E2E_TEMPLATE_NAME is set to counter --- internal/e2e/suites/demo/demo_test.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/internal/e2e/suites/demo/demo_test.go b/internal/e2e/suites/demo/demo_test.go index f864a283..4b604cbf 100644 --- a/internal/e2e/suites/demo/demo_test.go +++ b/internal/e2e/suites/demo/demo_test.go @@ -84,6 +84,15 @@ func TestActorLifecycle(t *testing.T) { // 5. Suspend & Resume actor. // 6. Call to actor and validate memory and file counters. func TestDurableDirLifecycle(t *testing.T) { + // TODO(BenTheElder) remove it once https://github.com/agent-substrate/substrate/pull/313 is merged. 
+ srcName := "counter" + if v := os.Getenv("E2E_TEMPLATE_NAME"); v != "" { + srcName = v + } + if srcName != "counter" { + t.Skip("Skipping TestDurableDirLifecycle because E2E_TEMPLATE_NAME is not set to \"counter\"") + } + tests := []struct { name string onCommit v1alpha1.SnapshotScope From 5eae7ebc5f8dcd88ebecefa6cd507774423b766a Mon Sep 17 00:00:00 2001 From: dberkov Date: Fri, 26 Jun 2026 16:21:07 -0700 Subject: [PATCH 14/17] test: adjust durable directory lifecycle expectations for microVM environment --- internal/e2e/suites/demo/demo_test.go | 39 +++++++++++++++++++-------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/internal/e2e/suites/demo/demo_test.go b/internal/e2e/suites/demo/demo_test.go index 4b604cbf..09b0fccf 100644 --- a/internal/e2e/suites/demo/demo_test.go +++ b/internal/e2e/suites/demo/demo_test.go @@ -84,13 +84,8 @@ func TestActorLifecycle(t *testing.T) { // 5. Suspend & Resume actor. // 6. Call to actor and validate memory and file counters. func TestDurableDirLifecycle(t *testing.T) { - // TODO(BenTheElder) remove it once https://github.com/agent-substrate/substrate/pull/313 is merged. 
- srcName := "counter" - if v := os.Getenv("E2E_TEMPLATE_NAME"); v != "" { - srcName = v - } - if srcName != "counter" { - t.Skip("Skipping TestDurableDirLifecycle because E2E_TEMPLATE_NAME is not set to \"counter\"") + if isMicroVMEnvironment() + t.Skip("Skipping TestDurableDirLifecycle for microVM environment") } tests := []struct { @@ -330,7 +325,12 @@ func pauseActor(ctx context.Context, t *testing.T, clients *e2e.Clients, nsObj * if err != nil { t.Fatalf("failed to call actor: %v", err) } - validateCounterResponse(t, resp, "after creation", 1, 1) + + if isMicroVMEnvironment() { + validateCounterResponse(t, resp, "after creation", 1, -1) + } else { + validateCounterResponse(t, resp, "after creation", 1, 1) + } // Pausing the actor t.Logf("Pausing Actor %q...", actorID) @@ -354,7 +354,11 @@ func pauseActor(ctx context.Context, t *testing.T, clients *e2e.Clients, nsObj * if err != nil { t.Fatalf("failed to call actor again: %v", err) } - validateCounterResponse(t, resp, "after pause", 2, 2) + if isMicroVMEnvironment() { + validateCounterResponse(t, resp, "after pause", 2, -1) + } else { + validateCounterResponse(t, resp, "after pause", 2, 2) + } // Suspending the actor before deletion t.Logf("Suspending Actor %q before deletion...", actorID) @@ -409,7 +413,11 @@ func suspendActor(ctx context.Context, t *testing.T, clients *e2e.Clients, nsObj if err != nil { t.Fatalf("failed to call actor: %v", err) } - validateCounterResponse(t, resp, "after creation", 1, 1) + if isMicroVMEnvironment() { + validateCounterResponse(t, resp, "after creation", 1, -1) + } else { + validateCounterResponse(t, resp, "after creation", 1, 1) + } // Suspending the actor t.Logf("Suspending Actor %q...", actorID) @@ -433,7 +441,11 @@ func suspendActor(ctx context.Context, t *testing.T, clients *e2e.Clients, nsObj if err != nil { t.Fatalf("failed to call actor again: %v", err) } - validateCounterResponse(t, resp, "after suspend", 2, 2) + if isMicroVMEnvironment() { + validateCounterResponse(t, 
resp, "after suspend", 2, -1) + } else { + validateCounterResponse(t, resp, "after suspend", 2, 2) + } // Suspending the actor before deletion t.Logf("Suspending Actor %q before deletion...", actorID) @@ -689,3 +701,8 @@ func callActor(t *testing.T, actorID string) (string, error) { return string(body), nil } + +func isMicroVMEnvironment() bool { + // TODO(BenTheElder) remove it once https://github.com/agent-substrate/substrate/pull/313 is merged. + return os.Getenv("E2E_TEMPLATE_NAMESPACE") == "ate-demo-counter-microvm" +} From 0398f4df1692e4ef82700065a661d4fa24ba8b1b Mon Sep 17 00:00:00 2001 From: dberkov Date: Fri, 26 Jun 2026 16:26:32 -0700 Subject: [PATCH 15/17] fix: add missing brace to if condition in TestDurableDirLifecycle --- internal/e2e/suites/demo/demo_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/e2e/suites/demo/demo_test.go b/internal/e2e/suites/demo/demo_test.go index 09b0fccf..48130d2a 100644 --- a/internal/e2e/suites/demo/demo_test.go +++ b/internal/e2e/suites/demo/demo_test.go @@ -84,7 +84,7 @@ func TestActorLifecycle(t *testing.T) { // 5. Suspend & Resume actor. // 6. Call to actor and validate memory and file counters. 
func TestDurableDirLifecycle(t *testing.T) { - if isMicroVMEnvironment() + if isMicroVMEnvironment() { t.Skip("Skipping TestDurableDirLifecycle for microVM environment") } From 6ea40b3988fbbe3f6b5bde9a1fbd07c3aeaa0b6a Mon Sep 17 00:00:00 2001 From: dberkov Date: Fri, 26 Jun 2026 18:53:14 -0700 Subject: [PATCH 16/17] refactor: split WorkloadSpec builder into basic and environment-aware variants --- .../internal/controlapi/workflow_pause.go | 5 +- .../internal/controlapi/workflow_resume.go | 2 +- .../internal/controlapi/workflow_suspend.go | 5 +- .../internal/controlapi/workload_spec.go | 51 ++-- .../internal/controlapi/workload_spec_test.go | 272 ++++++++++-------- 5 files changed, 193 insertions(+), 142 deletions(-) diff --git a/cmd/ateapi/internal/controlapi/workflow_pause.go b/cmd/ateapi/internal/controlapi/workflow_pause.go index c1137bb0..b507b15d 100644 --- a/cmd/ateapi/internal/controlapi/workflow_pause.go +++ b/cmd/ateapi/internal/controlapi/workflow_pause.go @@ -114,10 +114,7 @@ func (s *CallAteletPauseStep) Execute(ctx context.Context, input *PauseInput, st } client := ateletpb.NewAteomHerderClient(ateletConn) - workloadSpec, err := workloadSpecFromActorTemplate(ctx, nil, nil, state.ActorTemplate) - if err != nil { - return err - } + workloadSpec := workloadSpecFromActorTemplate(state.ActorTemplate) // Checkpoint does not carry the sandbox config: atelet uses the version the // actor is currently running (recorded on-node at Run/Restore) and pins it diff --git a/cmd/ateapi/internal/controlapi/workflow_resume.go b/cmd/ateapi/internal/controlapi/workflow_resume.go index 49ca8161..aa2507b7 100644 --- a/cmd/ateapi/internal/controlapi/workflow_resume.go +++ b/cmd/ateapi/internal/controlapi/workflow_resume.go @@ -252,7 +252,7 @@ func (s *CallAteletRestoreStep) Execute(ctx context.Context, input *ResumeInput, } client := ateletpb.NewAteomHerderClient(ateletConn) - workloadSpec, err := workloadSpecFromActorTemplate(ctx, s.kubeClient, s.secretCache, 
state.ActorTemplate) + workloadSpec, err := workloadSpecFromActorTemplateWithEnv(ctx, s.kubeClient, s.secretCache, state.ActorTemplate) if err != nil { return err } diff --git a/cmd/ateapi/internal/controlapi/workflow_suspend.go b/cmd/ateapi/internal/controlapi/workflow_suspend.go index 50a77734..a87cb7d3 100644 --- a/cmd/ateapi/internal/controlapi/workflow_suspend.go +++ b/cmd/ateapi/internal/controlapi/workflow_suspend.go @@ -116,10 +116,7 @@ func (s *CallAteletSuspendStep) Execute(ctx context.Context, input *SuspendInput } client := ateletpb.NewAteomHerderClient(ateletConn) - workloadSpec, err := workloadSpecFromActorTemplate(ctx, nil, nil, state.ActorTemplate) - if err != nil { - return err - } + workloadSpec := workloadSpecFromActorTemplate(state.ActorTemplate) // Checkpoint does not carry the sandbox config: atelet uses the version the // actor is currently running (recorded on-node at Run/Restore) and pins it diff --git a/cmd/ateapi/internal/controlapi/workload_spec.go b/cmd/ateapi/internal/controlapi/workload_spec.go index c28a579b..a0d0a806 100644 --- a/cmd/ateapi/internal/controlapi/workload_spec.go +++ b/cmd/ateapi/internal/controlapi/workload_spec.go @@ -32,7 +32,10 @@ import ( const envSecretCacheTTL = 30 * time.Second -func workloadSpecFromActorTemplate(ctx context.Context, kubeClient kubernetes.Interface, secretCache *envSecretCache, actorTemplate *atev1alpha1.ActorTemplate) (*ateletpb.WorkloadSpec, error) { +// workloadSpecFromActorTemplate builds a WorkloadSpec without resolving +// container env vars. Use this when downstream consumers (e.g. checkpoint +// requests) don't need env entries materialized. 
+func workloadSpecFromActorTemplate(actorTemplate *atev1alpha1.ActorTemplate) *ateletpb.WorkloadSpec { workloadSpec := &ateletpb.WorkloadSpec{ PauseImage: actorTemplate.Spec.PauseImage, } @@ -41,23 +44,16 @@ func workloadSpecFromActorTemplate(ctx context.Context, kubeClient kubernetes.In for _, vol := range actorTemplate.Spec.Volumes { // volume is durable-dir type if vol.VolumeSource.DurableDir != nil { - ateletVol := &ateletpb.Volume{ + workloadSpec.Volumes = append(workloadSpec.Volumes, &ateletpb.Volume{ Name: vol.Name, Type: ateletpb.VolumeType_VOLUME_TYPE_DURABLE_DIR, Source: &ateletpb.Volume_DurableDir{ DurableDir: &ateletpb.DurableDirVolume{}, }, - } - workloadSpec.Volumes = append(workloadSpec.Volumes, ateletVol) + }) } } - resolver := envResolver{ - kubeClient: kubeClient, - namespace: actorTemplate.Namespace, - cache: secretCache, - } - for _, ctr := range actorTemplate.Spec.Containers { ateletCtr := &ateletpb.Container{ Name: ctr.Name, @@ -65,23 +61,40 @@ func workloadSpecFromActorTemplate(ctx context.Context, kubeClient kubernetes.In Command: ctr.Command, Readyz: toAteletReadyz(ctr.Readyz), } + for _, mount := range ctr.VolumeMounts { + ateletCtr.VolumeMounts = append(ateletCtr.VolumeMounts, &ateletpb.VolumeMount{ + Name: mount.Name, + MountPath: mount.MountPath, + }) + } + workloadSpec.Containers = append(workloadSpec.Containers, ateletCtr) + } + + return workloadSpec +} + +// workloadSpecFromActorTemplateWithEnv builds a WorkloadSpec and resolves each +// container's env vars against the cluster. kubeClient must be non-nil; +// secretCache is optional and, when supplied, deduplicates Secret reads. 
+func workloadSpecFromActorTemplateWithEnv(ctx context.Context, kubeClient kubernetes.Interface, secretCache *envSecretCache, actorTemplate *atev1alpha1.ActorTemplate) (*ateletpb.WorkloadSpec, error) { + workloadSpec := workloadSpecFromActorTemplate(actorTemplate) + + resolver := envResolver{ + kubeClient: kubeClient, + namespace: actorTemplate.Namespace, + cache: secretCache, + } + + for i, ctr := range actorTemplate.Spec.Containers { for _, env := range ctr.Env { ateletEnv, err := resolver.resolve(ctx, ctr.Name, env) if err != nil { return nil, err } if ateletEnv != nil { - ateletCtr.Env = append(ateletCtr.Env, ateletEnv) + workloadSpec.Containers[i].Env = append(workloadSpec.Containers[i].Env, ateletEnv) } } - for _, mount := range ctr.VolumeMounts { - ateletCtr.VolumeMounts = append(ateletCtr.VolumeMounts, &ateletpb.VolumeMount{ - Name: mount.Name, - MountPath: mount.MountPath, - }) - } - - workloadSpec.Containers = append(workloadSpec.Containers, ateletCtr) } return workloadSpec, nil diff --git a/cmd/ateapi/internal/controlapi/workload_spec_test.go b/cmd/ateapi/internal/controlapi/workload_spec_test.go index a19dedc2..523a6e2b 100644 --- a/cmd/ateapi/internal/controlapi/workload_spec_test.go +++ b/cmd/ateapi/internal/controlapi/workload_spec_test.go @@ -32,6 +32,155 @@ import ( ) func TestWorkloadSpecFromActorTemplate(t *testing.T) { + tests := []struct { + name string + template *atev1alpha1.ActorTemplate + want *ateletpb.WorkloadSpec + }{ + { + name: "converts DurableDir volume and mounts", + template: &atev1alpha1.ActorTemplate{ + ObjectMeta: metav1.ObjectMeta{Name: "tmpl1", Namespace: "agent-ns"}, + Spec: atev1alpha1.ActorTemplateSpec{ + PauseImage: "pause", + Volumes: []atev1alpha1.Volume{ + {Name: "home", VolumeSource: atev1alpha1.VolumeSource{DurableDir: &atev1alpha1.DurableDirVolumeSource{}}}, + }, + Containers: []atev1alpha1.Container{ + { + Name: "main", + Image: "main", + VolumeMounts: []atev1alpha1.VolumeMount{ + {Name: "home", MountPath: 
"/home/user"}, + {Name: "home", MountPath: "/workspace"}, + }, + }, + }, + }, + }, + want: &ateletpb.WorkloadSpec{ + PauseImage: "pause", + Volumes: []*ateletpb.Volume{ + { + Name: "home", + Type: ateletpb.VolumeType_VOLUME_TYPE_DURABLE_DIR, + Source: &ateletpb.Volume_DurableDir{DurableDir: &ateletpb.DurableDirVolume{}}, + }, + }, + Containers: []*ateletpb.Container{ + { + Name: "main", + Image: "main", + VolumeMounts: []*ateletpb.VolumeMount{ + {Name: "home", MountPath: "/home/user"}, + {Name: "home", MountPath: "/workspace"}, + }, + }, + }, + }, + }, + { + name: "skips non-DurableDir volumes", + template: &atev1alpha1.ActorTemplate{ + ObjectMeta: metav1.ObjectMeta{Name: "tmpl1", Namespace: "agent-ns"}, + Spec: atev1alpha1.ActorTemplateSpec{ + Volumes: []atev1alpha1.Volume{ + {Name: "unsupported", VolumeSource: atev1alpha1.VolumeSource{}}, + {Name: "home", VolumeSource: atev1alpha1.VolumeSource{DurableDir: &atev1alpha1.DurableDirVolumeSource{}}}, + }, + Containers: []atev1alpha1.Container{ + { + Name: "main", + Image: "main", + VolumeMounts: []atev1alpha1.VolumeMount{ + {Name: "home", MountPath: "/workspace"}, + }, + }, + }, + }, + }, + want: &ateletpb.WorkloadSpec{ + Volumes: []*ateletpb.Volume{ + { + Name: "home", + Type: ateletpb.VolumeType_VOLUME_TYPE_DURABLE_DIR, + Source: &ateletpb.Volume_DurableDir{DurableDir: &ateletpb.DurableDirVolume{}}, + }, + }, + Containers: []*ateletpb.Container{ + { + Name: "main", + Image: "main", + VolumeMounts: []*ateletpb.VolumeMount{ + {Name: "home", MountPath: "/workspace"}, + }, + }, + }, + }, + }, + { + name: "container without volume mounts has none", + template: &atev1alpha1.ActorTemplate{ + ObjectMeta: metav1.ObjectMeta{Name: "tmpl1", Namespace: "agent-ns"}, + Spec: atev1alpha1.ActorTemplateSpec{ + Volumes: []atev1alpha1.Volume{ + {Name: "home", VolumeSource: atev1alpha1.VolumeSource{DurableDir: &atev1alpha1.DurableDirVolumeSource{}}}, + }, + Containers: []atev1alpha1.Container{ + {Name: "main", Image: "main"}, + }, + }, 
+ }, + want: &ateletpb.WorkloadSpec{ + Volumes: []*ateletpb.Volume{ + { + Name: "home", + Type: ateletpb.VolumeType_VOLUME_TYPE_DURABLE_DIR, + Source: &ateletpb.Volume_DurableDir{DurableDir: &ateletpb.DurableDirVolume{}}, + }, + }, + Containers: []*ateletpb.Container{{Name: "main", Image: "main"}}, + }, + }, + { + name: "ignores container env", + template: &atev1alpha1.ActorTemplate{ + ObjectMeta: metav1.ObjectMeta{Name: "tmpl1", Namespace: "agent-ns"}, + Spec: atev1alpha1.ActorTemplateSpec{ + Containers: []atev1alpha1.Container{ + { + Name: "main", + Image: "main", + Env: []atev1alpha1.EnvVar{ + {Name: "LITERAL", Value: ptr.To("plain")}, + { + Name: "SECRET", + ValueFrom: &atev1alpha1.EnvVarSource{ + SecretKeyRef: &atev1alpha1.SecretKeySelector{Name: "any", Key: "any"}, + }, + }, + }, + }, + }, + }, + }, + want: &ateletpb.WorkloadSpec{ + Containers: []*ateletpb.Container{{Name: "main", Image: "main"}}, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := workloadSpecFromActorTemplate(tt.template) + if diff := cmp.Diff(tt.want, got, protocmp.Transform()); diff != "" { + t.Errorf("WorkloadSpec mismatch (-want +got):\n%s", diff) + } + }) + } +} + +func TestWorkloadSpecFromActorTemplateWithEnv(t *testing.T) { tests := []struct { name string secrets []runtime.Object @@ -216,117 +365,12 @@ func TestWorkloadSpecFromActorTemplateOptionalSecretKeyRefSkipsMissingSecret(t * }, wantErrCode: codes.FailedPrecondition, }, - { - name: "converts DurableDir volume and mounts", - template: &atev1alpha1.ActorTemplate{ - ObjectMeta: metav1.ObjectMeta{Name: "tmpl1", Namespace: "agent-ns"}, - Spec: atev1alpha1.ActorTemplateSpec{ - PauseImage: "pause", - Volumes: []atev1alpha1.Volume{ - {Name: "home", VolumeSource: atev1alpha1.VolumeSource{DurableDir: &atev1alpha1.DurableDirVolumeSource{}}}, - }, - Containers: []atev1alpha1.Container{ - { - Name: "main", - Image: "main", - VolumeMounts: []atev1alpha1.VolumeMount{ - {Name: "home", MountPath: 
"/home/user"}, - {Name: "home", MountPath: "/workspace"}, - }, - }, - }, - }, - }, - want: &ateletpb.WorkloadSpec{ - PauseImage: "pause", - Volumes: []*ateletpb.Volume{ - { - Name: "home", - Type: ateletpb.VolumeType_VOLUME_TYPE_DURABLE_DIR, - Source: &ateletpb.Volume_DurableDir{DurableDir: &ateletpb.DurableDirVolume{}}, - }, - }, - Containers: []*ateletpb.Container{ - { - Name: "main", - Image: "main", - VolumeMounts: []*ateletpb.VolumeMount{ - {Name: "home", MountPath: "/home/user"}, - {Name: "home", MountPath: "/workspace"}, - }, - }, - }, - }, - }, - { - name: "skips non-DurableDir volumes", - template: &atev1alpha1.ActorTemplate{ - ObjectMeta: metav1.ObjectMeta{Name: "tmpl1", Namespace: "agent-ns"}, - Spec: atev1alpha1.ActorTemplateSpec{ - Volumes: []atev1alpha1.Volume{ - {Name: "unsupported", VolumeSource: atev1alpha1.VolumeSource{}}, - {Name: "home", VolumeSource: atev1alpha1.VolumeSource{DurableDir: &atev1alpha1.DurableDirVolumeSource{}}}, - }, - Containers: []atev1alpha1.Container{ - { - Name: "main", - Image: "main", - VolumeMounts: []atev1alpha1.VolumeMount{ - {Name: "home", MountPath: "/workspace"}, - }, - }, - }, - }, - }, - want: &ateletpb.WorkloadSpec{ - Volumes: []*ateletpb.Volume{ - { - Name: "home", - Type: ateletpb.VolumeType_VOLUME_TYPE_DURABLE_DIR, - Source: &ateletpb.Volume_DurableDir{DurableDir: &ateletpb.DurableDirVolume{}}, - }, - }, - Containers: []*ateletpb.Container{ - { - Name: "main", - Image: "main", - VolumeMounts: []*ateletpb.VolumeMount{ - {Name: "home", MountPath: "/workspace"}, - }, - }, - }, - }, - }, - { - name: "container without volume mounts has none", - template: &atev1alpha1.ActorTemplate{ - ObjectMeta: metav1.ObjectMeta{Name: "tmpl1", Namespace: "agent-ns"}, - Spec: atev1alpha1.ActorTemplateSpec{ - Volumes: []atev1alpha1.Volume{ - {Name: "home", VolumeSource: atev1alpha1.VolumeSource{DurableDir: &atev1alpha1.DurableDirVolumeSource{}}}, - }, - Containers: []atev1alpha1.Container{ - {Name: "main", Image: "main"}, - }, - }, 
- }, - want: &ateletpb.WorkloadSpec{ - Volumes: []*ateletpb.Volume{ - { - Name: "home", - Type: ateletpb.VolumeType_VOLUME_TYPE_DURABLE_DIR, - Source: &ateletpb.Volume_DurableDir{DurableDir: &ateletpb.DurableDirVolume{}}, - }, - }, - Containers: []*ateletpb.Container{{Name: "main", Image: "main"}}, - }, - }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { kubeClient := fake.NewSimpleClientset(tt.secrets...) - got, err := workloadSpecFromActorTemplate(context.Background(), kubeClient, nil, tt.template) + got, err := workloadSpecFromActorTemplateWithEnv(context.Background(), kubeClient, nil, tt.template) if tt.wantErrCode != codes.OK { if status.Code(err) != tt.wantErrCode { t.Fatalf("error code = %v, want %v: %v", status.Code(err), tt.wantErrCode, err) @@ -334,7 +378,7 @@ func TestWorkloadSpecFromActorTemplateOptionalSecretKeyRefSkipsMissingSecret(t * return } if err != nil { - t.Fatalf("workloadSpecFromActorTemplate failed: %v", err) + t.Fatalf("workloadSpecFromActorTemplateWithEnv failed: %v", err) } if diff := cmp.Diff(tt.want, got, protocmp.Transform()); diff != "" { t.Errorf("WorkloadSpec mismatch (-want +got):\n%s", diff) @@ -343,7 +387,7 @@ func TestWorkloadSpecFromActorTemplateOptionalSecretKeyRefSkipsMissingSecret(t * } } -func TestWorkloadSpecFromActorTemplateCachesSecretsAcrossCalls(t *testing.T) { +func TestWorkloadSpecFromActorTemplateWithEnvCachesSecretsAcrossCalls(t *testing.T) { ctx := context.Background() secretCache := newEnvSecretCache(envSecretCacheTTL) kubeClient := fake.NewSimpleClientset( @@ -383,19 +427,19 @@ func TestWorkloadSpecFromActorTemplateCachesSecretsAcrossCalls(t *testing.T) { }, } - if _, err := workloadSpecFromActorTemplate(ctx, kubeClient, secretCache, actorTemplate); err != nil { - t.Fatalf("first workloadSpecFromActorTemplate failed: %v", err) + if _, err := workloadSpecFromActorTemplateWithEnv(ctx, kubeClient, secretCache, actorTemplate); err != nil { + t.Fatalf("first workloadSpecFromActorTemplateWithEnv 
failed: %v", err) } - if _, err := workloadSpecFromActorTemplate(ctx, kubeClient, secretCache, actorTemplate); err != nil { - t.Fatalf("second workloadSpecFromActorTemplate failed: %v", err) + if _, err := workloadSpecFromActorTemplateWithEnv(ctx, kubeClient, secretCache, actorTemplate); err != nil { + t.Fatalf("second workloadSpecFromActorTemplateWithEnv failed: %v", err) } if got := secretGetCount(kubeClient); got != 1 { t.Fatalf("secret gets before TTL expiry = %d, want 1", got) } expireSecretCache(secretCache) - if _, err := workloadSpecFromActorTemplate(ctx, kubeClient, secretCache, actorTemplate); err != nil { - t.Fatalf("third workloadSpecFromActorTemplate failed: %v", err) + if _, err := workloadSpecFromActorTemplateWithEnv(ctx, kubeClient, secretCache, actorTemplate); err != nil { + t.Fatalf("third workloadSpecFromActorTemplateWithEnv failed: %v", err) } if got := secretGetCount(kubeClient); got != 2 { t.Fatalf("secret gets after TTL expiry = %d, want 2", got) From 222cbf1effef719714739906cb578f247d76bf7a Mon Sep 17 00:00:00 2001 From: dberkov Date: Fri, 26 Jun 2026 19:54:05 -0700 Subject: [PATCH 17/17] fix mistakes happened during rebase --- .../internal/controlapi/workload_spec_test.go | 106 +++++++----------- .../generated/ate.dev_actortemplates.yaml | 71 ++++++------ pkg/api/v1alpha1/actortemplate_types.go | 12 +- pkg/api/v1alpha1/zz_generated.deepcopy.go | 12 +- 4 files changed, 87 insertions(+), 114 deletions(-) diff --git a/cmd/ateapi/internal/controlapi/workload_spec_test.go b/cmd/ateapi/internal/controlapi/workload_spec_test.go index 523a6e2b..24918f17 100644 --- a/cmd/ateapi/internal/controlapi/workload_spec_test.go +++ b/cmd/ateapi/internal/controlapi/workload_spec_test.go @@ -233,78 +233,12 @@ func TestWorkloadSpecFromActorTemplateWithEnv(t *testing.T) { }, }, }, -<<<<<<< HEAD - } - if diff := cmp.Diff(want, got, protocmp.Transform()); diff != "" { - t.Errorf("WorkloadSpec mismatch (-want +got):\n%s", diff) - } -} - -func 
TestWorkloadSpecFromActorTemplatePropagatesReadyz(t *testing.T) { - ctx := context.Background() - got, err := workloadSpecFromActorTemplate(ctx, fake.NewSimpleClientset(), nil, &atev1alpha1.ActorTemplate{ - ObjectMeta: metav1.ObjectMeta{Name: "tmpl-readyz", Namespace: "agent-ns"}, - Spec: atev1alpha1.ActorTemplateSpec{ - Containers: []atev1alpha1.Container{ - { - Name: "with-probe", - Image: "main", - Readyz: &atev1alpha1.ContainerReadyz{ - HTTPGet: &atev1alpha1.HTTPGetAction{Path: "/health", Port: 8080}, - }, - }, - { - Name: "without-probe", - Image: "side", - }, - }, - }, - }) - if err != nil { - t.Fatalf("workloadSpecFromActorTemplate failed: %v", err) - } - - want := &ateletpb.WorkloadSpec{ - Containers: []*ateletpb.Container{ - { - Name: "with-probe", - Image: "main", - Readyz: &ateletpb.Readyz{ - HttpGet: &ateletpb.HTTPGetAction{Path: "/health", Port: 8080}, - }, - }, - { - Name: "without-probe", - Image: "side", - }, - }, - } - if diff := cmp.Diff(want, got, protocmp.Transform()); diff != "" { - t.Errorf("WorkloadSpec mismatch (-want +got):\n%s", diff) - } -} - -func TestWorkloadSpecFromActorTemplateOptionalSecretKeyRefSkipsMissingSecret(t *testing.T) { - optional := true - got, err := workloadSpecFromActorTemplate(context.Background(), fake.NewSimpleClientset(), nil, &atev1alpha1.ActorTemplate{ - ObjectMeta: metav1.ObjectMeta{ - Name: "tmpl1", - Namespace: "agent-ns", - }, - Spec: atev1alpha1.ActorTemplateSpec{ - Containers: []atev1alpha1.Container{ - { - Name: "main", - Image: "main", - Env: []atev1alpha1.EnvVar{ -======= { name: "skips optional missing secret", template: &atev1alpha1.ActorTemplate{ ObjectMeta: metav1.ObjectMeta{Name: "tmpl1", Namespace: "agent-ns"}, Spec: atev1alpha1.ActorTemplateSpec{ Containers: []atev1alpha1.Container{ ->>>>>>> 3962a2c (implement suspend/resume with homedir support) { Name: "main", Image: "main", @@ -387,6 +321,46 @@ func TestWorkloadSpecFromActorTemplateOptionalSecretKeyRefSkipsMissingSecret(t * } } +func 
TestWorkloadSpecFromActorTemplatePropagatesReadyz(t *testing.T) { + got := workloadSpecFromActorTemplate(&atev1alpha1.ActorTemplate{ + ObjectMeta: metav1.ObjectMeta{Name: "tmpl-readyz", Namespace: "agent-ns"}, + Spec: atev1alpha1.ActorTemplateSpec{ + Containers: []atev1alpha1.Container{ + { + Name: "with-probe", + Image: "main", + Readyz: &atev1alpha1.ContainerReadyz{ + HTTPGet: &atev1alpha1.HTTPGetAction{Path: "/health", Port: 8080}, + }, + }, + { + Name: "without-probe", + Image: "side", + }, + }, + }, + }) + + want := &ateletpb.WorkloadSpec{ + Containers: []*ateletpb.Container{ + { + Name: "with-probe", + Image: "main", + Readyz: &ateletpb.Readyz{ + HttpGet: &ateletpb.HTTPGetAction{Path: "/health", Port: 8080}, + }, + }, + { + Name: "without-probe", + Image: "side", + }, + }, + } + if diff := cmp.Diff(want, got, protocmp.Transform()); diff != "" { + t.Errorf("WorkloadSpec mismatch (-want +got):\n%s", diff) + } +} + func TestWorkloadSpecFromActorTemplateWithEnvCachesSecretsAcrossCalls(t *testing.T) { ctx := context.Background() secretCache := newEnvSecretCache(envSecretCacheTTL) diff --git a/manifests/ate-install/generated/ate.dev_actortemplates.yaml b/manifests/ate-install/generated/ate.dev_actortemplates.yaml index def153ed..dfc94dca 100644 --- a/manifests/ate-install/generated/ate.dev_actortemplates.yaml +++ b/manifests/ate-install/generated/ate.dev_actortemplates.yaml @@ -179,48 +179,47 @@ spec: maximum: 65535 minimum: 1 type: integer - volumeMounts: - description: volumeMounts define the volumes to mount - into this container. - items: - description: VolumeMount describes a mounting of a - Volume within a actor. - properties: - mountPath: - description: |- - Path within the actor at which the volume should be mounted. Must be a - clean absolute Unix path: must start with '/', not be '/', and contain - no ':', '..', '.', '//', trailing '/', or control characters. 
- maxLength: 4096 - type: string - x-kubernetes-validations: - - message: 'MountPath must be a clean absolute - Unix path: must start with ''/'', not be ''/'', - and contain no '':'', ''..'', ''.'', ''//'', - trailing ''/'', or control characters' - rule: self.startsWith('/') && size(self) > 1 - && !self.endsWith('/') && !self.contains('//') - && !self.contains(':') && !self.matches('[\x00-\x1f\x7f]') - && !self.matches('(^|/)[.][.]?(/|$)') - name: - description: This must match the Name of a Volume. - maxLength: 63 - type: string - x-kubernetes-validations: - - message: Name must be a valid DNS label - rule: '!format.dns1123Label().validate(self).hasValue()' - required: - - mountPath - - name - type: object - maxItems: 32 - type: array required: - port type: object required: - httpGet type: object + volumeMounts: + description: volumeMounts define the volumes to mount into this + container. + items: + description: VolumeMount describes a mounting of a Volume + within a actor. + properties: + mountPath: + description: |- + Path within the actor at which the volume should be mounted. Must be a + clean absolute Unix path: must start with '/', not be '/', and contain + no ':', '..', '.', '//', trailing '/', or control characters. + maxLength: 4096 + type: string + x-kubernetes-validations: + - message: 'MountPath must be a clean absolute Unix path: + must start with ''/'', not be ''/'', and contain no + '':'', ''..'', ''.'', ''//'', trailing ''/'', or control + characters' + rule: self.startsWith('/') && size(self) > 1 && !self.endsWith('/') + && !self.contains('//') && !self.contains(':') && + !self.matches('[\x00-\x1f\x7f]') && !self.matches('(^|/)[.][.]?(/|$)') + name: + description: This must match the Name of a Volume. 
+ maxLength: 63 + type: string + x-kubernetes-validations: + - message: Name must be a valid DNS label + rule: '!format.dns1123Label().validate(self).hasValue()' + required: + - mountPath + - name + type: object + maxItems: 32 + type: array required: - image - name diff --git a/pkg/api/v1alpha1/actortemplate_types.go b/pkg/api/v1alpha1/actortemplate_types.go index 5e222886..192a5932 100644 --- a/pkg/api/v1alpha1/actortemplate_types.go +++ b/pkg/api/v1alpha1/actortemplate_types.go @@ -111,6 +111,12 @@ type Container struct { // // +optional Readyz *ContainerReadyz `json:"readyz,omitempty"` + + // volumeMounts define the volumes to mount into this container. + // + // +optional + // +kubebuilder:validation:MaxItems=32 + VolumeMounts []VolumeMount `json:"volumeMounts,omitempty"` } // ContainerReadyz configures the readiness signal for a container. @@ -142,12 +148,6 @@ type HTTPGetAction struct { // +kubebuilder:validation:Minimum=1 // +kubebuilder:validation:Maximum=65535 Port int32 `json:"port"` - - // volumeMounts define the volumes to mount into this container. - // - // +optional - // +kubebuilder:validation:MaxItems=32 - VolumeMounts []VolumeMount `json:"volumeMounts,omitempty"` } // EnvVar represents an environment variable supplied to a container in an diff --git a/pkg/api/v1alpha1/zz_generated.deepcopy.go b/pkg/api/v1alpha1/zz_generated.deepcopy.go index f6a678ca..e977908e 100644 --- a/pkg/api/v1alpha1/zz_generated.deepcopy.go +++ b/pkg/api/v1alpha1/zz_generated.deepcopy.go @@ -176,6 +176,11 @@ func (in *Container) DeepCopyInto(out *Container) { *out = new(ContainerReadyz) (*in).DeepCopyInto(*out) } + if in.VolumeMounts != nil { + in, out := &in.VolumeMounts, &out.VolumeMounts + *out = make([]VolumeMount, len(*in)) + copy(*out, *in) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Container. 
@@ -194,7 +199,7 @@ func (in *ContainerReadyz) DeepCopyInto(out *ContainerReadyz) { if in.HTTPGet != nil { in, out := &in.HTTPGet, &out.HTTPGet *out = new(HTTPGetAction) - (*in).DeepCopyInto(*out) + **out = **in } } @@ -271,11 +276,6 @@ func (in *EnvVarSource) DeepCopy() *EnvVarSource { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *HTTPGetAction) DeepCopyInto(out *HTTPGetAction) { *out = *in - if in.VolumeMounts != nil { - in, out := &in.VolumeMounts, &out.VolumeMounts - *out = make([]VolumeMount, len(*in)) - copy(*out, *in) - } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HTTPGetAction.