From a69748e183293658d2e7409b15810332d8ec3c22 Mon Sep 17 00:00:00 2001 From: Harsh Rawat Date: Sat, 7 Mar 2026 16:17:30 +0530 Subject: [PATCH 1/6] added `containerd-shim-lcow-v1` shim draft In this commit, we are adding the initial draft of `containerd-shim-lcow-v1`. This shim is used for running Linux Containers on Windows. As part of this commit, we are adding the following- - The main entrypoint and supporting logic to start + serve the shim server. - The stubs for Task Service and Shimdiag service. - The implementation for `Sandbox` service. - The implementation for `VM Controller` which manages the VM lifecycle and operations. Signed-off-by: Harsh Rawat --- .../containerd-shim-lcow-v1.exe.manifest | 17 + cmd/containerd-shim-lcow-v1/main.go | 147 ++++++ cmd/containerd-shim-lcow-v1/manager.go | 297 ++++++++++++ cmd/containerd-shim-lcow-v1/manager_test.go | 46 ++ .../resource_windows_386.syso | Bin 0 -> 1526 bytes .../resource_windows_amd64.syso | Bin 0 -> 1526 bytes .../resource_windows_arm.syso | Bin 0 -> 1526 bytes .../resource_windows_arm64.syso | Bin 0 -> 1526 bytes .../service/plugin/plugin.go | 79 ++++ .../service/service.go | 116 +++++ .../service/service_sandbox.go | 169 +++++++ .../service/service_sandbox_internal.go | 311 +++++++++++++ .../service/service_shimdiag.go | 92 ++++ .../service/service_shimdiag_internal.go | 50 ++ .../service/service_task.go | 321 +++++++++++++ .../service/service_task_internal.go | 113 +++++ cmd/containerd-shim-lcow-v1/versioninfo.json | 44 ++ internal/controller/vm/doc.go | 51 +++ internal/controller/vm/interface.go | 116 +++++ internal/controller/vm/status.go | 79 ++++ internal/controller/vm/vm.go | 430 ++++++++++++++++++ internal/controller/vm/vm_lcow.go | 96 ++++ internal/controller/vm/vm_wcow.go | 112 +++++ 23 files changed, 2686 insertions(+) create mode 100644 cmd/containerd-shim-lcow-v1/containerd-shim-lcow-v1.exe.manifest create mode 100644 cmd/containerd-shim-lcow-v1/main.go create mode 100644 
cmd/containerd-shim-lcow-v1/manager.go create mode 100644 cmd/containerd-shim-lcow-v1/manager_test.go create mode 100644 cmd/containerd-shim-lcow-v1/resource_windows_386.syso create mode 100644 cmd/containerd-shim-lcow-v1/resource_windows_amd64.syso create mode 100644 cmd/containerd-shim-lcow-v1/resource_windows_arm.syso create mode 100644 cmd/containerd-shim-lcow-v1/resource_windows_arm64.syso create mode 100644 cmd/containerd-shim-lcow-v1/service/plugin/plugin.go create mode 100644 cmd/containerd-shim-lcow-v1/service/service.go create mode 100644 cmd/containerd-shim-lcow-v1/service/service_sandbox.go create mode 100644 cmd/containerd-shim-lcow-v1/service/service_sandbox_internal.go create mode 100644 cmd/containerd-shim-lcow-v1/service/service_shimdiag.go create mode 100644 cmd/containerd-shim-lcow-v1/service/service_shimdiag_internal.go create mode 100644 cmd/containerd-shim-lcow-v1/service/service_task.go create mode 100644 cmd/containerd-shim-lcow-v1/service/service_task_internal.go create mode 100644 cmd/containerd-shim-lcow-v1/versioninfo.json create mode 100644 internal/controller/vm/doc.go create mode 100644 internal/controller/vm/interface.go create mode 100644 internal/controller/vm/status.go create mode 100644 internal/controller/vm/vm.go create mode 100644 internal/controller/vm/vm_lcow.go create mode 100644 internal/controller/vm/vm_wcow.go diff --git a/cmd/containerd-shim-lcow-v1/containerd-shim-lcow-v1.exe.manifest b/cmd/containerd-shim-lcow-v1/containerd-shim-lcow-v1.exe.manifest new file mode 100644 index 0000000000..ce573f2b4f --- /dev/null +++ b/cmd/containerd-shim-lcow-v1/containerd-shim-lcow-v1.exe.manifest @@ -0,0 +1,17 @@ + + + containerd-shim-lcow-v1 + + + + + + + + + + true + + + + diff --git a/cmd/containerd-shim-lcow-v1/main.go b/cmd/containerd-shim-lcow-v1/main.go new file mode 100644 index 0000000000..097c6f0e2b --- /dev/null +++ b/cmd/containerd-shim-lcow-v1/main.go @@ -0,0 +1,147 @@ +//go:build windows + +// containerd-shim-lcow-v1 
is a containerd shim implementation for Linux Containers on Windows (LCOW). +package main + +import ( + "context" + "errors" + "fmt" + "io" + "os" + + "github.com/Microsoft/hcsshim/cmd/containerd-shim-lcow-v1/service/plugin" + runhcsopts "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/options" + "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/oc" + "github.com/Microsoft/hcsshim/internal/shim" + hcsversion "github.com/Microsoft/hcsshim/internal/version" + "github.com/containerd/errdefs" + + "github.com/Microsoft/go-winio/pkg/etw" + "github.com/Microsoft/go-winio/pkg/etwlogrus" + "github.com/sirupsen/logrus" + "go.opencensus.io/trace" +) + +const ( + // name is the name of lcow shim implementation. + name = "containerd-shim-lcow-v1" + // etwProviderName is the ETW provider name for lcow shim. + etwProviderName = "Microsoft.Virtualization.RunHCSLCOW" +) + +// Add a manifest to get proper Windows version detection. +//go:generate go tool github.com/josephspurrier/goversioninfo/cmd/goversioninfo -platform-specific + +// `-ldflags '-X ...'` only works if the variable is uninitialized or set to a constant value. +// keep empty and override with data from [internal/version] only if empty to allow +// workflows currently setting these values to work. +var ( + // version will be the repo version that the binary was built from. + // Injected at build time via -ldflags '-X ...'. + version = "" + // gitCommit will be the hash that the binary was built from. + // Injected at build time via -ldflags '-X ...'. + gitCommit = "" +) + +func main() { + logrus.AddHook(log.NewHook()) + + // Provider ID: 64F6FC7F-8326-5EE8-B890-3734AE584136 + // Provider and hook aren't closed explicitly, as they will exist until process exit. 
+ provider, err := etw.NewProvider(etwProviderName, plugin.ETWCallback) + if err != nil { + logrus.Error(err) + } else { + if hook, err := etwlogrus.NewHookFromProvider(provider); err == nil { + logrus.AddHook(hook) + } else { + logrus.Error(err) + } + } + + // fall back on embedded version info (if any), if variables above were not set + if version == "" { + version = hcsversion.Version + } + if gitCommit == "" { + gitCommit = hcsversion.Commit + } + + _ = provider.WriteEvent( + "ShimLaunched", + nil, + etw.WithFields( + etw.StringArray("Args", os.Args), + etw.StringField("version", version), + etw.StringField("commit", gitCommit), + ), + ) + + // Register our OpenCensus logrus exporter so that trace spans are emitted via logrus. + trace.ApplyConfig(trace.Config{DefaultSampler: oc.DefaultSampler}) + trace.RegisterExporter(&oc.LogrusExporter{}) + + // LCOW shim is specifically designed for internal MS scenarios and therefore, + // will only log to ETW. + logrus.SetFormatter(log.NopFormatter{}) + logrus.SetOutput(io.Discard) + + // Set the log configuration. + // If we encounter an error, we exit with non-zero code. + if err := setLogConfiguration(); err != nil { + fmt.Fprintf(os.Stderr, "%s: %s", name, err) + os.Exit(1) + } + + // Start the shim manager event loop. The manager is responsible for + // handling containerd start/stop lifecycle calls for the shim process. + shim.Run(context.Background(), newShimManager(name), func(c *shim.Config) { + // We don't want the shim package to set up logging options. + c.NoSetupLogger = true + }) +} + +// setLogConfiguration reads the runtime options from stdin and sets the log configuration. +// We only set up the log configuration for serve action. +func setLogConfiguration() error { + // We set up the log configuration in the serve action only. + // This is because we want to avoid reading the stdin in start action, + // so that we can pass it along to the invocation for serve action. 
+ if len(os.Args) > 1 && os.Args[len(os.Args)-1] == "serve" { + // The serve process is started with stderr pointing to panic.log file. + // We want to keep that file only for pure Go panics. Any explicit writes + // to os.Stderr should go to stdout instead, which is connected to the parent's + // stderr for regular logging. + // We can safely redirect os.Stderr to os.Stdout because in case of panics, + // the Go runtime will write the panic stack trace directly to the file descriptor, + // bypassing os.Stderr, so it will still go to panic.log. + os.Stderr = os.Stdout + + opts, err := shim.ReadRuntimeOptions[*runhcsopts.Options](os.Stdin) + if err != nil { + if !errors.Is(err, errdefs.ErrNotFound) { + return fmt.Errorf("failed to read runtime options from stdin: %w", err) + } + } + + if opts != nil { + if opts.LogLevel != "" { + // If log level is specified, set the corresponding logrus logging level. + lvl, err := logrus.ParseLevel(opts.LogLevel) + if err != nil { + return fmt.Errorf("failed to parse shim log level %q: %w", opts.LogLevel, err) + } + logrus.SetLevel(lvl) + } + + if opts.ScrubLogs { + log.SetScrubbing(true) + } + } + os.Stdin.Close() + } + return nil +} diff --git a/cmd/containerd-shim-lcow-v1/manager.go b/cmd/containerd-shim-lcow-v1/manager.go new file mode 100644 index 0000000000..e2dd14e3a9 --- /dev/null +++ b/cmd/containerd-shim-lcow-v1/manager.go @@ -0,0 +1,297 @@ +//go:build windows + +package main + +import ( + "context" + "errors" + "fmt" + "io" + "log" + "os" + "os/exec" + "path/filepath" + "strings" + "syscall" + "time" + + runhcsopts "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/options" + "github.com/Microsoft/hcsshim/internal/hcs" + "github.com/Microsoft/hcsshim/internal/memory" + "github.com/Microsoft/hcsshim/internal/oc" + "github.com/Microsoft/hcsshim/internal/shim" + + "github.com/containerd/containerd/api/types" + "github.com/containerd/containerd/v2/pkg/namespaces" + "github.com/containerd/errdefs" + 
"github.com/containerd/typeurl/v2" + "github.com/opencontainers/runtime-spec/specs-go" + "github.com/sirupsen/logrus" + "golang.org/x/sys/windows" +) + +const ( + // addrFmt is the format of the address used for containerd shim. + addrFmt = "\\\\.\\pipe\\ProtectedPrefix\\Administrators\\containerd-shim-%s-%s-pipe" +) + +// shimManager implements the shim.Manager interface. It is the entry-point +// used by the containerd shim runner to create and destroy shim instances. +type shimManager struct { + name string +} + +// Verify that shimManager implements shim.Manager interface +var _ shim.Manager = (*shimManager)(nil) + +// newShimManager returns a shimManager with the given binary name. +func newShimManager(name string) *shimManager { + return &shimManager{ + name: name, + } +} + +// newCommand builds the exec.Cmd that will be used to spawn the long-running +// "serve" child process. +func newCommand(ctx context.Context, + id, + containerdAddress, + socketAddr string, + stderr io.Writer, +) (*exec.Cmd, error) { + ns, err := namespaces.NamespaceRequired(ctx) + if err != nil { + return nil, err + } + self, err := os.Executable() + if err != nil { + return nil, err + } + cwd, err := os.Getwd() + if err != nil { + return nil, err + } + + args := []string{ + "-namespace", ns, + "-id", id, + "-address", containerdAddress, + "-socket", socketAddr, + "serve", + } + cmd := exec.Command(self, args...) + cmd.Dir = cwd + // Limit Go runtime parallelism in the child to avoid excessive CPU usage. + cmd.Env = append(os.Environ(), "GOMAXPROCS=4") + // Place the child in its own process group so OS signals (e.g. Ctrl-C) + // sent to the parent are not automatically forwarded to the child. 
+ cmd.SysProcAttr = &syscall.SysProcAttr{ + CreationFlags: windows.CREATE_NEW_PROCESS_GROUP, + } + cmd.Stdin = os.Stdin + cmd.Stdout = os.Stderr + cmd.Stderr = stderr + + return cmd, nil +} + +// Name returns the name of the shim +func (m *shimManager) Name() string { + return m.name +} + +// Start starts a shim instance for 'containerd-shim-lcow-v1'. +// This shim relies on containerd's Sandbox API to start a sandbox. +// There can be following scenarios that will launch a shim- +// +// 1. Containerd Sandbox Controller calls the Start command to start +// the sandbox for the pod. All the container create requests will +// set the SandboxID via `WithSandbox` ContainerOpts. Thereby, the +// container create request within the pod will be routed directly to the +// shim without calling the start command again. +// +// NOTE: This shim will not support routing the create request to an existing +// shim based on annotations like `io.kubernetes.cri.sandbox-id`. +func (m *shimManager) Start(ctx context.Context, id string, opts shim.StartOpts) (_ shim.BootstrapParams, retErr error) { + // We cant write anything to stdout/stderr for this cmd. + logrus.SetOutput(io.Discard) + + var params shim.BootstrapParams + params.Version = 3 + params.Protocol = "ttrpc" + + cwd, err := os.Getwd() + if err != nil { + return params, fmt.Errorf("failed to get current working directory: %w", err) + } + + f, err := os.Create(filepath.Join(cwd, "panic.log")) + if err != nil { + return params, fmt.Errorf("failed to create panic log file: %w", err) + } + defer f.Close() + + ns, err := namespaces.NamespaceRequired(ctx) + if err != nil { + return params, fmt.Errorf("failed to get namespace from context: %w", err) + } + + // Create an event on which we will listen to know when the shim is ready to accept connections. + // The child serve process signals this event once its TTRPC server is fully initialized. 
+ eventName, _ := windows.UTF16PtrFromString(fmt.Sprintf("%s-%s", ns, id)) + + // Create the named event + handle, err := windows.CreateEvent(nil, 0, 0, eventName) + if err != nil { + log.Fatalf("Failed to create event: %v", err) + } + defer windows.CloseHandle(handle) + + // address is the named pipe address that the shim will use to serve the ttrpc service. + address := fmt.Sprintf(addrFmt, ns, id) + + // Create the serve command. + cmd, err := newCommand(ctx, id, opts.Address, address, f) + if err != nil { + return params, err + } + + if err = cmd.Start(); err != nil { + return params, err + } + + defer func() { + if retErr != nil { + cmd.Process.Kill() + } + }() + + // Block until the child signals the event. + _, _ = windows.WaitForSingleObject(handle, windows.INFINITE) + + params.Address = address + return params, nil +} + +// Stop tears down a running shim instance identified by id. +// It reads and logs any panic messages written to panic.log, then tries to +// terminate the associated HCS compute system and waits up to 30 seconds for +// it to exit. +func (m *shimManager) Stop(ctx context.Context, id string) (resp shim.StopStatus, err error) { + ctx, span := oc.StartSpan(context.Background(), "delete") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + + var bundlePath string + if opts, ok := ctx.Value(shim.OptsKey{}).(shim.Opts); ok { + bundlePath = opts.BundlePath + } + + if bundlePath == "" { + return resp, fmt.Errorf("bundle path not found in context") + } + + // hcsshim shim writes panic logs in the bundle directory in a file named "panic.log" + // log those messages (if any) on stderr so that it shows up in containerd's log. + // This should be done as the first thing so that we don't miss any panic logs even if + // something goes wrong during delete op. + // The file can be very large so read only first 1MB of data. 
+ readLimit := int64(memory.MiB) // 1MB + logBytes, err := limitedRead(filepath.Join(bundlePath, "panic.log"), readLimit) + if err == nil && len(logBytes) > 0 { + if int64(len(logBytes)) == readLimit { + logrus.Warnf("shim panic log file %s is larger than 1MB, logging only first 1MB", filepath.Join(bundlePath, "panic.log")) + } + logrus.WithField("log", string(logBytes)).Warn("found shim panic logs during delete") + } else if err != nil && !errors.Is(err, os.ErrNotExist) { + logrus.WithError(err).Warn("failed to open shim panic log") + } + + // Attempt to find the hcssystem for this bundle and terminate it. + if sys, _ := hcs.OpenComputeSystem(ctx, id); sys != nil { + defer sys.Close() + if err := sys.Terminate(ctx); err != nil { + fmt.Fprintf(os.Stderr, "failed to terminate '%s': %v", id, err) + } else { + ch := make(chan error, 1) + go func() { ch <- sys.Wait() }() + t := time.NewTimer(time.Second * 30) + select { + case <-t.C: + sys.Close() + return resp, fmt.Errorf("timed out waiting for '%s' to terminate", id) + case err := <-ch: + t.Stop() + if err != nil { + fmt.Fprintf(os.Stderr, "failed to wait for '%s' to terminate: %v", id, err) + } + } + } + } + + resp = shim.StopStatus{ + ExitedAt: time.Now(), + ExitStatus: 255, + } + return resp, nil +} + +// limitedRead reads at max `readLimitBytes` bytes from the file at path `filePath`. If the file has +// more than `readLimitBytes` bytes of data then first `readLimitBytes` will be returned. +// Read at most readLimitBytes so delete does not flood logs. 
+func limitedRead(filePath string, readLimitBytes int64) ([]byte, error) { + f, err := os.Open(filePath) + if err != nil { + return nil, fmt.Errorf("limited read failed to open file: %s: %w", filePath, err) + } + defer f.Close() + fi, err := f.Stat() + if err != nil { + return []byte{}, fmt.Errorf("limited read failed during file stat: %s: %w", filePath, err) + } + if fi.Size() < readLimitBytes { + readLimitBytes = fi.Size() + } + buf := make([]byte, readLimitBytes) + _, err = f.Read(buf) + if err != nil { + return []byte{}, fmt.Errorf("limited read failed during file read: %s: %w", filePath, err) + } + return buf, nil +} + +// Info returns runtime information about this shim including its name, version, +// git commit, OCI spec version, and any runtime options decoded from optionsR. +func (m *shimManager) Info(ctx context.Context, optionsR io.Reader) (*types.RuntimeInfo, error) { + var v []string + if version != "" { + v = append(v, version) + } + if gitCommit != "" { + v = append(v, fmt.Sprintf("commit: %s", gitCommit)) + } + v = append(v, fmt.Sprintf("spec: %s", specs.Version)) + + info := &types.RuntimeInfo{ + Name: m.name, + Version: &types.RuntimeVersion{ + Version: strings.Join(v, "\n"), + }, + Annotations: nil, + } + + opts, err := shim.ReadRuntimeOptions[*runhcsopts.Options](optionsR) + if err != nil { + if !errors.Is(err, errdefs.ErrNotFound) { + return nil, fmt.Errorf("failed to read runtime options (*options.Options): %w", err) + } + } + if opts != nil { + info.Options, err = typeurl.MarshalAnyToProto(opts) + if err != nil { + return nil, fmt.Errorf("failed to marshal %T: %w", opts, err) + } + } + + return info, nil +} diff --git a/cmd/containerd-shim-lcow-v1/manager_test.go b/cmd/containerd-shim-lcow-v1/manager_test.go new file mode 100644 index 0000000000..fd93692299 --- /dev/null +++ b/cmd/containerd-shim-lcow-v1/manager_test.go @@ -0,0 +1,46 @@ +//go:build windows + +package main + +import ( + "os" + "path/filepath" + "testing" +) + +// 
TestLimitedRead verifies that limitedRead correctly enforces the byte limit +// when the file is larger than the limit, and reads the full content when the +// file is smaller than the limit. +func TestLimitedRead(t *testing.T) { + dir := t.TempDir() + filePath := filepath.Join(dir, "panic.log") + content := []byte("hello") + if err := os.WriteFile(filePath, content, 0o644); err != nil { + t.Fatalf("WriteFile: %v", err) + } + + buf, err := limitedRead(filePath, 2) + if err != nil { + t.Fatalf("limitedRead: %v", err) + } + if string(buf) != "he" { + t.Fatalf("expected 'he', got %q", string(buf)) + } + + buf, err = limitedRead(filePath, 10) + if err != nil { + t.Fatalf("limitedRead: %v", err) + } + if string(buf) != "hello" { + t.Fatalf("expected 'hello', got %q", string(buf)) + } +} + +// TestLimitedReadMissingFile verifies that limitedRead returns an error when +// the target file does not exist. +func TestLimitedReadMissingFile(t *testing.T) { + _, err := limitedRead(filepath.Join(t.TempDir(), "missing.log"), 10) + if err == nil { + t.Fatalf("expected error for missing file") + } +} diff --git a/cmd/containerd-shim-lcow-v1/resource_windows_386.syso b/cmd/containerd-shim-lcow-v1/resource_windows_386.syso new file mode 100644 index 0000000000000000000000000000000000000000..74c73731b69cd212ef0abc718919b150c92eb6c4 GIT binary patch literal 1526 zcma)6O->sz6n+7!ib$2ZWYL8?>aI*C69NsmpAlR#3sz=`c=`+MK}{A|xbr9!6sdO%cKE0tQIiqOuM%@x)H zm_N~*vV{85{>^Asmgq4|Ms&G`{un*Fxl*zGt^drguH2b~Ux^>yS*no1UjgwA^|}-d z3qS~av}{e=TvwXtPpLO7b(6s1t3bq26#Ok1FlsY1n)ok!BY6U04ft>B&Hfu!B zkZ+JAe!%%emOI?jiaJ-6jBrMZlF znoH4fE`)M+y48RyC9-~c!Qd}d#~By0t-_(mxN@^3EOM2{+70up%~e)A^BtCPnZ!cr z)A?ZCDnkh(p~4~=X^7~Cxzs$7qKI5IOfokO^ND-r2i`oWbOZf}>!hEgiN1%D1-Q{v zMz;$1Xp|-)$e_TIn^{wWr>@IBCNjz=iuv{D>$d}co%KY4NsCoq8`8Ce#7m@{SCJnic{dvR%phsmO%X3wsJCWqx`>Yg`;v z25L`KqvH&<9<@DhdN*n}ccTWeUhi<8nA*6110&+qgcqW#i?Im2 syYn|^hs8xv(|j!E4QZTSPr06n<&TqLGkTcEQ4o*wM*k(zK+}B;~J_Dp6XIQkLM%c#@H3Jki)mS|Oya 
zSoa7VfeUa0j?e`+;0B1#o}^7E3!K<~w!ini&(HQmUn*qE&wE6rwN|MWstE0D*<4{Q zfcXQxDJ!VY?B9&$WrZHVWJH%6=ugn2TPPLF-}=Y=`r4gI_?7tKouvvH{1p&iQLjtU zumFUx`>WQp&2^=T{)~F`0qs7$g>^{B^akTYv zHfpKq#72#143u8dH1}gSTtqEeho)tpGq4rFBvuE_qSO(otY(9+P%AhQ4&;0Xw^<{4 zjC>QFV%6kEK{JP%IV0ixzZn1pd*@loCZbSDE<282t2<1{Fpm;B=s53A58Rf+l;$$x zX)Z;_xe&_P>sAA!hEgiT)2IOK_v9 zjBXY1(I`zqkU@bZH?yV$k6f3%Ph^x&74z#)Hg5<1ChLg;$zzSWzvH3VMbl`QW}6{r zEFSA|A(-p_#i_=lQC?^f9rxH_)NwwwL_L1W{f66*`&(`^7A?2G%eUQTD~`77+~4NA z{IlcT(gfZe#eui9!4exR!2a{BwifeZcKR literal 0 HcmV?d00001 diff --git a/cmd/containerd-shim-lcow-v1/resource_windows_arm.syso b/cmd/containerd-shim-lcow-v1/resource_windows_arm.syso new file mode 100644 index 0000000000000000000000000000000000000000..c8fa9b4c5e390aed1399e1e1f9e3f78833b02949 GIT binary patch literal 1526 zcma)6O>Pr06n<&TqLGkTcEQ4o*wM*k(zK+}B;~J_Dp6XIQkLM%c#@H3Jki)mS|Ow! zp-12dT!0-*j?e`+;0B1#o}^7E3!K<~w!ini&(HRJuT;pCpZADLYpqf%R1wb@GJ4dJ4+QZ_$wg3pk9}v zVF3tX_gAfHo9jvw{TcP>1KNFh3+s@M=?%t*Xb)hU@@?(c?{fEZva;W^t3+$~%~xhq z6kwtPK1~HBBxwM)gi;bDIK_Rh1EO+=xRTy`A4R(F_?VIC!N&~e_K9=I)sDa~cX z(_D&dk=2G)SiXw8=Fv;9B%%|?e54=TC=?3}{*GWG~6a61bmf%KH z8Qm)2qfwfKAcF!+Ze~ph9=R@ipU5bmD(2UpY~Bw1P1X|ylE)f#f5$_!i>A>q%{D{M zSUlF_LNM3;i&Kq9qrA`}I_|NrvbDW_P1@b2n-b>-CNfP1<(6S%T*T3kvKRfT@l9H!vcePkAA_x)_VV syE}h#c34~%HOt3x-jK%GMebIscn_E6QJ}2YuVZN~e#^}_Y}LN4KXSlGo&W#< literal 0 HcmV?d00001 diff --git a/cmd/containerd-shim-lcow-v1/resource_windows_arm64.syso b/cmd/containerd-shim-lcow-v1/resource_windows_arm64.syso new file mode 100644 index 0000000000000000000000000000000000000000..59d0b3bf3d484a0a5af503dce0b65e8d520541ea GIT binary patch literal 1526 zcma)6O>Pr06n<&TqLGkTcEQ4o*wM*k(zK+}B;~J_Dp6XIQkLM%c#@H3Jki)mS|Ow! 
zp+{iNiVJW9j?e`+;0B1#o}^7E3!K<~w!ini&(HQmUn^wF&wE6rwN|MWstE0D*<4{Q zfcXQxDJ!VY?B9&$WrZHVWJH%6=ugn2TPPLF-}=Y=`r4gI_?7tKouvvH{1p&iP_IkT zumFUx`>WQp&2^=T{)~F`0qs7$g>^{B^akTYv zHfpKq#72#143u8dH1}gSTtqEeho)tpGq4rFBvuE_qSO(otY(9+P%AhQ4&;0Xw^<{4 zjC>QFV%6kEK{JP%IV0ixzZn1pd*@loCZbSDE<282t2<1{Fpm;B=s53A58Rf+l;$$x zX)Z;_xe&_P>sAA!hEgiT)2IOK_v9 zjBXY1(I`zqkU@bZH?yV$k6f3%Ph^x&74z#)Hg5<1ChLg;$zzSWzvH3VMbl`QW}6{r zEFSA|A(-p_#i_=lQC?^f9rxH_)NwwwL_L1W{f66*`&(`^7A?2G%eUQTD~`77+~4NA z{IlcT(gfZe#eui9!4exR!2a{BwifeZcK PodController for each active pod. + // TODO: Phase B/Pod-delete – wire CreateTask / Delete to create/remove entries here. + podControllers sync.Map + + // shutdown manages graceful shutdown operations and allows registration of cleanup callbacks. + shutdown shutdown.Service +} + +var _ shim.TTRPCService = (*Service)(nil) + +// NewService creates a new instance of the Service with the shared state. +func NewService(ctx context.Context, eventsPublisher shim.Publisher, sd shutdown.Service) *Service { + svc := &Service{ + publisher: eventsPublisher, + events: make(chan interface{}, 128), // Buffered channel for events + vmController: vm.NewController(), + shutdown: sd, + } + + go svc.forward(ctx, eventsPublisher) + + sd.RegisterCallback(func(context.Context) error { + close(svc.events) + return nil + }) + + // Perform best-effort VM cleanup on shutdown. + sd.RegisterCallback(func(ctx context.Context) error { + _ = svc.vmController.TerminateVM(ctx) + return nil + }) + + return svc +} + +// RegisterTTRPC registers the Task, Sandbox, and ShimDiag TTRPC services on +// the provided server so that containerd can call into the shim over TTRPC. +func (s *Service) RegisterTTRPC(server *ttrpc.Server) error { + tasksvc.RegisterTTRPCTaskService(server, s) + sandboxsvc.RegisterTTRPCSandboxService(server, s) + shimdiag.RegisterShimDiagService(server, s) + return nil +} + +// SandboxID returns the unique identifier for the sandbox managed by this Service. 
+func (s *Service) SandboxID() string { + return s.sandboxID +} + +// send enqueues an event onto the internal events channel so that it can be +// forwarded to containerd asynchronously by the forward goroutine. +func (s *Service) send(evt interface{}) { + s.events <- evt +} + +// forward runs in a dedicated goroutine and publishes events from the internal +// events channel to containerd using the provided Publisher. It exits when the +// events channel is closed (which happens during graceful shutdown). +func (s *Service) forward(ctx context.Context, publisher shim.Publisher) { + ns, _ := namespaces.Namespace(ctx) + ctx = namespaces.WithNamespace(context.Background(), ns) + for e := range s.events { + err := publisher.Publish(ctx, runtime.GetTopic(e), e) + if err != nil { + log.G(ctx).WithError(err).Error("post event") + } + } + _ = publisher.Close() +} diff --git a/cmd/containerd-shim-lcow-v1/service/service_sandbox.go b/cmd/containerd-shim-lcow-v1/service/service_sandbox.go new file mode 100644 index 0000000000..9d62b911e5 --- /dev/null +++ b/cmd/containerd-shim-lcow-v1/service/service_sandbox.go @@ -0,0 +1,169 @@ +//go:build windows + +package service + +import ( + "context" + "encoding/json" + "os" + "path/filepath" + + runhcsopts "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/options" + "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/oc" + "github.com/Microsoft/hcsshim/sandbox-spec-v2/vm" + + "github.com/containerd/containerd/api/runtime/sandbox/v1" + errdefs2 "github.com/containerd/errdefs/pkg/errgrpc" + "github.com/containerd/typeurl/v2" + "github.com/sirupsen/logrus" + "go.opencensus.io/trace" +) + +// Ensure Service implements the TTRPCSandboxService interface at compile time. +var _ sandbox.TTRPCSandboxService = &Service{} + +// CreateSandbox creates (or prepares) a new sandbox for the given SandboxID. 
+func (s *Service) CreateSandbox(ctx context.Context, request *sandbox.CreateSandboxRequest) (resp *sandbox.CreateSandboxResponse, err error) { + ctx, span := oc.StartSpan(ctx, "CreateSandbox") + defer span.End() + defer func() { + oc.SetSpanStatus(span, err) + }() + + span.AddAttributes( + trace.StringAttribute("sandbox-id", request.SandboxID), + trace.StringAttribute("bundle", request.BundlePath), + trace.StringAttribute("net-ns-path", request.NetnsPath)) + + // Decode the Sandbox spec passed along from CRI. + var sandboxSpec vm.Spec + f, err := os.Open(filepath.Join(request.BundlePath, "config.json")) + if err != nil { + return nil, err + } + if err := json.NewDecoder(f).Decode(&sandboxSpec); err != nil { + f.Close() + return nil, err + } + f.Close() + + // options is nil when the runtime does not pass any per-sandbox options; + // fall back to an empty Options struct in that case so later code has a + // consistent non-nil value to work with. + shimOpts := &runhcsopts.Options{} + if request.Options != nil { + v, err := typeurl.UnmarshalAny(request.Options) + if err != nil { + return nil, err + } + shimOpts = v.(*runhcsopts.Options) + + if entry := log.G(ctx); entry.Logger.IsLevelEnabled(logrus.DebugLevel) { + entry.WithField("options", log.Format(ctx, shimOpts)).Debug("parsed runhcs runtime options") + } + } + + r, e := s.createSandboxInternal(ctx, request.SandboxID, request.BundlePath, sandboxSpec, shimOpts) + return r, errdefs2.ToGRPC(e) +} + +// StartSandbox transitions a previously created sandbox to the "running" state. 
+func (s *Service) StartSandbox(ctx context.Context, request *sandbox.StartSandboxRequest) (resp *sandbox.StartSandboxResponse, err error) { + ctx, span := oc.StartSpan(ctx, "StartSandbox") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + + span.AddAttributes(trace.StringAttribute("sandbox-id", request.SandboxID)) + + r, e := s.startSandboxInternal(ctx, request.SandboxID) + return r, errdefs2.ToGRPC(e) +} + +// Platform returns the platform details for the sandbox ("windows/amd64" or "linux/amd64"). +func (s *Service) Platform(ctx context.Context, request *sandbox.PlatformRequest) (resp *sandbox.PlatformResponse, err error) { + ctx, span := oc.StartSpan(ctx, "Platform") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + + span.AddAttributes(trace.StringAttribute("sandbox-id", request.SandboxID)) + + r, e := s.platformInternal(ctx, request.SandboxID) + return r, errdefs2.ToGRPC(e) +} + +// StopSandbox attempts a graceful stop of the sandbox within the specified timeout. +func (s *Service) StopSandbox(ctx context.Context, request *sandbox.StopSandboxRequest) (resp *sandbox.StopSandboxResponse, err error) { + ctx, span := oc.StartSpan(ctx, "StopSandbox") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + + span.AddAttributes(trace.StringAttribute("sandbox-id", request.SandboxID)) + span.AddAttributes(trace.Int64Attribute("timeout-secs", int64(request.TimeoutSecs))) + + r, e := s.stopSandboxInternal(ctx, request.GetSandboxID()) + return r, errdefs2.ToGRPC(e) +} + +// WaitSandbox blocks until the sandbox reaches a terminal state (stopped/errored) and returns the outcome. 
+func (s *Service) WaitSandbox(ctx context.Context, request *sandbox.WaitSandboxRequest) (resp *sandbox.WaitSandboxResponse, err error) { + ctx, span := oc.StartSpan(ctx, "WaitSandbox") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + + span.AddAttributes(trace.StringAttribute("sandbox-id", request.SandboxID)) + + r, e := s.waitSandboxInternal(ctx, request.SandboxID) + return r, errdefs2.ToGRPC(e) +} + +// SandboxStatus returns current status for the sandbox, optionally verbose. +func (s *Service) SandboxStatus(ctx context.Context, request *sandbox.SandboxStatusRequest) (resp *sandbox.SandboxStatusResponse, err error) { + ctx, span := oc.StartSpan(ctx, "SandboxStatus") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + + span.AddAttributes(trace.StringAttribute("sandbox-id", request.SandboxID)) + span.AddAttributes(trace.BoolAttribute("verbose", request.Verbose)) + + r, e := s.sandboxStatusInternal(ctx, request.SandboxID, request.Verbose) + return r, errdefs2.ToGRPC(e) +} + +// PingSandbox performs a minimal liveness check on the sandbox and returns quickly. +func (s *Service) PingSandbox(ctx context.Context, request *sandbox.PingRequest) (resp *sandbox.PingResponse, err error) { + ctx, span := oc.StartSpan(ctx, "PingSandbox") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + + span.AddAttributes(trace.StringAttribute("sandbox-id", request.SandboxID)) + + r, e := s.pingSandboxInternal(ctx, request.SandboxID) + return r, errdefs2.ToGRPC(e) +} + +// ShutdownSandbox requests a full shim + sandbox shutdown (stronger than StopSandbox), +// typically used by the higher-level controller to tear down resources and exit the shim. 
+func (s *Service) ShutdownSandbox(ctx context.Context, request *sandbox.ShutdownSandboxRequest) (resp *sandbox.ShutdownSandboxResponse, err error) { + ctx, span := oc.StartSpan(ctx, "ShutdownSandbox") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + + span.AddAttributes(trace.StringAttribute("sandbox-id", request.SandboxID)) + + r, e := s.shutdownSandboxInternal(ctx, request.SandboxID) + return r, errdefs2.ToGRPC(e) +} + +// SandboxMetrics returns runtime metrics for the sandbox (e.g., CPU/memory/IO), +// suitable for monitoring and autoscaling decisions. +func (s *Service) SandboxMetrics(ctx context.Context, request *sandbox.SandboxMetricsRequest) (resp *sandbox.SandboxMetricsResponse, err error) { + ctx, span := oc.StartSpan(ctx, "SandboxMetrics") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + + span.AddAttributes(trace.StringAttribute("sandbox-id", request.SandboxID)) + + r, e := s.sandboxMetricsInternal(ctx, request.SandboxID) + return r, errdefs2.ToGRPC(e) +} diff --git a/cmd/containerd-shim-lcow-v1/service/service_sandbox_internal.go b/cmd/containerd-shim-lcow-v1/service/service_sandbox_internal.go new file mode 100644 index 0000000000..6c95837169 --- /dev/null +++ b/cmd/containerd-shim-lcow-v1/service/service_sandbox_internal.go @@ -0,0 +1,311 @@ +//go:build windows + +package service + +import ( + "context" + "fmt" + "os" + "path/filepath" + "time" + + runhcsoptions "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/options" + "github.com/Microsoft/hcsshim/internal/builder/vm/lcow" + "github.com/Microsoft/hcsshim/internal/controller/vm" + "github.com/Microsoft/hcsshim/internal/gcs/prot" + "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/logfields" + "github.com/Microsoft/hcsshim/internal/protocol/guestresource" + "github.com/Microsoft/hcsshim/internal/vm/vmutils" + vmsandbox "github.com/Microsoft/hcsshim/sandbox-spec-v2/vm" + "github.com/containerd/typeurl/v2" + 
"github.com/sirupsen/logrus" + + "github.com/Microsoft/go-winio" + "github.com/containerd/containerd/api/runtime/sandbox/v1" + "github.com/containerd/containerd/api/types" + "github.com/containerd/errdefs" + "golang.org/x/sys/windows" + "google.golang.org/protobuf/types/known/timestamppb" +) + +const ( + // linuxPlatform refers to the Linux guest OS platform. + linuxPlatform = "linux" + + // SandboxStateReady indicates the sandbox is ready. + SandboxStateReady = "SANDBOX_READY" + // SandboxStateNotReady indicates the sandbox is not ready. + SandboxStateNotReady = "SANDBOX_NOTREADY" +) + +// createSandboxInternal is the implementation for CreateSandbox. +// +// It enforces that only one sandbox can exist per shim instance (this shim +// follows a one-sandbox-per-shim model). It builds the HCS compute-system +// document from the sandbox spec and delegates VM creation to vmController. +func (s *Service) createSandboxInternal(ctx context.Context, sandboxID string, bundlePath string, sandboxSpec vmsandbox.Spec, options *runhcsoptions.Options) (*sandbox.CreateSandboxResponse, error) { + s.mu.Lock() + if s.sandboxID != "" { + return nil, fmt.Errorf("failed to create sandbox: sandbox already exists with ID %s", s.sandboxID) + } + + ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.UVMID, sandboxID)) + + // By setting the sandboxID here, we ensure that any parallel calls for CreateSandbox + // will fail with an error. + s.sandboxID = sandboxID + s.mu.Unlock() + + // Use the shim binary name as the HCS owner, matching the convention used elsewhere in hcsshim. 
+ owner := filepath.Base(os.Args[0]) + + hcsDocument, sandboxOptions, err := lcow.BuildSandboxConfig(ctx, owner, bundlePath, options, sandboxSpec.Annotations, sandboxSpec.Devices) //vmbuilder.ParseSpecs(ctx, owner, sandboxSpec) + if err != nil { + return nil, fmt.Errorf("failed to parse sandbox spec: %w", err) + } + + s.vmHcsDocument = hcsDocument + s.sandboxOptions = sandboxOptions + + err = s.vmController.CreateVM(ctx, &vm.CreateOptions{ + ID: fmt.Sprintf("%s@vm", sandboxID), + HCSDocument: hcsDocument, + }) + if err != nil { + return nil, fmt.Errorf("failed to create VM: %w", err) + } + + return &sandbox.CreateSandboxResponse{}, nil +} + +// startSandboxInternal is the implementation for StartSandbox. +// +// It instructs the vmController to start the VM. If the +// sandbox was created with confidential settings, confidential options are +// applied to the VM after starting. +func (s *Service) startSandboxInternal(ctx context.Context, sandboxID string) (*sandbox.StartSandboxResponse, error) { + if s.sandboxID != sandboxID { + return nil, fmt.Errorf("failed to start sandbox: sandbox ID mismatch, expected %s, got %s", s.sandboxID, sandboxID) + } + + ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.UVMID, sandboxID)) + + var confidentialOpts *guestresource.ConfidentialOptions + if s.sandboxOptions.ConfidentialConfig != nil { + uvmReferenceInfoEncoded, err := vmutils.ParseUVMReferenceInfo( + ctx, + vmutils.DefaultLCOWOSBootFilesPath(), + s.sandboxOptions.ConfidentialConfig.UvmReferenceInfoFile, + ) + if err != nil { + return nil, fmt.Errorf("failed to parse UVM reference info: %w", err) + } + confidentialOpts = &guestresource.ConfidentialOptions{ + EnforcerType: s.sandboxOptions.ConfidentialConfig.SecurityPolicyEnforcer, + EncodedSecurityPolicy: s.sandboxOptions.ConfidentialConfig.SecurityPolicy, + EncodedUVMReference: uvmReferenceInfoEncoded, + } + } + + // VM controller ensures that only once of the Start call goes through. 
+ err := s.vmController.StartVM(ctx, &vm.StartOptions{ + GCSServiceID: winio.VsockServiceID(prot.LinuxGcsVsockPort), + ConfidentialOptions: confidentialOpts, + }) + if err != nil { + return nil, fmt.Errorf("failed to start VM: %w", err) + } + + return &sandbox.StartSandboxResponse{ + CreatedAt: timestamppb.New(s.vmController.StartTime()), + }, nil +} + +// platformInternal is the implementation for Platform. +// +// It returns the guest OS and CPU architecture for the running sandbox. +// An error is returned if the sandbox is not currently in the running state. +func (s *Service) platformInternal(ctx context.Context, sandboxID string) (*sandbox.PlatformResponse, error) { + if s.sandboxID != sandboxID { + return nil, fmt.Errorf("failed to get platform: sandbox ID mismatch, expected %s, got %s", s.sandboxID, sandboxID) + } + + ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.UVMID, sandboxID)) + + if s.vmController.State() != vm.StateRunning { + return nil, fmt.Errorf("failed to get platform: sandbox is not running (state: %s)", s.vmController.State()) + } + + return &sandbox.PlatformResponse{ + Platform: &types.Platform{ + OS: linuxPlatform, + Architecture: s.sandboxOptions.Architecture, + }, + }, nil +} + +// stopSandboxInternal is the implementation for StopSandbox. +// +// It terminates the VM and performs any cleanup, if needed. 
+func (s *Service) stopSandboxInternal(ctx context.Context, sandboxID string) (*sandbox.StopSandboxResponse, error) { + if s.sandboxID != sandboxID { + return nil, fmt.Errorf("failed to stop sandbox: sandbox ID mismatch, expected %s, got %s", s.sandboxID, sandboxID) + } + + ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.UVMID, sandboxID)) + + err := s.vmController.TerminateVM(ctx) + if err != nil { + return nil, fmt.Errorf("failed to terminate VM: %w", err) + } + + if s.vmHcsDocument.VirtualMachine.GuestState != nil { + if err := os.Remove(s.vmHcsDocument.VirtualMachine.GuestState.GuestStateFilePath); err != nil { + log.G(ctx).WithField("VMGS File", s.vmHcsDocument.VirtualMachine.GuestState.GuestStateFilePath). + WithError(err).Error("failed to remove VMGS file") + } + } + + return &sandbox.StopSandboxResponse{}, nil +} + +// waitSandboxInternal is the implementation for WaitSandbox. +// +// It blocks until the underlying VM has stopped, then maps the stopped status +// to a sandbox exit code. +func (s *Service) waitSandboxInternal(ctx context.Context, sandboxID string) (*sandbox.WaitSandboxResponse, error) { + if s.sandboxID != sandboxID { + return nil, fmt.Errorf("failed to wait for sandbox: sandbox ID mismatch, expected %s, got %s", s.sandboxID, sandboxID) + } + + ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.UVMID, sandboxID)) + + // Wait for the VM to stop, then return the exit code. + err := s.vmController.Wait(ctx) + if err != nil { + return nil, fmt.Errorf("failed to wait for VM: %w", err) + } + + stoppedStatus, err := s.vmController.StoppedStatus() + if err != nil { + return nil, fmt.Errorf("failed to get sandbox stopped status: %w", err) + } + + exitStatus := 0 + // If there was an exit error, set a non-zero exit status. 
+ if stoppedStatus.Err != nil { + exitStatus = int(windows.ERROR_INTERNAL_ERROR) + } + + return &sandbox.WaitSandboxResponse{ + ExitStatus: uint32(exitStatus), + ExitedAt: timestamppb.New(stoppedStatus.StoppedTime), + }, nil +} + +// sandboxStatusInternal is the implementation for SandboxStatus. +// +// It synthesizes a status response from the current vmController state. +// When verbose is true, the response may be extended with additional +// diagnostic information. +func (s *Service) sandboxStatusInternal(_ context.Context, sandboxID string, verbose bool) (*sandbox.SandboxStatusResponse, error) { + if s.sandboxID != sandboxID { + return nil, fmt.Errorf("failed to get sandbox status: sandbox ID mismatch, expected %s, got %s", s.sandboxID, sandboxID) + } + + resp := &sandbox.SandboxStatusResponse{ + SandboxID: sandboxID, + State: SandboxStateNotReady, + } + + if s.vmController.State() == vm.StateNotCreated || s.vmController.State() == vm.StateCreated { + return resp, nil + } + + resp.CreatedAt = timestamppb.New(s.vmController.StartTime()) + + if s.vmController.State() == vm.StateRunning { + resp.State = SandboxStateReady + } + + if s.vmController.State() == vm.StateStopped { + stoppedStatus, err := s.vmController.StoppedStatus() + if err != nil { + return nil, fmt.Errorf("failed to get sandbox stopped status: %w", err) + } + resp.ExitedAt = timestamppb.New(stoppedStatus.StoppedTime) + } + + if verbose { + // Add compat info and any other detail + // resp.Info map[string]string + // resp.Extra any + } + + return resp, nil +} + +// pingSandboxInternal is the implementation for PingSandbox. +// +// Ping is not yet implemented for this shim. +func (s *Service) pingSandboxInternal(_ context.Context, _ string) (*sandbox.PingResponse, error) { + // This functionality is not yet applicable for this shim. + // Best scenario, we can return true if the VM is running. 
+ return nil, errdefs.ErrNotImplemented +} + +// shutdownSandboxInternal is used to trigger sandbox shutdown when the shim receives +// a shutdown request from containerd. +// +// The sandbox must already be in the stopped state before shutdown is accepted. +func (s *Service) shutdownSandboxInternal(ctx context.Context, sandboxID string) (*sandbox.ShutdownSandboxResponse, error) { + if sandboxID != s.sandboxID { + return &sandbox.ShutdownSandboxResponse{}, fmt.Errorf("failed to shutdown sandbox: sandbox ID mismatch, expected %s, got %s", s.sandboxID, sandboxID) + } + + if s.vmController.State() != vm.StateStopped { + return &sandbox.ShutdownSandboxResponse{}, fmt.Errorf("failed to shutdown sandbox: sandbox is not stopped (state: %s)", s.vmController.State()) + } + + ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.UVMID, sandboxID)) + + // Use a goroutine to wait for the context to be done. + // This allows us to return the response of the shutdown call prior to + // the server being shut down. + go func() { + <-ctx.Done() + time.Sleep(20 * time.Millisecond) // tiny cushion to avoid edge races + + s.shutdown.Shutdown() + }() + + return &sandbox.ShutdownSandboxResponse{}, nil +} + +// sandboxMetricsInternal is the implementation for SandboxMetrics. +// +// It collects and returns runtime statistics from the vmController. 
+func (s *Service) sandboxMetricsInternal(ctx context.Context, sandboxID string) (*sandbox.SandboxMetricsResponse, error) { + if sandboxID != s.sandboxID { + return &sandbox.SandboxMetricsResponse{}, fmt.Errorf("failed to get sandbox metrics: sandbox ID mismatch, expected %s, got %s", s.sandboxID, sandboxID) + } + + stats, err := s.vmController.Stats(ctx) + if err != nil { + return &sandbox.SandboxMetricsResponse{}, fmt.Errorf("failed to get sandbox metrics: %w", err) + } + + anyStat, err := typeurl.MarshalAny(stats) + if err != nil { + return &sandbox.SandboxMetricsResponse{}, fmt.Errorf("failed to marshal sandbox metrics: %w", err) + } + + return &sandbox.SandboxMetricsResponse{ + Metrics: &types.Metric{ + Timestamp: timestamppb.Now(), + ID: sandboxID, + Data: typeurl.MarshalProto(anyStat), + }, + }, nil +} diff --git a/cmd/containerd-shim-lcow-v1/service/service_shimdiag.go b/cmd/containerd-shim-lcow-v1/service/service_shimdiag.go new file mode 100644 index 0000000000..74c240ce6a --- /dev/null +++ b/cmd/containerd-shim-lcow-v1/service/service_shimdiag.go @@ -0,0 +1,92 @@ +//go:build windows + +package service + +import ( + "context" + "os" + "strings" + + "github.com/Microsoft/hcsshim/internal/oc" + "github.com/Microsoft/hcsshim/internal/shimdiag" + + "github.com/containerd/errdefs/pkg/errgrpc" + "go.opencensus.io/trace" +) + +// Ensure Service implements the ShimDiagService interface at compile time. +var _ shimdiag.ShimDiagService = &Service{} + +// DiagExecInHost executes a process in the host namespace for diagnostic purposes. 
+func (s *Service) DiagExecInHost(ctx context.Context, request *shimdiag.ExecProcessRequest) (resp *shimdiag.ExecProcessResponse, err error) { + ctx, span := oc.StartSpan(ctx, "DiagExecInHost") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + + span.AddAttributes( + trace.StringAttribute("sandbox-id", s.sandboxID), + trace.StringAttribute("args", strings.Join(request.Args, " ")), + trace.StringAttribute("workdir", request.Workdir), + trace.BoolAttribute("terminal", request.Terminal), + trace.StringAttribute("stdin", request.Stdin), + trace.StringAttribute("stdout", request.Stdout), + trace.StringAttribute("stderr", request.Stderr)) + + r, e := s.diagExecInHostInternal(ctx, request) + return r, errgrpc.ToGRPC(e) +} + +// DiagTasks returns information about all tasks in the shim. +func (s *Service) DiagTasks(ctx context.Context, request *shimdiag.TasksRequest) (resp *shimdiag.TasksResponse, err error) { + ctx, span := oc.StartSpan(ctx, "DiagTasks") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + + span.AddAttributes( + trace.StringAttribute("sandbox-id", s.sandboxID), + trace.BoolAttribute("execs", request.Execs)) + + r, e := s.diagTasksInternal(ctx, request) + return r, errgrpc.ToGRPC(e) +} + +// DiagShare shares a directory from the host into the sandbox. +func (s *Service) DiagShare(ctx context.Context, request *shimdiag.ShareRequest) (resp *shimdiag.ShareResponse, err error) { + ctx, span := oc.StartSpan(ctx, "DiagShare") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + + span.AddAttributes( + trace.StringAttribute("sandbox-id", s.sandboxID), + trace.StringAttribute("host-path", request.HostPath), + trace.StringAttribute("uvm-path", request.UvmPath), + trace.BoolAttribute("readonly", request.ReadOnly)) + + r, e := s.diagShareInternal(ctx, request) + return r, errgrpc.ToGRPC(e) +} + +// DiagStacks returns the stack traces of all goroutines in the shim. 
+func (s *Service) DiagStacks(ctx context.Context, request *shimdiag.StacksRequest) (resp *shimdiag.StacksResponse, err error) { + ctx, span := oc.StartSpan(ctx, "DiagStacks") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + + span.AddAttributes(trace.StringAttribute("sandbox-id", s.sandboxID)) + + r, e := s.diagStacksInternal(ctx, request) + return r, errgrpc.ToGRPC(e) +} + +// DiagPid returns the process ID (PID) of the shim for diagnostic purposes. +func (s *Service) DiagPid(ctx context.Context, _ *shimdiag.PidRequest) (resp *shimdiag.PidResponse, err error) { + ctx, span := oc.StartSpan(ctx, "DiagPid") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + + span.AddAttributes(trace.StringAttribute("sandbox-id", s.sandboxID)) + + return &shimdiag.PidResponse{ + Pid: int32(os.Getpid()), + }, nil +} diff --git a/cmd/containerd-shim-lcow-v1/service/service_shimdiag_internal.go b/cmd/containerd-shim-lcow-v1/service/service_shimdiag_internal.go new file mode 100644 index 0000000000..1ef881351a --- /dev/null +++ b/cmd/containerd-shim-lcow-v1/service/service_shimdiag_internal.go @@ -0,0 +1,50 @@ +//go:build windows + +package service + +import ( + "context" + "fmt" + + "github.com/Microsoft/hcsshim/internal/controller/vm" + "github.com/Microsoft/hcsshim/internal/shimdiag" + "github.com/containerd/errdefs" +) + +// diagExecInHostInternal is the implementation for DiagExecInHost. +// +// It is used to create an exec session into the hosting UVM. 
+func (s *Service) diagExecInHostInternal(ctx context.Context, request *shimdiag.ExecProcessRequest) (*shimdiag.ExecProcessResponse, error) { + if request.Terminal && request.Stderr != "" { + return nil, fmt.Errorf("if using terminal, stderr must be empty: %w", errdefs.ErrFailedPrecondition) + } + + if s.vmController.State() != vm.StateRunning { + return nil, fmt.Errorf("cannot exec in host when vm is not running: %w", errdefs.ErrFailedPrecondition) + } + + ec, err := s.vmController.ExecIntoHost(ctx, request) + if err != nil { + return nil, fmt.Errorf("failed to exec into host: %w", err) + } + + return &shimdiag.ExecProcessResponse{ExitCode: int32(ec)}, nil +} + +func (s *Service) diagTasksInternal(ctx context.Context, request *shimdiag.TasksRequest) (*shimdiag.TasksResponse, error) { + _ = ctx + _ = request + return nil, errdefs.ErrNotImplemented +} + +func (s *Service) diagShareInternal(ctx context.Context, request *shimdiag.ShareRequest) (*shimdiag.ShareResponse, error) { + _ = ctx + _ = request + return nil, errdefs.ErrNotImplemented +} + +func (s *Service) diagStacksInternal(ctx context.Context, request *shimdiag.StacksRequest) (*shimdiag.StacksResponse, error) { + _ = ctx + _ = request + return nil, errdefs.ErrNotImplemented +} diff --git a/cmd/containerd-shim-lcow-v1/service/service_task.go b/cmd/containerd-shim-lcow-v1/service/service_task.go new file mode 100644 index 0000000000..c75b37076b --- /dev/null +++ b/cmd/containerd-shim-lcow-v1/service/service_task.go @@ -0,0 +1,321 @@ +//go:build windows + +package service + +import ( + "context" + + "github.com/Microsoft/hcsshim/internal/oc" + + "github.com/containerd/containerd/api/runtime/task/v3" + "github.com/containerd/errdefs/pkg/errgrpc" + "go.opencensus.io/trace" + "google.golang.org/protobuf/types/known/emptypb" +) + +// Ensure Service implements the TTRPCTaskService interface at compile time. +var _ task.TTRPCTaskService = &Service{} + +// State returns the current state of a task or process. 
+func (s *Service) State(ctx context.Context, request *task.StateRequest) (resp *task.StateResponse, err error) { + ctx, span := oc.StartSpan(ctx, "State") + defer span.End() + defer func() { + if resp != nil { + span.AddAttributes( + trace.StringAttribute("status", resp.Status.String()), + trace.Int64Attribute("exit-status", int64(resp.ExitStatus)), + trace.StringAttribute("exited-at", resp.ExitedAt.String())) + } + oc.SetSpanStatus(span, err) + }() + + span.AddAttributes( + trace.StringAttribute("sandbox-id", s.sandboxID), + trace.StringAttribute("id", request.ID), + trace.StringAttribute("exec-id", request.ExecID)) + + r, e := s.stateInternal(ctx, request) + return r, errgrpc.ToGRPC(e) +} + +// Create creates a new task. +func (s *Service) Create(ctx context.Context, request *task.CreateTaskRequest) (resp *task.CreateTaskResponse, err error) { + ctx, span := oc.StartSpan(ctx, "Create") + defer span.End() + defer func() { + if resp != nil { + span.AddAttributes(trace.Int64Attribute("pid", int64(resp.Pid))) + } + oc.SetSpanStatus(span, err) + }() + + span.AddAttributes( + trace.StringAttribute("sandbox-id", s.sandboxID), + trace.StringAttribute("id", request.ID), + trace.StringAttribute("bundle", request.Bundle), + trace.BoolAttribute("terminal", request.Terminal), + trace.StringAttribute("stdin", request.Stdin), + trace.StringAttribute("stdout", request.Stdout), + trace.StringAttribute("stderr", request.Stderr), + trace.StringAttribute("checkpoint", request.Checkpoint), + trace.StringAttribute("parent-checkpoint", request.ParentCheckpoint)) + + r, e := s.createInternal(ctx, request) + return r, errgrpc.ToGRPC(e) +} + +// Start starts a previously created task. 
+func (s *Service) Start(ctx context.Context, request *task.StartRequest) (resp *task.StartResponse, err error) { + ctx, span := oc.StartSpan(ctx, "Start") + defer span.End() + defer func() { + if resp != nil { + span.AddAttributes(trace.Int64Attribute("pid", int64(resp.Pid))) + } + oc.SetSpanStatus(span, err) + }() + + span.AddAttributes( + trace.StringAttribute("sandbox-id", s.sandboxID), + trace.StringAttribute("id", request.ID), + trace.StringAttribute("exec-id", request.ExecID)) + + r, e := s.startInternal(ctx, request) + return r, errgrpc.ToGRPC(e) +} + +// Delete deletes a task and returns its exit status. +func (s *Service) Delete(ctx context.Context, request *task.DeleteRequest) (resp *task.DeleteResponse, err error) { + ctx, span := oc.StartSpan(ctx, "Delete") + defer span.End() + defer func() { + if resp != nil { + span.AddAttributes( + trace.Int64Attribute("pid", int64(resp.Pid)), + trace.Int64Attribute("exit-status", int64(resp.ExitStatus)), + trace.StringAttribute("exited-at", resp.ExitedAt.String())) + } + oc.SetSpanStatus(span, err) + }() + + span.AddAttributes( + trace.StringAttribute("sandbox-id", s.sandboxID), + trace.StringAttribute("id", request.ID), + trace.StringAttribute("exec-id", request.ExecID)) + + r, e := s.deleteInternal(ctx, request) + return r, errgrpc.ToGRPC(e) +} + +// Pids returns all process IDs for a task. +func (s *Service) Pids(ctx context.Context, request *task.PidsRequest) (resp *task.PidsResponse, err error) { + ctx, span := oc.StartSpan(ctx, "Pids") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + + span.AddAttributes( + trace.StringAttribute("sandbox-id", s.sandboxID), + trace.StringAttribute("id", request.ID)) + + r, e := s.pidsInternal(ctx, request) + return r, errgrpc.ToGRPC(e) +} + +// Pause pauses a task. 
+func (s *Service) Pause(ctx context.Context, request *task.PauseRequest) (resp *emptypb.Empty, err error) { + ctx, span := oc.StartSpan(ctx, "Pause") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + + span.AddAttributes( + trace.StringAttribute("sandbox-id", s.sandboxID), + trace.StringAttribute("id", request.ID)) + + r, e := s.pauseInternal(ctx, request) + return r, errgrpc.ToGRPC(e) +} + +// Resume resumes a previously paused task. +func (s *Service) Resume(ctx context.Context, request *task.ResumeRequest) (resp *emptypb.Empty, err error) { + ctx, span := oc.StartSpan(ctx, "Resume") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + + span.AddAttributes( + trace.StringAttribute("sandbox-id", s.sandboxID), + trace.StringAttribute("id", request.ID)) + + r, e := s.resumeInternal(ctx, request) + return r, errgrpc.ToGRPC(e) +} + +// Checkpoint creates a checkpoint of a task. +func (s *Service) Checkpoint(ctx context.Context, request *task.CheckpointTaskRequest) (resp *emptypb.Empty, err error) { + ctx, span := oc.StartSpan(ctx, "Checkpoint") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + + span.AddAttributes( + trace.StringAttribute("sandbox-id", s.sandboxID), + trace.StringAttribute("id", request.ID), + trace.StringAttribute("path", request.Path)) + + r, e := s.checkpointInternal(ctx, request) + return r, errgrpc.ToGRPC(e) +} + +// Kill sends a signal to a task or process. 
+func (s *Service) Kill(ctx context.Context, request *task.KillRequest) (resp *emptypb.Empty, err error) { + ctx, span := oc.StartSpan(ctx, "Kill") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + + span.AddAttributes( + trace.StringAttribute("sandbox-id", s.sandboxID), + trace.StringAttribute("id", request.ID), + trace.StringAttribute("exec-id", request.ExecID), + trace.Int64Attribute("signal", int64(request.Signal)), + trace.BoolAttribute("all", request.All)) + + r, e := s.killInternal(ctx, request) + return r, errgrpc.ToGRPC(e) +} + +// Exec executes an additional process inside a task. +func (s *Service) Exec(ctx context.Context, request *task.ExecProcessRequest) (resp *emptypb.Empty, err error) { + ctx, span := oc.StartSpan(ctx, "Exec") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + + span.AddAttributes( + trace.StringAttribute("sandbox-id", s.sandboxID), + trace.StringAttribute("id", request.ID), + trace.StringAttribute("exec-id", request.ExecID), + trace.BoolAttribute("terminal", request.Terminal), + trace.StringAttribute("stdin", request.Stdin), + trace.StringAttribute("stdout", request.Stdout), + trace.StringAttribute("stderr", request.Stderr)) + + r, e := s.execInternal(ctx, request) + return r, errgrpc.ToGRPC(e) +} + +// ResizePty resizes the terminal of a process. +func (s *Service) ResizePty(ctx context.Context, request *task.ResizePtyRequest) (resp *emptypb.Empty, err error) { + ctx, span := oc.StartSpan(ctx, "ResizePty") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + + span.AddAttributes( + trace.StringAttribute("sandbox-id", s.sandboxID), + trace.StringAttribute("id", request.ID), + trace.StringAttribute("exec-id", request.ExecID), + trace.Int64Attribute("width", int64(request.Width)), + trace.Int64Attribute("height", int64(request.Height))) + + r, e := s.resizePtyInternal(ctx, request) + return r, errgrpc.ToGRPC(e) +} + +// CloseIO closes the IO for a process. 
+func (s *Service) CloseIO(ctx context.Context, request *task.CloseIORequest) (resp *emptypb.Empty, err error) { + ctx, span := oc.StartSpan(ctx, "CloseIO") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + + span.AddAttributes( + trace.StringAttribute("sandbox-id", s.sandboxID), + trace.StringAttribute("id", request.ID), + trace.StringAttribute("exec-id", request.ExecID), + trace.BoolAttribute("stdin", request.Stdin)) + + r, e := s.closeIOInternal(ctx, request) + return r, errgrpc.ToGRPC(e) +} + +// Update updates a running task with new resource constraints. +func (s *Service) Update(ctx context.Context, request *task.UpdateTaskRequest) (resp *emptypb.Empty, err error) { + ctx, span := oc.StartSpan(ctx, "Update") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + + span.AddAttributes( + trace.StringAttribute("sandbox-id", s.sandboxID), + trace.StringAttribute("id", request.ID)) + + r, e := s.updateInternal(ctx, request) + return r, errgrpc.ToGRPC(e) +} + +// Wait waits for a task or process to exit. +func (s *Service) Wait(ctx context.Context, request *task.WaitRequest) (resp *task.WaitResponse, err error) { + ctx, span := oc.StartSpan(ctx, "Wait") + defer span.End() + defer func() { + if resp != nil { + span.AddAttributes( + trace.Int64Attribute("exit-status", int64(resp.ExitStatus)), + trace.StringAttribute("exited-at", resp.ExitedAt.String())) + } + oc.SetSpanStatus(span, err) + }() + + span.AddAttributes( + trace.StringAttribute("sandbox-id", s.sandboxID), + trace.StringAttribute("id", request.ID), + trace.StringAttribute("exec-id", request.ExecID)) + + r, e := s.waitInternal(ctx, request) + return r, errgrpc.ToGRPC(e) +} + +// Stats returns resource usage statistics for a task. 
+func (s *Service) Stats(ctx context.Context, request *task.StatsRequest) (resp *task.StatsResponse, err error) { + ctx, span := oc.StartSpan(ctx, "Stats") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + + span.AddAttributes( + trace.StringAttribute("sandbox-id", s.sandboxID), + trace.StringAttribute("id", request.ID)) + + r, e := s.statsInternal(ctx, request) + return r, errgrpc.ToGRPC(e) +} + +// Connect reconnects to a running task. +func (s *Service) Connect(ctx context.Context, request *task.ConnectRequest) (resp *task.ConnectResponse, err error) { + ctx, span := oc.StartSpan(ctx, "Connect") + defer span.End() + defer func() { + if resp != nil { + span.AddAttributes( + trace.Int64Attribute("shim-pid", int64(resp.ShimPid)), + trace.Int64Attribute("task-pid", int64(resp.TaskPid)), + trace.StringAttribute("version", resp.Version)) + } + oc.SetSpanStatus(span, err) + }() + + span.AddAttributes( + trace.StringAttribute("sandbox-id", s.sandboxID), + trace.StringAttribute("id", request.ID)) + + r, e := s.connectInternal(ctx, request) + return r, errgrpc.ToGRPC(e) +} + +// Shutdown gracefully shuts down the Service. 
+func (s *Service) Shutdown(ctx context.Context, request *task.ShutdownRequest) (resp *emptypb.Empty, err error) { + ctx, span := oc.StartSpan(ctx, "Shutdown") + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + + span.AddAttributes( + trace.StringAttribute("sandbox-id", s.sandboxID), + trace.StringAttribute("id", request.ID)) + + r, e := s.shutdownInternal(ctx, request) + return r, errgrpc.ToGRPC(e) +} diff --git a/cmd/containerd-shim-lcow-v1/service/service_task_internal.go b/cmd/containerd-shim-lcow-v1/service/service_task_internal.go new file mode 100644 index 0000000000..b68ddff470 --- /dev/null +++ b/cmd/containerd-shim-lcow-v1/service/service_task_internal.go @@ -0,0 +1,113 @@ +//go:build windows + +package service + +import ( + "context" + + "github.com/containerd/containerd/api/runtime/task/v3" + "github.com/containerd/errdefs" + "google.golang.org/protobuf/types/known/emptypb" +) + +func (s *Service) stateInternal(ctx context.Context, request *task.StateRequest) (*task.StateResponse, error) { + _ = ctx + _ = request + return nil, errdefs.ErrNotImplemented +} + +func (s *Service) createInternal(ctx context.Context, request *task.CreateTaskRequest) (*task.CreateTaskResponse, error) { + _ = ctx + _ = request + return nil, errdefs.ErrNotImplemented +} + +func (s *Service) startInternal(ctx context.Context, request *task.StartRequest) (*task.StartResponse, error) { + _ = ctx + _ = request + return nil, errdefs.ErrNotImplemented +} + +func (s *Service) deleteInternal(ctx context.Context, request *task.DeleteRequest) (*task.DeleteResponse, error) { + _ = ctx + _ = request + return nil, errdefs.ErrNotImplemented +} + +func (s *Service) pidsInternal(ctx context.Context, request *task.PidsRequest) (*task.PidsResponse, error) { + _ = ctx + _ = request + return nil, errdefs.ErrNotImplemented +} + +func (s *Service) pauseInternal(ctx context.Context, request *task.PauseRequest) (*emptypb.Empty, error) { + _ = ctx + _ = request + return nil, 
errdefs.ErrNotImplemented +} + +func (s *Service) resumeInternal(ctx context.Context, request *task.ResumeRequest) (*emptypb.Empty, error) { + _ = ctx + _ = request + return nil, errdefs.ErrNotImplemented +} + +func (s *Service) checkpointInternal(ctx context.Context, request *task.CheckpointTaskRequest) (*emptypb.Empty, error) { + _ = ctx + _ = request + return nil, errdefs.ErrNotImplemented +} + +func (s *Service) killInternal(ctx context.Context, request *task.KillRequest) (*emptypb.Empty, error) { + _ = ctx + _ = request + return nil, errdefs.ErrNotImplemented +} + +func (s *Service) execInternal(ctx context.Context, request *task.ExecProcessRequest) (*emptypb.Empty, error) { + _ = ctx + _ = request + return nil, errdefs.ErrNotImplemented +} + +func (s *Service) resizePtyInternal(ctx context.Context, request *task.ResizePtyRequest) (*emptypb.Empty, error) { + _ = ctx + _ = request + return nil, errdefs.ErrNotImplemented +} + +func (s *Service) closeIOInternal(ctx context.Context, request *task.CloseIORequest) (*emptypb.Empty, error) { + _ = ctx + _ = request + return nil, errdefs.ErrNotImplemented +} + +func (s *Service) updateInternal(ctx context.Context, request *task.UpdateTaskRequest) (*emptypb.Empty, error) { + _ = ctx + _ = request + return nil, errdefs.ErrNotImplemented +} + +func (s *Service) waitInternal(ctx context.Context, request *task.WaitRequest) (*task.WaitResponse, error) { + _ = ctx + _ = request + return nil, errdefs.ErrNotImplemented +} + +func (s *Service) statsInternal(ctx context.Context, request *task.StatsRequest) (*task.StatsResponse, error) { + _ = ctx + _ = request + return nil, errdefs.ErrNotImplemented +} + +func (s *Service) connectInternal(ctx context.Context, request *task.ConnectRequest) (*task.ConnectResponse, error) { + _ = ctx + _ = request + return nil, errdefs.ErrNotImplemented +} + +func (s *Service) shutdownInternal(ctx context.Context, request *task.ShutdownRequest) (*emptypb.Empty, error) { + _ = ctx + _ = request + 
return nil, errdefs.ErrNotImplemented +} diff --git a/cmd/containerd-shim-lcow-v1/versioninfo.json b/cmd/containerd-shim-lcow-v1/versioninfo.json new file mode 100644 index 0000000000..e450a4fe60 --- /dev/null +++ b/cmd/containerd-shim-lcow-v1/versioninfo.json @@ -0,0 +1,44 @@ +{ + "FixedFileInfo": { + "FileVersion": { + "Major": 1, + "Minor": 0, + "Patch": 0, + "Build": 0 + }, + "ProductVersion": { + "Major": 1, + "Minor": 0, + "Patch": 0, + "Build": 0 + }, + "FileFlagsMask": "3f", + "FileFlags ": "00", + "FileOS": "040004", + "FileType": "01", + "FileSubType": "00" + }, + "StringFileInfo": { + "Comments": "", + "CompanyName": "Microsoft", + "FileDescription": "", + "FileVersion": "", + "InternalName": "", + "LegalCopyright": "", + "LegalTrademarks": "", + "OriginalFilename": "containerd-shim-lcow-v1.exe", + "PrivateBuild": "", + "ProductName": "lcow shim", + "ProductVersion": "v1.0.0.0", + "SpecialBuild": "" + }, + "VarFileInfo": { + "Translation": { + "LangID": "0409", + "CharsetID": "04B0" + } + }, + "IconPath": "", + "ManifestPath": "containerd-shim-lcow-v1.exe.manifest" +} + diff --git a/internal/controller/vm/doc.go b/internal/controller/vm/doc.go new file mode 100644 index 0000000000..af6373f282 --- /dev/null +++ b/internal/controller/vm/doc.go @@ -0,0 +1,51 @@ +//go:build windows + +// Package vm provides a controller for managing the lifecycle of a Utility VM (UVM). +// +// A Utility VM is a lightweight virtual machine used to host Linux (LCOW) or +// Windows (WCOW) containers. This package abstracts the VM lifecycle — +// creation, startup, stats collection, and termination — behind the [Controller] +// interface, with [Manager] as the primary implementation. +// +// # Lifecycle +// +// A VM progresses through the following states: +// +// [StateNotCreated] → [StateCreated] → [StateRunning] → [StateStopped] +// +// - [StateNotCreated]: initial state after [NewController] is called. 
+// - [StateCreated]: after [Controller.CreateVM] succeeds; the VM process exists but has not started. +// - [StateRunning]: after [Controller.StartVM] succeeds; the guest OS is up and the +// Guest Compute Service (GCS) connection is established. +// - [StateStopped]: terminal state reached after the VM exits or [Controller.TerminateVM] is called. +// +// # Platform Variants +// +// Certain behaviours differ between LCOW and WCOW guests and are implemented in +// platform-specific source files selected via build tags (default for lcow shim and "wcow" tag for wcow shim). +// +// # Usage +// +// ctrl := vm.NewController() +// +// if err := ctrl.CreateVM(ctx, &vm.CreateOptions{ +// ID: "my-uvm", +// HCSDocument: doc, +// }); err != nil { +// // handle error +// } +// +// if err := ctrl.StartVM(ctx, &vm.StartOptions{ +// GCSServiceID: serviceGUID, +// }); err != nil { +// // handle error +// } +// +// // ... use ctrl.Guest() for guest interactions ... +// +// if err := ctrl.TerminateVM(ctx); err != nil { +// // handle error +// } +// +// _ = ctrl.Wait(ctx) +package vm diff --git a/internal/controller/vm/interface.go b/internal/controller/vm/interface.go new file mode 100644 index 0000000000..ac42b38722 --- /dev/null +++ b/internal/controller/vm/interface.go @@ -0,0 +1,116 @@ +//go:build windows + +package vm + +import ( + "context" + "time" + + "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/stats" + hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" + "github.com/Microsoft/hcsshim/internal/protocol/guestresource" + "github.com/Microsoft/hcsshim/internal/shimdiag" + "github.com/Microsoft/hcsshim/internal/vm/guestmanager" + "github.com/Microsoft/hcsshim/internal/vm/vmmanager" + + "github.com/Microsoft/go-winio/pkg/guid" +) + +type Controller interface { + // Host returns the vm manager instance for this VM. + Host() *vmmanager.UtilityVM + + // Guest returns the guest manager instance for this VM. 
+ Guest() *guestmanager.Guest + + // State returns the current VM state. + State() State + + // CreateVM creates and initializes a new VM with the specified options. + // This prepares the VM but does not start it. + CreateVM(ctx context.Context, opts *CreateOptions) error + + // StartVM starts the created VM with the specified options. + // This establishes the guest connection, sets up necessary listeners for + // guest-host communication, and transitions the VM to StateRunning. + StartVM(context.Context, *StartOptions) error + + // AddGuestDrivers adds the specified drivers to the VM. + AddGuestDrivers(ctx context.Context, drivers []string) error + + // ExecIntoHost executes a command in the running UVM. + ExecIntoHost(ctx context.Context, request *shimdiag.ExecProcessRequest) (int, error) + + // Wait blocks until the VM exits or the context is cancelled. + // It also waits for log output processing to complete. + Wait(ctx context.Context) error + + Stats(ctx context.Context) (*stats.VirtualMachineStatistics, error) + + TerminateVM(context.Context) error + + // StartTime returns the timestamp when the VM was started. + // Returns zero value of time.Time, if the VM is not in StateRunning or StateStopped. + StartTime() time.Time + + // StoppedStatus returns information about the stopped VM, including when it + // stopped and any exit error. Returns an error if the VM is not in StateStopped. + StoppedStatus() (*StoppedStatus, error) +} + +// Handle is the subset of Controller that grants a consumer access to the +// VM's host and guest surfaces, and the ability to wait for the VM to exit. +// Accepting this narrow interface instead of the full Controller keeps callers +// (e.g. pod.Controller) decoupled from VM lifecycle management concerns they +// do not own. +type Handle interface { + // Host returns the vm manager instance for this VM. + // It can be used to interact with and modify the UVM host state.
+ Host() *vmmanager.UtilityVM + + // Guest returns the guest manager instance for this VM. + // It can be used to perform actions within the guest. + Guest() *guestmanager.Guest + + // AddGuestDrivers adds the specified drivers to the VM. + AddGuestDrivers(ctx context.Context, drivers []string) error + + // State returns the current VM state. + State() State + + // Wait blocks until the VM exits or the context is cancelled. + Wait(ctx context.Context) error +} + +// CreateOptions contains the configuration needed to create a new VM. +type CreateOptions struct { + // ID specifies the unique identifier for the VM. + ID string + + // HCSDocument specifies the HCS schema document used to create the VM. + HCSDocument *hcsschema.ComputeSystem +} + +// StartOptions contains the configuration needed to start a VM and establish +// the Guest Compute Service (GCS) connection. +type StartOptions struct { + // GCSServiceID specifies the GUID for the GCS vsock service. + GCSServiceID guid.GUID + + // ConfigOptions specifies additional configuration options for the guest config. + ConfigOptions []guestmanager.ConfigOption + + // ConfidentialOptions specifies security policy and confidential computing + // options for the VM. This is optional and only used for confidential VMs. + ConfidentialOptions *guestresource.ConfidentialOptions +} + +// StoppedStatus contains information about a stopped VM's final state. +type StoppedStatus struct { + // StoppedTime is the timestamp when the VM stopped. + StoppedTime time.Time + + // Err is the error that caused the VM to stop, if any. + // This will be nil if the VM exited cleanly. + Err error +} diff --git a/internal/controller/vm/status.go b/internal/controller/vm/status.go new file mode 100644 index 0000000000..f1569ba1af --- /dev/null +++ b/internal/controller/vm/status.go @@ -0,0 +1,79 @@ +//go:build windows + +package vm + +import ( + "fmt" + "sync/atomic" +) + +// State represents the current state of the VM lifecycle. 
+// The VM progresses through states in the following order: +// StateNotCreated -> StateCreated -> StateRunning -> StateStopped +type State int32 + +const ( + // StateNotCreated indicates the VM has not been created yet. + // This is the initial state when a Controller is first instantiated. + // Valid transitions: StateNotCreated -> StateCreated (via CreateVM) + StateNotCreated State = iota + + // StateCreated indicates the VM has been created but not started. + // Valid transitions: StateCreated -> StateRunning (via StartVM) + StateCreated + + // StateRunning indicates the VM has been started and is running. + // The guest OS is running and the Guest Compute Service (GCS) connection + // is established. + // Valid transitions: StateRunning -> StateStopped (when VM exits or is terminated) + StateRunning + + // StateStopped indicates the VM has exited or been terminated. + // This is a terminal state - once stopped, the VM cannot be restarted. + // No further state transitions are possible. + StateStopped +) + +// String returns a human-readable string representation of the VM State. +func (s State) String() string { + switch s { + case StateNotCreated: + return "NotCreated" + case StateCreated: + return "Created" + case StateRunning: + return "Running" + case StateStopped: + return "Stopped" + default: + return "Unknown" + } +} + +// atomicState is a concurrency-safe VM state holder backed by an atomic int32. +// All reads and writes go through atomic operations, so no mutex is required +// for state itself. +type atomicState struct { + v atomic.Int32 +} + +// load returns the current State with an atomic read. +func (a *atomicState) load() State { + return State(a.v.Load()) +} + +// store unconditionally sets the state with an atomic write. +func (a *atomicState) store(s State) { + a.v.Store(int32(s)) +} + +// transition atomically moves from `from` to `to` using a compare-and-swap. 
+// It returns an error if the current state is not `from`, leaving the state +// unchanged. This prevents two concurrent callers from both believing they +// performed the same transition. +func (a *atomicState) transition(from, to State) error { + if !a.v.CompareAndSwap(int32(from), int32(to)) { + return fmt.Errorf("unexpected VM state: want %s, got %s", from, a.load()) + } + return nil +} diff --git a/internal/controller/vm/vm.go b/internal/controller/vm/vm.go new file mode 100644 index 0000000000..4aef86e390 --- /dev/null +++ b/internal/controller/vm/vm.go @@ -0,0 +1,430 @@ +//go:build windows + +package vm + +import ( + "context" + "errors" + "fmt" + "sync" + "sync/atomic" + "time" + + "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/stats" + "github.com/Microsoft/hcsshim/internal/cmd" + hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" + "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/logfields" + "github.com/Microsoft/hcsshim/internal/shimdiag" + "github.com/Microsoft/hcsshim/internal/timeout" + "github.com/Microsoft/hcsshim/internal/vm/guestmanager" + "github.com/Microsoft/hcsshim/internal/vm/vmmanager" + "github.com/Microsoft/hcsshim/internal/vm/vmutils" + iwin "github.com/Microsoft/hcsshim/internal/windows" + + "github.com/Microsoft/go-winio/pkg/process" + "github.com/sirupsen/logrus" + "golang.org/x/sync/errgroup" + "golang.org/x/sys/windows" +) + +// Manager is the VM controller implementation that manages the lifecycle of a Utility VM +// and its associated resources. +type Manager struct { + vmID string + uvm *vmmanager.UtilityVM + guest *guestmanager.Guest + + // vmState tracks the current state of the VM lifecycle. + vmState atomicState + + // mu guards the concurrent access to the Manager's fields and operations. + mu sync.Mutex + + // logOutputDone is closed when the GCS log output processing goroutine completes. 
+ logOutputDone chan struct{} + + // Handle to the vmmem process associated with this UVM. Used to look up + // memory metrics for the UVM. + vmmemProcess windows.Handle + + // activeExecCount tracks the number of ongoing ExecIntoHost calls. + activeExecCount atomic.Int64 + + // isPhysicallyBacked indicates whether the VM is using physical backing for its memory. + isPhysicallyBacked bool +} + +// Ensure both the Controller, and its subset Handle are implemented by Manager. +var _ Controller = (*Manager)(nil) +var _ Handle = (*Manager)(nil) + +// NewController creates a new Manager instance in the [StateNotCreated] state. +func NewController() *Manager { + m := &Manager{ + logOutputDone: make(chan struct{}), + } + // Default of vmState would always be 0 and hence StateNotCreated, + // but setting it here explicitly for clarity. + m.vmState.store(StateNotCreated) + return m +} + +// Host returns the vm manager instance for this VM. +// It can be used to interact with and modify the UVM host state. +func (c *Manager) Host() *vmmanager.UtilityVM { + return c.uvm +} + +// Guest returns the guest manager instance for this VM. +// The guest manager provides access to guest-host communication. +func (c *Manager) Guest() *guestmanager.Guest { + return c.guest +} + +// State returns the current VM state. +func (c *Manager) State() State { + return c.vmState.load() +} + +// CreateVM creates the VM using the HCS document and initializes device state. +func (c *Manager) CreateVM(ctx context.Context, opts *CreateOptions) error { + ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.Operation, "CreateVM")) + + c.mu.Lock() + defer c.mu.Unlock() + + if c.vmState.load() == StateCreated { + return nil + } + if c.vmState.load() != StateNotCreated { + return fmt.Errorf("cannot create VM: VM is already in state %s", c.vmState.load()) + } + + // Create the VM via vmmanager.
+ uvm, err := vmmanager.Create(ctx, opts.ID, opts.HCSDocument) + if err != nil { + return fmt.Errorf("failed to create VM: %w", err) + } + c.vmID = opts.ID + c.uvm = uvm + // Determine if the VM is physically backed based on the HCS document configuration. + // We need this while extracting memory metrics, as some of them are only relevant for physically backed VMs. + c.isPhysicallyBacked = !opts.HCSDocument.VirtualMachine.ComputeTopology.Memory.AllowOvercommit + + // Initialize the GuestManager for managing guest interactions. + // We will create the guest connection via GuestManager during StartVM. + c.guest = guestmanager.New(ctx, uvm) + + c.vmState.store(StateCreated) + return nil +} + +// StartVM starts the VM that was previously created via CreateVM. +// It starts the underlying HCS VM, establishes the GCS connection, +// and transitions the VM to [StateRunning]. +// On any failure the VM is transitioned to [StateStopped]. +func (c *Manager) StartVM(ctx context.Context, opts *StartOptions) (err error) { + ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.Operation, "StartVM")) + + if c.uvm == nil || c.guest == nil { + return errors.New("VM has not been created") + } + + c.mu.Lock() + defer c.mu.Unlock() + + if c.vmState.load() == StateRunning { + return nil + } + if c.vmState.load() != StateCreated { + return fmt.Errorf("cannot start VM: VM is already in state %s", c.vmState.load()) + } + + defer func() { + if err != nil { + // If there was an error starting the VM, transition to Stopped. + c.vmState.store(StateStopped) + } + }() + + // save parent context, without timeout to use in terminate + pCtx := ctx + // For remaining operations, we expect them to complete within the GCS connection timeout, + // otherwise we want to fail and cleanup. 
ctx, cancel := context.WithTimeout(pCtx, timeout.GCSConnectionTimeout) + log.G(ctx).Debugf("using gcs connection timeout: %s\n", timeout.GCSConnectionTimeout) + + g, gctx := errgroup.WithContext(ctx) + defer func() { + _ = g.Wait() + }() + defer cancel() + + // we should set up the necessary listeners for guest-host communication. + // The guest needs to connect to predefined vsock ports. + // The host must already be listening on these ports before the guest attempts to connect, + // otherwise the connection would fail. + c.setupEntropyListener(gctx, g) + c.setupLoggingListener(gctx, g) + + err = c.uvm.Start(ctx) + if err != nil { + // use parent context, to prevent 2 minute timeout (set above) from overriding terminate operation's + // timeout and erroring out prematurely + _ = c.uvm.Terminate(pCtx) + return fmt.Errorf("failed to start VM: %w", err) + } + + // Start waiting on the utility VM in the background. + // This goroutine will complete when the VM exits. + go func() { + // the original context may have timeout or propagate a cancellation + // copy the original to prevent it affecting the background wait goroutine + cCtx := context.WithoutCancel(pCtx) + _ = c.uvm.Wait(cCtx) + // Once the VM has exited, atomically record the stopped state. + c.vmState.store(StateStopped) + }() + + // Collect any errors from writing entropy or establishing the log + // connection. + if err = g.Wait(); err != nil { + return err + } + + err = c.guest.CreateConnection(ctx, opts.GCSServiceID, opts.ConfigOptions...) + if err != nil { + return fmt.Errorf("failed to create guest connection: %w", err) + } + + err = c.finalizeGCSConnection(ctx) + if err != nil { + return fmt.Errorf("failed to finalize GCS connection: %w", err) + } + + // Set the confidential options if applicable.
+ if opts.ConfidentialOptions != nil { + if err := c.guest.AddSecurityPolicy(ctx, *opts.ConfidentialOptions); err != nil { + return fmt.Errorf("failed to set confidential options: %w", err) + } + } + + c.vmState.store(StateRunning) + + return nil +} + +func (c *Manager) AddGuestDrivers(ctx context.Context, drivers []string) error { + ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.Operation, "AddGuestDrivers")) + + if c.uvm == nil { + return errors.New("VM has not been created") + } + + c.mu.Lock() + defer c.mu.Unlock() + + if c.vmState.load() != StateRunning { + return fmt.Errorf("cannot add guest drivers: VM is in state %s", c.vmState.load()) + } + + for _, driver := range drivers { + _ = driver + } + + return nil +} + +// ExecIntoHost executes a command in the running UVM. +func (c *Manager) ExecIntoHost(ctx context.Context, request *shimdiag.ExecProcessRequest) (int, error) { + // Keep a count of active exec sessions. + // This will be used to disallow LM with existing exec sessions, + // as that can lead to orphaned processes within UVM. + c.activeExecCount.Add(1) + defer c.activeExecCount.Add(-1) + + cmdReq := &cmd.CmdProcessRequest{ + Args: request.Args, + Workdir: request.Workdir, + Terminal: request.Terminal, + Stdin: request.Stdin, + Stdout: request.Stdout, + Stderr: request.Stderr, + } + return c.guest.ExecIntoUVM(ctx, cmdReq) +} + +// Wait blocks until the VM exits and all log output processing has completed. +func (c *Manager) Wait(ctx context.Context) error { + ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.Operation, "Wait")) + + if c.uvm == nil { + return errors.New("VM has not been created") + } + + var err error + // Wait for the utility VM to exit. + // This will be unblocked when the VM exits or if the context is cancelled. + err = c.uvm.Wait(ctx) + + // Wait for the log output processing to complete, + // which ensures all logs are processed before we return. 
+ select { + case <-ctx.Done(): + ctxErr := fmt.Errorf("failed to wait on uvm output processing: %w", ctx.Err()) + err = errors.Join(err, ctxErr) + case <-c.logOutputDone: + } + + return err +} + +// Stats returns runtime statistics for the VM including processor runtime and +// memory usage. The VM must be in [StateRunning]. +func (c *Manager) Stats(ctx context.Context) (*stats.VirtualMachineStatistics, error) { + ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.Operation, "Stats")) + + if c.uvm == nil { + return nil, errors.New("VM has not been created") + } + + if c.vmState.load() != StateRunning { + return nil, fmt.Errorf("cannot get stats: VM is in state %s", c.vmState.load()) + } + + // Initialization of vmmemProcess with double-checked locking + // to prevent concurrent lookups. + if c.vmmemProcess == 0 { + // At this point in workflow, we are in Running state and + // therefore, c.mu is expected to be uncontended and used only + // in Terminate workflow. + c.mu.Lock() + // Check again after acquiring lock in case another goroutine + // already initialized it + if c.vmmemProcess == 0 { + vmmemHandle, err := vmutils.LookupVMMEM(ctx, c.uvm.RuntimeID(), &iwin.WinAPI{}) + if err != nil { + c.mu.Unlock() + return nil, fmt.Errorf("cannot get stats: %w", err) + } + c.vmmemProcess = vmmemHandle + } + c.mu.Unlock() + } + + s := &stats.VirtualMachineStatistics{} + props, err := c.uvm.PropertiesV2(ctx, hcsschema.PTStatistics, hcsschema.PTMemory) + if err != nil { + return nil, fmt.Errorf("failed to get VM properties: %w", err) + } + s.Processor = &stats.VirtualMachineProcessorStatistics{} + s.Processor.TotalRuntimeNS = uint64(props.Statistics.Processor.TotalRuntime100ns * 100) + + s.Memory = &stats.VirtualMachineMemoryStatistics{} + if !c.isPhysicallyBacked { + // The HCS properties does not return sufficient information to calculate + // working set size for a VA-backed UVM. 
To work around this, we instead + // locate the vmmem process for the VM, and query that process's working set + // instead, which will be the working set for the VM. + memCounters, err := process.GetProcessMemoryInfo(c.vmmemProcess) + if err != nil { + return nil, err + } + s.Memory.WorkingSetBytes = uint64(memCounters.WorkingSetSize) + } + + if props.Memory != nil { + if c.isPhysicallyBacked { + // If the uvm is physically backed we set the working set to the total amount allocated + // to the UVM. AssignedMemory returns the number of 4KB pages. Will always be 4KB + // regardless of what the UVMs actual page size is so we don't need that information. + s.Memory.WorkingSetBytes = props.Memory.VirtualMachineMemory.AssignedMemory * 4096 + } + s.Memory.VirtualNodeCount = props.Memory.VirtualNodeCount + s.Memory.VmMemory = &stats.VirtualMachineMemory{} + s.Memory.VmMemory.AvailableMemory = props.Memory.VirtualMachineMemory.AvailableMemory + s.Memory.VmMemory.AvailableMemoryBuffer = props.Memory.VirtualMachineMemory.AvailableMemoryBuffer + s.Memory.VmMemory.ReservedMemory = props.Memory.VirtualMachineMemory.ReservedMemory + s.Memory.VmMemory.AssignedMemory = props.Memory.VirtualMachineMemory.AssignedMemory + s.Memory.VmMemory.SlpActive = props.Memory.VirtualMachineMemory.SlpActive + s.Memory.VmMemory.BalancingEnabled = props.Memory.VirtualMachineMemory.BalancingEnabled + s.Memory.VmMemory.DmOperationInProgress = props.Memory.VirtualMachineMemory.DmOperationInProgress + } + return s, nil +} + +// TerminateVM forcefully terminates a running VM, closes the guest connection, +// and releases HCS resources. +// +// The context is used for all operations, including waits, so timeouts/cancellations may prevent +// proper uVM cleanup. 
+func (c *Manager) TerminateVM(ctx context.Context) (err error) { + ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.Operation, "TerminateVM")) + + if c.uvm == nil { + return errors.New("VM has not been created") + } + + c.mu.Lock() + defer c.mu.Unlock() + + if c.vmState.load() == StateStopped { + return nil + } + if c.vmState.load() != StateRunning { + return fmt.Errorf("cannot terminate VM: VM is in state %s", c.vmState.load()) + } + + // Best effort attempt to clean up the open vmmem handle. + _ = windows.Close(c.vmmemProcess) + // Terminate the utility VM. This will also cause the Wait() call in the background goroutine to unblock. + _ = c.uvm.Terminate(ctx) + + if err := c.guest.CloseConnection(); err != nil { + log.G(ctx).Errorf("close guest connection failed: %s", err) + } + + err = c.uvm.Close(ctx) + if err != nil { + return fmt.Errorf("failed to close utility VM: %w", err) + } + + // We set the Stopped status at the end and therefore, if any error is encountered during the termination + // or the context was canceled, the VM will not be marked as Stopped. + // In such a case, caller can retry the termination. + c.vmState.store(StateStopped) + return nil +} + +// StartTime returns the timestamp when the VM was started. +// Returns zero value of time.Time, if the VM is not in StateRunning or StateStopped. +func (c *Manager) StartTime() (startTime time.Time) { + if c.uvm == nil { + return startTime + } + + if c.vmState.load() == StateNotCreated || c.vmState.load() == StateCreated { + return startTime + } + + return c.uvm.StartedTime() +} + +// StoppedStatus returns the final status of the VM once it has reached +// [StateStopped], including the time it stopped and any exit error. +// Returns an error if the VM has not yet stopped.
+func (c *Manager) StoppedStatus() (*StoppedStatus, error) { + if c.uvm == nil { + return nil, errors.New("VM has not been created") + } + + if c.vmState.load() != StateStopped { + return nil, fmt.Errorf("cannot get stopped status: VM is in state %s", c.vmState.load()) + } + + return &StoppedStatus{ + StoppedTime: c.uvm.StoppedTime(), + Err: c.uvm.ExitError(), + }, nil +} diff --git a/internal/controller/vm/vm_lcow.go b/internal/controller/vm/vm_lcow.go new file mode 100644 index 0000000000..269871a316 --- /dev/null +++ b/internal/controller/vm/vm_lcow.go @@ -0,0 +1,96 @@ +//go:build windows && !wcow + +package vm + +import ( + "context" + "crypto/rand" + "fmt" + "io" + + "github.com/Microsoft/hcsshim/internal/vm/vmmanager" + "github.com/Microsoft/hcsshim/internal/vm/vmutils" + + "github.com/Microsoft/go-winio" + "golang.org/x/sync/errgroup" +) + +// setupEntropyListener sets up entropy for LCOW UVMs. +// +// Linux VMs require entropy to initialize their random number generators during boot. +// This method listens on a predefined vsock port and provides cryptographically secure +// random data to the Linux init process when it connects. +func (c *Manager) setupEntropyListener(ctx context.Context, group *errgroup.Group) { + group.Go(func() error { + // The Linux guest will connect to this port during init to receive entropy. + entropyConn, err := winio.ListenHvsock(&winio.HvsockAddr{ + VMID: c.uvm.RuntimeID(), + ServiceID: winio.VsockServiceID(vmutils.LinuxEntropyVsockPort), + }) + if err != nil { + return fmt.Errorf("failed to listen on hvSocket for entropy: %w", err) + } + + // Prepare to provide entropy to the init process in the background. This + // must be done in a goroutine since, when using the internal bridge, the + // call to Start() will block until the GCS launches, and this cannot occur + // until the host accepts and closes the entropy connection. 
+ conn, err := vmmanager.AcceptConnection(ctx, c.uvm, entropyConn, true) + if err != nil { + return fmt.Errorf("failed to accept connection on hvSocket for entropy: %w", err) + } + defer conn.Close() + + // Write the required amount of entropy to the connection. + // The init process will read this data and use it to seed the kernel's + // random number generator (CRNG). + _, err = io.CopyN(conn, rand.Reader, vmutils.LinuxEntropyBytes) + if err != nil { + return fmt.Errorf("failed to write entropy to connection: %w", err) + } + + return nil + }) +} + +// setupLoggingListener sets up logging for LCOW UVMs. +// +// This method establishes a vsock connection to receive log output from GCS +// running inside the Linux VM. The logs are parsed and +// forwarded to the host's logging system for monitoring and debugging. +func (c *Manager) setupLoggingListener(ctx context.Context, group *errgroup.Group) { + group.Go(func() error { + // The GCS will connect to this port to stream log output. + logConn, err := winio.ListenHvsock(&winio.HvsockAddr{ + VMID: c.uvm.RuntimeID(), + ServiceID: winio.VsockServiceID(vmutils.LinuxLogVsockPort), + }) + if err != nil { + return fmt.Errorf("failed to listen on hvSocket for logs: %w", err) + } + + // Accept the connection from the GCS. + conn, err := vmmanager.AcceptConnection(ctx, c.uvm, logConn, true) + if err != nil { + return fmt.Errorf("failed to accept connection on hvSocket for logs: %w", err) + } + + // Launch a separate goroutine to process logs for the lifetime of the VM. + go func() { + // Parse GCS log output and forward it to the host logging system. + vmutils.ParseGCSLogrus(c.uvm.ID())(conn) + + // Signal that log output processing has completed. + // This allows Wait() to ensure all logs are processed before returning. + close(c.logOutputDone) + }() + + return nil + }) +} + +// finalizeGCSConnection finalizes the GCS connection for LCOW VMs. +// For LCOW, no additional finalization is needed. 
+func (c *Manager) finalizeGCSConnection(_ context.Context) error { + return nil +} diff --git a/internal/controller/vm/vm_wcow.go b/internal/controller/vm/vm_wcow.go new file mode 100644 index 0000000000..62afd94f6a --- /dev/null +++ b/internal/controller/vm/vm_wcow.go @@ -0,0 +1,112 @@ +//go:build windows && wcow + +package vm + +import ( + "context" + "fmt" + "sync" + + "github.com/Microsoft/go-winio" + "github.com/Microsoft/hcsshim/internal/gcs/prot" + hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" + "github.com/Microsoft/hcsshim/internal/vm/vmmanager" + "github.com/Microsoft/hcsshim/internal/vm/vmutils" + + "github.com/sirupsen/logrus" + "golang.org/x/net/netutil" + "golang.org/x/sync/errgroup" +) + +// setupEntropyListener sets up entropy for WCOW (Windows Containers on Windows) VMs. +// +// For WCOW, entropy setup is not required. Windows VMs have their own internal +// random number generation that does not depend on host-provided entropy. +// This is a no-op implementation to satisfy the platform-specific interface. +// +// For comparison, LCOW VMs require entropy to be provided during boot. +func (c *Manager) setupEntropyListener(_ context.Context, _ *errgroup.Group) {} + +// setupLoggingListener sets up logging for WCOW UVMs. +// +// Unlike LCOW, where the log connection must be established before the VM starts, +// WCOW allows the GCS to connect to the logging socket at any time after the VM +// is running. This method sets up a persistent listener that can accept connections +// even if the GCS restarts or reconnects. +// +// The listener is configured to accept only one concurrent connection at a time +// to prevent resource exhaustion, but will accept new connections if the current one is closed. +// This supports scenarios where the logging service inside the VM needs to restart. 
+func (c *Manager) setupLoggingListener(ctx context.Context, _ *errgroup.Group) { + // For Windows, the listener can receive a connection later (after VM starts), + // so we start the output handler in a goroutine with a non-timeout context. + // This allows the output handler to run independently of the VM creation lifecycle. + // This is useful for the case when the logging service is restarted. + go func() { + baseListener, err := winio.ListenHvsock(&winio.HvsockAddr{ + VMID: c.uvm.RuntimeID(), + ServiceID: prot.WindowsLoggingHvsockServiceID, + }) + if err != nil { + logrus.WithError(err).Fatal("failed to listen for windows logging connections") + } + + // Use a WaitGroup to track active log processing goroutines. + // This ensures we wait for all log processing to complete before closing logOutputDone. + var wg sync.WaitGroup + + // Limit the listener to accept at most 1 concurrent connection. + limitedListener := netutil.LimitListener(baseListener, 1) + + for { + // Accept a connection from the GCS. + conn, err := vmmanager.AcceptConnection(context.WithoutCancel(ctx), c.uvm, limitedListener, false) + if err != nil { + logrus.WithError(err).Error("failed to connect to log socket") + break + } + + // Launch a goroutine to process logs from this connection. + wg.Add(1) + go func() { + defer wg.Done() + logrus.Info("uvm output handler starting") + + // Parse GCS log output and forward it to the host logging system. + // The parser handles logrus-formatted logs from the GCS. + vmutils.ParseGCSLogrus(c.uvm.ID())(conn) + + logrus.Info("uvm output handler finished") + }() + } + + // Wait for all log processing goroutines to complete. + wg.Wait() + + // Signal that log output processing has completed. + if _, ok := <-c.logOutputDone; ok { + close(c.logOutputDone) + } + }() +} + +// finalizeGCSConnection finalizes the GCS connection for WCOW UVMs. +// This is called after CreateConnection succeeds and before the VM is considered fully started. 
+func (c *Manager) finalizeGCSConnection(ctx context.Context) error { + // Prepare the HvSocket address configuration for the external GCS connection. + // The LocalAddress is the VM's runtime ID, and the ParentAddress is the + // predefined host ID for Windows GCS communication. + hvsocketAddress := &hcsschema.HvSocketAddress{ + LocalAddress: c.uvm.RuntimeID().String(), + ParentAddress: prot.WindowsGcsHvHostID.String(), + } + + // Update the guest manager with the HvSocket address configuration. + // This enables the GCS to establish proper bidirectional communication. + err := c.guest.UpdateHvSocketAddress(ctx, hvsocketAddress) + if err != nil { + return fmt.Errorf("failed to create GCS connection: %w", err) + } + + return nil +} From a457c22fdf1eeb95d4b32377aa46f4fed0b5581e Mon Sep 17 00:00:00 2001 From: Harsh Rawat Date: Thu, 12 Mar 2026 03:20:59 +0530 Subject: [PATCH 2/6] address review comments Signed-off-by: Harsh Rawat --- .../containerd-shim-lcow-v2.exe.manifest} | 2 +- .../main.go | 62 +---- .../manager.go | 35 ++- .../manager_test.go | 0 .../resource_windows_386.syso | Bin 1526 -> 1526 bytes .../resource_windows_amd64.syso | Bin 1526 -> 1526 bytes .../resource_windows_arm.syso | Bin 1526 -> 1526 bytes .../resource_windows_arm64.syso | Bin 1526 -> 1526 bytes .../service/plugin/plugin.go | 42 +++- .../service/service.go | 17 +- .../service/service_sandbox.go | 115 ++++----- .../service/service_sandbox_internal.go | 198 ++++++++------- .../service/service_shimdiag.go | 37 +-- .../service/service_shimdiag_internal.go | 9 - .../service/service_task.go | 162 ++++++------ .../service/service_task_internal.go | 0 .../versioninfo.json | 4 +- internal/builder/vm/lcow/specs.go | 6 +- internal/builder/vm/lcow/specs_test.go | 6 +- internal/controller/vm/doc.go | 41 ++- internal/controller/vm/interface.go | 46 +--- internal/controller/vm/state.go | 78 ++++++ internal/controller/vm/status.go | 79 ------ internal/controller/vm/vm.go | 234 +++++++++--------- 
internal/logfields/fields.go | 37 ++- internal/vm/vmutils/doc.go | 2 +- internal/vm/vmutils/utils.go | 31 +++ 27 files changed, 661 insertions(+), 582 deletions(-) rename cmd/{containerd-shim-lcow-v1/containerd-shim-lcow-v1.exe.manifest => containerd-shim-lcow-v2/containerd-shim-lcow-v2.exe.manifest} (92%) rename cmd/{containerd-shim-lcow-v1 => containerd-shim-lcow-v2}/main.go (61%) rename cmd/{containerd-shim-lcow-v1 => containerd-shim-lcow-v2}/manager.go (89%) rename cmd/{containerd-shim-lcow-v1 => containerd-shim-lcow-v2}/manager_test.go (100%) rename cmd/{containerd-shim-lcow-v1 => containerd-shim-lcow-v2}/resource_windows_386.syso (95%) rename cmd/{containerd-shim-lcow-v1 => containerd-shim-lcow-v2}/resource_windows_amd64.syso (95%) rename cmd/{containerd-shim-lcow-v1 => containerd-shim-lcow-v2}/resource_windows_arm.syso (95%) rename cmd/{containerd-shim-lcow-v1 => containerd-shim-lcow-v2}/resource_windows_arm64.syso (95%) rename cmd/{containerd-shim-lcow-v1 => containerd-shim-lcow-v2}/service/plugin/plugin.go (66%) rename cmd/{containerd-shim-lcow-v1 => containerd-shim-lcow-v2}/service/service.go (89%) rename cmd/{containerd-shim-lcow-v1 => containerd-shim-lcow-v2}/service/service_sandbox.go (50%) rename cmd/{containerd-shim-lcow-v1 => containerd-shim-lcow-v2}/service/service_sandbox_internal.go (52%) rename cmd/{containerd-shim-lcow-v1 => containerd-shim-lcow-v2}/service/service_shimdiag.go (61%) rename cmd/{containerd-shim-lcow-v1 => containerd-shim-lcow-v2}/service/service_shimdiag_internal.go (76%) rename cmd/{containerd-shim-lcow-v1 => containerd-shim-lcow-v2}/service/service_task.go (52%) rename cmd/{containerd-shim-lcow-v1 => containerd-shim-lcow-v2}/service/service_task_internal.go (100%) rename cmd/{containerd-shim-lcow-v1 => containerd-shim-lcow-v2}/versioninfo.json (88%) create mode 100644 internal/controller/vm/state.go delete mode 100644 internal/controller/vm/status.go diff --git a/cmd/containerd-shim-lcow-v1/containerd-shim-lcow-v1.exe.manifest 
b/cmd/containerd-shim-lcow-v2/containerd-shim-lcow-v2.exe.manifest similarity index 92% rename from cmd/containerd-shim-lcow-v1/containerd-shim-lcow-v1.exe.manifest rename to cmd/containerd-shim-lcow-v2/containerd-shim-lcow-v2.exe.manifest index ce573f2b4f..9c5ba67277 100644 --- a/cmd/containerd-shim-lcow-v1/containerd-shim-lcow-v1.exe.manifest +++ b/cmd/containerd-shim-lcow-v2/containerd-shim-lcow-v2.exe.manifest @@ -1,6 +1,6 @@ - containerd-shim-lcow-v1 + containerd-shim-lcow-v2 diff --git a/cmd/containerd-shim-lcow-v1/main.go b/cmd/containerd-shim-lcow-v2/main.go similarity index 61% rename from cmd/containerd-shim-lcow-v1/main.go rename to cmd/containerd-shim-lcow-v2/main.go index 097c6f0e2b..44a3b093ad 100644 --- a/cmd/containerd-shim-lcow-v1/main.go +++ b/cmd/containerd-shim-lcow-v2/main.go @@ -1,6 +1,6 @@ //go:build windows -// containerd-shim-lcow-v1 is a containerd shim implementation for Linux Containers on Windows (LCOW). +// containerd-shim-lcow-v2 is a containerd shim implementation for Linux Containers on Windows (LCOW). package main import ( @@ -10,89 +10,39 @@ import ( "io" "os" - "github.com/Microsoft/hcsshim/cmd/containerd-shim-lcow-v1/service/plugin" + _ "github.com/Microsoft/hcsshim/cmd/containerd-shim-lcow-v2/service/plugin" runhcsopts "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/options" "github.com/Microsoft/hcsshim/internal/log" "github.com/Microsoft/hcsshim/internal/oc" "github.com/Microsoft/hcsshim/internal/shim" - hcsversion "github.com/Microsoft/hcsshim/internal/version" - "github.com/containerd/errdefs" - "github.com/Microsoft/go-winio/pkg/etw" - "github.com/Microsoft/go-winio/pkg/etwlogrus" + "github.com/containerd/errdefs" "github.com/sirupsen/logrus" "go.opencensus.io/trace" ) const ( // name is the name of lcow shim implementation. - name = "containerd-shim-lcow-v1" - // etwProviderName is the ETW provider name for lcow shim. 
- etwProviderName = "Microsoft.Virtualization.RunHCSLCOW" + name = "containerd-shim-lcow-v2" ) // Add a manifest to get proper Windows version detection. //go:generate go tool github.com/josephspurrier/goversioninfo/cmd/goversioninfo -platform-specific -// `-ldflags '-X ...'` only works if the variable is uninitialized or set to a constant value. -// keep empty and override with data from [internal/version] only if empty to allow -// workflows currently setting these values to work. -var ( - // version will be the repo version that the binary was built from. - // Injected at build time via -ldflags '-X ...'. - version = "" - // gitCommit will be the hash that the binary was built from. - // Injected at build time via -ldflags '-X ...'. - gitCommit = "" -) - func main() { logrus.AddHook(log.NewHook()) - // Provider ID: 64F6FC7F-8326-5EE8-B890-3734AE584136 - // Provider and hook aren't closed explicitly, as they will exist until process exit. - provider, err := etw.NewProvider(etwProviderName, plugin.ETWCallback) - if err != nil { - logrus.Error(err) - } else { - if hook, err := etwlogrus.NewHookFromProvider(provider); err == nil { - logrus.AddHook(hook) - } else { - logrus.Error(err) - } - } - - // fall back on embedded version info (if any), if variables above were not set - if version == "" { - version = hcsversion.Version - } - if gitCommit == "" { - gitCommit = hcsversion.Commit - } - - _ = provider.WriteEvent( - "ShimLaunched", - nil, - etw.WithFields( - etw.StringArray("Args", os.Args), - etw.StringField("version", version), - etw.StringField("commit", gitCommit), - ), - ) - // Register our OpenCensus logrus exporter so that trace spans are emitted via logrus. trace.ApplyConfig(trace.Config{DefaultSampler: oc.DefaultSampler}) trace.RegisterExporter(&oc.LogrusExporter{}) - // LCOW shim is specifically designed for internal MS scenarios and therefore, - // will only log to ETW. 
logrus.SetFormatter(log.NopFormatter{}) logrus.SetOutput(io.Discard) // Set the log configuration. // If we encounter an error, we exit with non-zero code. if err := setLogConfiguration(); err != nil { - fmt.Fprintf(os.Stderr, "%s: %s", name, err) + _, _ = fmt.Fprintf(os.Stderr, "%s: %s", name, err) os.Exit(1) } @@ -141,7 +91,7 @@ func setLogConfiguration() error { log.SetScrubbing(true) } } - os.Stdin.Close() + _ = os.Stdin.Close() } return nil } diff --git a/cmd/containerd-shim-lcow-v1/manager.go b/cmd/containerd-shim-lcow-v2/manager.go similarity index 89% rename from cmd/containerd-shim-lcow-v1/manager.go rename to cmd/containerd-shim-lcow-v2/manager.go index e2dd14e3a9..8fd37a96f3 100644 --- a/cmd/containerd-shim-lcow-v1/manager.go +++ b/cmd/containerd-shim-lcow-v2/manager.go @@ -11,7 +11,6 @@ import ( "os" "os/exec" "path/filepath" - "strings" "syscall" "time" @@ -20,6 +19,7 @@ import ( "github.com/Microsoft/hcsshim/internal/memory" "github.com/Microsoft/hcsshim/internal/oc" "github.com/Microsoft/hcsshim/internal/shim" + hcsversion "github.com/Microsoft/hcsshim/internal/version" "github.com/containerd/containerd/api/types" "github.com/containerd/containerd/v2/pkg/namespaces" @@ -33,6 +33,11 @@ import ( const ( // addrFmt is the format of the address used for containerd shim. addrFmt = "\\\\.\\pipe\\ProtectedPrefix\\Administrators\\containerd-shim-%s-%s-pipe" + + // serveReadyEventNameFormat is the format string used to construct the named Windows event + // that signals when the child "serve" process is ready to accept ttrpc connections. + // It is formatted with the namespace and shim ID (e.g. "-"). + serveReadyEventNameFormat = "%s-%s" ) // shimManager implements the shim.Manager interface. It is the entry-point @@ -100,7 +105,7 @@ func (m *shimManager) Name() string { return m.name } -// Start starts a shim instance for 'containerd-shim-lcow-v1'. +// Start starts a shim instance for 'containerd-shim-lcow-v2'. 
// This shim relies on containerd's Sandbox API to start a sandbox. // There can be following scenarios that will launch a shim- // @@ -138,14 +143,16 @@ func (m *shimManager) Start(ctx context.Context, id string, opts shim.StartOpts) // Create an event on which we will listen to know when the shim is ready to accept connections. // The child serve process signals this event once its TTRPC server is fully initialized. - eventName, _ := windows.UTF16PtrFromString(fmt.Sprintf("%s-%s", ns, id)) + eventName, _ := windows.UTF16PtrFromString(fmt.Sprintf(serveReadyEventNameFormat, ns, id)) // Create the named event handle, err := windows.CreateEvent(nil, 0, 0, eventName) if err != nil { log.Fatalf("Failed to create event: %v", err) } - defer windows.CloseHandle(handle) + defer func() { + _ = windows.CloseHandle(handle) + }() // address is the named pipe address that the shim will use to serve the ttrpc service. address := fmt.Sprintf(addrFmt, ns, id) @@ -162,7 +169,7 @@ func (m *shimManager) Start(ctx context.Context, id string, opts shim.StartOpts) defer func() { if retErr != nil { - cmd.Process.Kill() + _ = cmd.Process.Kill() } }() @@ -177,7 +184,7 @@ func (m *shimManager) Start(ctx context.Context, id string, opts shim.StartOpts) // It reads and logs any panic messages written to panic.log, then tries to // terminate the associated HCS compute system and waits up to 30 seconds for // it to exit. -func (m *shimManager) Stop(ctx context.Context, id string) (resp shim.StopStatus, err error) { +func (m *shimManager) Stop(_ context.Context, id string) (resp shim.StopStatus, err error) { ctx, span := oc.StartSpan(context.Background(), "delete") defer span.End() defer func() { oc.SetSpanStatus(span, err) }() @@ -230,7 +237,8 @@ func (m *shimManager) Stop(ctx context.Context, id string) (resp shim.StopStatus } resp = shim.StopStatus{ - ExitedAt: time.Now(), + ExitedAt: time.Now(), + // 255 exit code is used by convention to indicate unknown exit reason. 
ExitStatus: 255, } return resp, nil @@ -262,20 +270,11 @@ func limitedRead(filePath string, readLimitBytes int64) ([]byte, error) { // Info returns runtime information about this shim including its name, version, // git commit, OCI spec version, and any runtime options decoded from optionsR. -func (m *shimManager) Info(ctx context.Context, optionsR io.Reader) (*types.RuntimeInfo, error) { - var v []string - if version != "" { - v = append(v, version) - } - if gitCommit != "" { - v = append(v, fmt.Sprintf("commit: %s", gitCommit)) - } - v = append(v, fmt.Sprintf("spec: %s", specs.Version)) - +func (m *shimManager) Info(_ context.Context, optionsR io.Reader) (*types.RuntimeInfo, error) { info := &types.RuntimeInfo{ Name: m.name, Version: &types.RuntimeVersion{ - Version: strings.Join(v, "\n"), + Version: fmt.Sprintf("%s\ncommit: %s\nspec: %s", hcsversion.Version, hcsversion.Commit, specs.Version), }, Annotations: nil, } diff --git a/cmd/containerd-shim-lcow-v1/manager_test.go b/cmd/containerd-shim-lcow-v2/manager_test.go similarity index 100% rename from cmd/containerd-shim-lcow-v1/manager_test.go rename to cmd/containerd-shim-lcow-v2/manager_test.go diff --git a/cmd/containerd-shim-lcow-v1/resource_windows_386.syso b/cmd/containerd-shim-lcow-v2/resource_windows_386.syso similarity index 95% rename from cmd/containerd-shim-lcow-v1/resource_windows_386.syso rename to cmd/containerd-shim-lcow-v2/resource_windows_386.syso index 74c73731b69cd212ef0abc718919b150c92eb6c4..5510dc97e267524a0f754dca07ffaf1a52d3b161 100644 GIT binary patch delta 21 acmeyy{f&FWA4W!_&3_p)KvWv@1SSAvum|1% delta 21 acmeyy{f&FWA4W#Q&3_p)KvWv@1SSAvod?_i diff --git a/cmd/containerd-shim-lcow-v1/resource_windows_amd64.syso b/cmd/containerd-shim-lcow-v2/resource_windows_amd64.syso similarity index 95% rename from cmd/containerd-shim-lcow-v1/resource_windows_amd64.syso rename to cmd/containerd-shim-lcow-v2/resource_windows_amd64.syso index 
cd25ea5e7e7dee06b436ef677de0b294c3f88f6a..2c00dedb2579fdf63c0a136ef2fe1cdb383020d4 100644 GIT binary patch delta 21 acmeyy{f&FWA4W!_&3_p)KvWv@1SSAvum|1% delta 21 acmeyy{f&FWA4W#Q&3_p)KvWv@1SSAvod?_i diff --git a/cmd/containerd-shim-lcow-v1/resource_windows_arm.syso b/cmd/containerd-shim-lcow-v2/resource_windows_arm.syso similarity index 95% rename from cmd/containerd-shim-lcow-v1/resource_windows_arm.syso rename to cmd/containerd-shim-lcow-v2/resource_windows_arm.syso index c8fa9b4c5e390aed1399e1e1f9e3f78833b02949..2706f485e11faac117291c1c25087286b160ba81 100644 GIT binary patch delta 21 acmeyy{f&FWA4W!_&3_p)KvWv@1SSAvum|1% delta 21 acmeyy{f&FWA4W#Q&3_p)KvWv@1SSAvod?_i diff --git a/cmd/containerd-shim-lcow-v1/resource_windows_arm64.syso b/cmd/containerd-shim-lcow-v2/resource_windows_arm64.syso similarity index 95% rename from cmd/containerd-shim-lcow-v1/resource_windows_arm64.syso rename to cmd/containerd-shim-lcow-v2/resource_windows_arm64.syso index 59d0b3bf3d484a0a5af503dce0b65e8d520541ea..718ad2bfb80de8b362502007be11375d31b805fb 100644 GIT binary patch delta 21 acmeyy{f&FWA4W!_&3_p)KvWv@1SSAvum|1% delta 21 acmeyy{f&FWA4W#Q&3_p)KvWv@1SSAvod?_i diff --git a/cmd/containerd-shim-lcow-v1/service/plugin/plugin.go b/cmd/containerd-shim-lcow-v2/service/plugin/plugin.go similarity index 66% rename from cmd/containerd-shim-lcow-v1/service/plugin/plugin.go rename to cmd/containerd-shim-lcow-v2/service/plugin/plugin.go index 643b4c1e8d..560b8de316 100644 --- a/cmd/containerd-shim-lcow-v1/service/plugin/plugin.go +++ b/cmd/containerd-shim-lcow-v2/service/plugin/plugin.go @@ -4,12 +4,15 @@ package plugin import ( "context" + "os" - "github.com/Microsoft/hcsshim/cmd/containerd-shim-lcow-v1/service" + "github.com/Microsoft/hcsshim/cmd/containerd-shim-lcow-v2/service" "github.com/Microsoft/hcsshim/internal/shim" "github.com/Microsoft/hcsshim/internal/shimdiag" + hcsversion "github.com/Microsoft/hcsshim/internal/version" "github.com/Microsoft/go-winio/pkg/etw" + 
"github.com/Microsoft/go-winio/pkg/etwlogrus" "github.com/Microsoft/go-winio/pkg/guid" "github.com/containerd/containerd/v2/pkg/shutdown" "github.com/containerd/containerd/v2/plugins" @@ -18,10 +21,39 @@ import ( "github.com/sirupsen/logrus" ) +const ( + // etwProviderName is the ETW provider name for lcow shim. + etwProviderName = "Microsoft.Virtualization.RunHCSLCOW" +) + // svc holds the single Service instance created during plugin initialization. var svc *service.Service func init() { + // Provider ID: 64F6FC7F-8326-5EE8-B890-3734AE584136 + // Provider and hook aren't closed explicitly, as they will exist until process exit. + provider, err := etw.NewProvider(etwProviderName, etwCallback) + if err != nil { + logrus.Error(err) + } else { + if hook, err := etwlogrus.NewHookFromProvider(provider); err == nil { + logrus.AddHook(hook) + } else { + logrus.Error(err) + } + } + + // Write the "ShimLaunched" event with the shim's command-line arguments. + _ = provider.WriteEvent( + "ShimLaunched", + nil, + etw.WithFields( + etw.StringArray("Args", os.Args), + etw.StringField("Version", hcsversion.Version), + etw.StringField("GitCommit", hcsversion.Commit), + ), + ) + // Register the shim's TTRPC plugin with the containerd plugin registry. // The plugin depends on the event publisher (for publishing task/sandbox // events to containerd) and the internal shutdown service (for co-ordinated @@ -57,15 +89,19 @@ func init() { }) } -// ETWCallback is the ETW callback method for this shim. +// etwCallback is the ETW callback method for this shim. // // On a CaptureState notification (triggered by tools such as wpr or xperf) it // dumps all goroutine stacks – both host-side Go stacks and, when available, // the guest Linux stacks – to the logrus logger tagged with the sandbox ID. // This provides an out-of-band diagnostic snapshot without requiring the shim // to be paused or restarted. 
-func ETWCallback(sourceID guid.GUID, state etw.ProviderState, level etw.Level, matchAnyKeyword uint64, matchAllKeyword uint64, filterData uintptr) { +func etwCallback(sourceID guid.GUID, state etw.ProviderState, level etw.Level, matchAnyKeyword uint64, matchAllKeyword uint64, filterData uintptr) { if state == etw.ProviderStateCaptureState { + if svc == nil { + logrus.Warn("service not initialized") + return + } resp, err := svc.DiagStacks(context.Background(), &shimdiag.StacksRequest{}) if err != nil { return diff --git a/cmd/containerd-shim-lcow-v1/service/service.go b/cmd/containerd-shim-lcow-v2/service/service.go similarity index 89% rename from cmd/containerd-shim-lcow-v1/service/service.go rename to cmd/containerd-shim-lcow-v2/service/service.go index 02b2a9e029..bd9b33cd76 100644 --- a/cmd/containerd-shim-lcow-v1/service/service.go +++ b/cmd/containerd-shim-lcow-v2/service/service.go @@ -8,7 +8,6 @@ import ( "github.com/Microsoft/hcsshim/internal/builder/vm/lcow" "github.com/Microsoft/hcsshim/internal/controller/vm" - hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" "github.com/Microsoft/hcsshim/internal/log" "github.com/Microsoft/hcsshim/internal/shim" "github.com/Microsoft/hcsshim/internal/shimdiag" @@ -22,7 +21,7 @@ import ( ) // Service is the shared Service struct that implements all TTRPC Service interfaces. -// All Service methods (sandbox, task, shimdiag,) operate on this shared struct. +// All Service methods (sandbox, task, and shimdiag) operate on this shared struct. type Service struct { // mu is used to synchronize access to shared state within the Service. mu sync.Mutex @@ -33,11 +32,9 @@ type Service struct { events chan interface{} // sandboxID is the unique identifier for the sandbox managed by this Service instance. + // For LCOW shim, sandboxID corresponds 1-1 with the UtilityVM managed by the shim. sandboxID string - // vmHcsDocument holds the HCS compute-system document used to create the VM. 
- vmHcsDocument *hcsschema.ComputeSystem - // sandboxOptions contains parsed, shim-level configuration for the sandbox // such as architecture and confidential-compute settings. sandboxOptions *lcow.SandboxOptions @@ -45,10 +42,6 @@ type Service struct { // vmController is responsible for managing the lifecycle of the underlying utility VM and its associated resources. vmController vm.Controller - // podControllers maps podID -> PodController for each active pod. - // TODO: Phase B/Pod-delete – wire CreateTask / Delete to create/remove entries here. - podControllers sync.Map - // shutdown manages graceful shutdown operations and allows registration of cleanup callbacks. shutdown shutdown.Service } @@ -66,6 +59,8 @@ func NewService(ctx context.Context, eventsPublisher shim.Publisher, sd shutdown go svc.forward(ctx, eventsPublisher) + // Register a shutdown callback to close the events channel, + // which signals the forward goroutine to exit. sd.RegisterCallback(func(context.Context) error { close(svc.events) return nil @@ -96,6 +91,10 @@ func (s *Service) SandboxID() string { // send enqueues an event onto the internal events channel so that it can be // forwarded to containerd asynchronously by the forward goroutine. +// +// TODO: wire up send() for task events once task lifecycle methods are implemented. 
+// +//nolint:unused func (s *Service) send(evt interface{}) { s.events <- evt } diff --git a/cmd/containerd-shim-lcow-v1/service/service_sandbox.go b/cmd/containerd-shim-lcow-v2/service/service_sandbox.go similarity index 50% rename from cmd/containerd-shim-lcow-v1/service/service_sandbox.go rename to cmd/containerd-shim-lcow-v2/service/service_sandbox.go index 9d62b911e5..a82aa6b467 100644 --- a/cmd/containerd-shim-lcow-v1/service/service_sandbox.go +++ b/cmd/containerd-shim-lcow-v2/service/service_sandbox.go @@ -4,18 +4,13 @@ package service import ( "context" - "encoding/json" - "os" - "path/filepath" - runhcsopts "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/options" "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/logfields" "github.com/Microsoft/hcsshim/internal/oc" - "github.com/Microsoft/hcsshim/sandbox-spec-v2/vm" "github.com/containerd/containerd/api/runtime/sandbox/v1" errdefs2 "github.com/containerd/errdefs/pkg/errgrpc" - "github.com/containerd/typeurl/v2" "github.com/sirupsen/logrus" "go.opencensus.io/trace" ) @@ -24,6 +19,7 @@ import ( var _ sandbox.TTRPCSandboxService = &Service{} // CreateSandbox creates (or prepares) a new sandbox for the given SandboxID. +// This method is part of the instrumentation layer and business logic is included in createSandboxInternal. func (s *Service) CreateSandbox(ctx context.Context, request *sandbox.CreateSandboxRequest) (resp *sandbox.CreateSandboxResponse, err error) { ctx, span := oc.StartSpan(ctx, "CreateSandbox") defer span.End() @@ -32,138 +28,143 @@ func (s *Service) CreateSandbox(ctx context.Context, request *sandbox.CreateSand }() span.AddAttributes( - trace.StringAttribute("sandbox-id", request.SandboxID), - trace.StringAttribute("bundle", request.BundlePath), - trace.StringAttribute("net-ns-path", request.NetnsPath)) - - // Decode the Sandbox spec passed along from CRI. 
- var sandboxSpec vm.Spec - f, err := os.Open(filepath.Join(request.BundlePath, "config.json")) - if err != nil { - return nil, err - } - if err := json.NewDecoder(f).Decode(&sandboxSpec); err != nil { - f.Close() - return nil, err - } - f.Close() - - // options is nil when the runtime does not pass any per-sandbox options; - // fall back to an empty Options struct in that case so later code has a - // consistent non-nil value to work with. - shimOpts := &runhcsopts.Options{} - if request.Options != nil { - v, err := typeurl.UnmarshalAny(request.Options) - if err != nil { - return nil, err - } - shimOpts = v.(*runhcsopts.Options) - - if entry := log.G(ctx); entry.Logger.IsLevelEnabled(logrus.DebugLevel) { - entry.WithField("options", log.Format(ctx, shimOpts)).Debug("parsed runhcs runtime options") - } - } - - r, e := s.createSandboxInternal(ctx, request.SandboxID, request.BundlePath, sandboxSpec, shimOpts) + trace.StringAttribute(logfields.SandboxID, request.SandboxID), + trace.StringAttribute(logfields.Bundle, request.BundlePath), + trace.StringAttribute(logfields.NetNsPath, request.NetnsPath), + ) + + // Set the sandbox ID in the logger context for all subsequent logs in this request. + ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.SandboxID, request.SandboxID)) + + r, e := s.createSandboxInternal(ctx, request) return r, errdefs2.ToGRPC(e) } // StartSandbox transitions a previously created sandbox to the "running" state. +// This method is part of the instrumentation layer and business logic is included in startSandboxInternal. 
func (s *Service) StartSandbox(ctx context.Context, request *sandbox.StartSandboxRequest) (resp *sandbox.StartSandboxResponse, err error) { ctx, span := oc.StartSpan(ctx, "StartSandbox") defer span.End() defer func() { oc.SetSpanStatus(span, err) }() - span.AddAttributes(trace.StringAttribute("sandbox-id", request.SandboxID)) + span.AddAttributes(trace.StringAttribute(logfields.SandboxID, request.SandboxID)) + + // Set the sandbox ID in the logger context for all subsequent logs in this request. + ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.SandboxID, request.SandboxID)) - r, e := s.startSandboxInternal(ctx, request.SandboxID) + r, e := s.startSandboxInternal(ctx, request) return r, errdefs2.ToGRPC(e) } // Platform returns the platform details for the sandbox ("windows/amd64" or "linux/amd64"). +// This method is part of the instrumentation layer and business logic is included in platformInternal. func (s *Service) Platform(ctx context.Context, request *sandbox.PlatformRequest) (resp *sandbox.PlatformResponse, err error) { ctx, span := oc.StartSpan(ctx, "Platform") defer span.End() defer func() { oc.SetSpanStatus(span, err) }() - span.AddAttributes(trace.StringAttribute("sandbox-id", request.SandboxID)) + span.AddAttributes(trace.StringAttribute(logfields.SandboxID, request.SandboxID)) - r, e := s.platformInternal(ctx, request.SandboxID) + r, e := s.platformInternal(ctx, request) return r, errdefs2.ToGRPC(e) } // StopSandbox attempts a graceful stop of the sandbox within the specified timeout. +// This method is part of the instrumentation layer and business logic is included in stopSandboxInternal. 
func (s *Service) StopSandbox(ctx context.Context, request *sandbox.StopSandboxRequest) (resp *sandbox.StopSandboxResponse, err error) { ctx, span := oc.StartSpan(ctx, "StopSandbox") defer span.End() defer func() { oc.SetSpanStatus(span, err) }() - span.AddAttributes(trace.StringAttribute("sandbox-id", request.SandboxID)) - span.AddAttributes(trace.Int64Attribute("timeout-secs", int64(request.TimeoutSecs))) + span.AddAttributes(trace.StringAttribute(logfields.SandboxID, request.SandboxID)) + span.AddAttributes(trace.Int64Attribute(logfields.Timeout, int64(request.TimeoutSecs))) - r, e := s.stopSandboxInternal(ctx, request.GetSandboxID()) + // Set the sandbox ID in the logger context for all subsequent logs in this request. + ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.SandboxID, request.SandboxID)) + + r, e := s.stopSandboxInternal(ctx, request) return r, errdefs2.ToGRPC(e) } // WaitSandbox blocks until the sandbox reaches a terminal state (stopped/errored) and returns the outcome. +// This method is part of the instrumentation layer and business logic is included in waitSandboxInternal. func (s *Service) WaitSandbox(ctx context.Context, request *sandbox.WaitSandboxRequest) (resp *sandbox.WaitSandboxResponse, err error) { ctx, span := oc.StartSpan(ctx, "WaitSandbox") defer span.End() defer func() { oc.SetSpanStatus(span, err) }() - span.AddAttributes(trace.StringAttribute("sandbox-id", request.SandboxID)) + span.AddAttributes(trace.StringAttribute(logfields.SandboxID, request.SandboxID)) + + // Set the sandbox ID in the logger context for all subsequent logs in this request. + ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.SandboxID, request.SandboxID)) - r, e := s.waitSandboxInternal(ctx, request.SandboxID) + r, e := s.waitSandboxInternal(ctx, request) return r, errdefs2.ToGRPC(e) } // SandboxStatus returns current status for the sandbox, optionally verbose. 
+// This method is part of the instrumentation layer and business logic is included in sandboxStatusInternal. func (s *Service) SandboxStatus(ctx context.Context, request *sandbox.SandboxStatusRequest) (resp *sandbox.SandboxStatusResponse, err error) { ctx, span := oc.StartSpan(ctx, "SandboxStatus") defer span.End() defer func() { oc.SetSpanStatus(span, err) }() - span.AddAttributes(trace.StringAttribute("sandbox-id", request.SandboxID)) - span.AddAttributes(trace.BoolAttribute("verbose", request.Verbose)) + span.AddAttributes(trace.StringAttribute(logfields.SandboxID, request.SandboxID)) + span.AddAttributes(trace.BoolAttribute(logfields.Verbose, request.Verbose)) - r, e := s.sandboxStatusInternal(ctx, request.SandboxID, request.Verbose) + // Set the sandbox ID in the logger context for all subsequent logs in this request. + ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.SandboxID, request.SandboxID)) + + r, e := s.sandboxStatusInternal(ctx, request) return r, errdefs2.ToGRPC(e) } // PingSandbox performs a minimal liveness check on the sandbox and returns quickly. +// This method is part of the instrumentation layer and business logic is included in pingSandboxInternal. func (s *Service) PingSandbox(ctx context.Context, request *sandbox.PingRequest) (resp *sandbox.PingResponse, err error) { ctx, span := oc.StartSpan(ctx, "PingSandbox") defer span.End() defer func() { oc.SetSpanStatus(span, err) }() - span.AddAttributes(trace.StringAttribute("sandbox-id", request.SandboxID)) + span.AddAttributes(trace.StringAttribute(logfields.SandboxID, request.SandboxID)) + + // Set the sandbox ID in the logger context for all subsequent logs in this request. 
+ ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.SandboxID, request.SandboxID)) - r, e := s.pingSandboxInternal(ctx, request.SandboxID) + r, e := s.pingSandboxInternal(ctx, request) return r, errdefs2.ToGRPC(e) } // ShutdownSandbox requests a full shim + sandbox shutdown (stronger than StopSandbox), // typically used by the higher-level controller to tear down resources and exit the shim. +// This method is part of the instrumentation layer and business logic is included in shutdownSandboxInternal. func (s *Service) ShutdownSandbox(ctx context.Context, request *sandbox.ShutdownSandboxRequest) (resp *sandbox.ShutdownSandboxResponse, err error) { ctx, span := oc.StartSpan(ctx, "ShutdownSandbox") defer span.End() defer func() { oc.SetSpanStatus(span, err) }() - span.AddAttributes(trace.StringAttribute("sandbox-id", request.SandboxID)) + span.AddAttributes(trace.StringAttribute(logfields.SandboxID, request.SandboxID)) - r, e := s.shutdownSandboxInternal(ctx, request.SandboxID) + // Set the sandbox ID in the logger context for all subsequent logs in this request. + ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.SandboxID, request.SandboxID)) + + r, e := s.shutdownSandboxInternal(ctx, request) return r, errdefs2.ToGRPC(e) } // SandboxMetrics returns runtime metrics for the sandbox (e.g., CPU/memory/IO), // suitable for monitoring and autoscaling decisions. +// This method is part of the instrumentation layer and business logic is included in sandboxMetricsInternal. 
func (s *Service) SandboxMetrics(ctx context.Context, request *sandbox.SandboxMetricsRequest) (resp *sandbox.SandboxMetricsResponse, err error) { ctx, span := oc.StartSpan(ctx, "SandboxMetrics") defer span.End() defer func() { oc.SetSpanStatus(span, err) }() - span.AddAttributes(trace.StringAttribute("sandbox-id", request.SandboxID)) + span.AddAttributes(trace.StringAttribute(logfields.SandboxID, request.SandboxID)) + + // Set the sandbox ID in the logger context for all subsequent logs in this request. + ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.SandboxID, request.SandboxID)) - r, e := s.sandboxMetricsInternal(ctx, request.SandboxID) + r, e := s.sandboxMetricsInternal(ctx, request) return r, errdefs2.ToGRPC(e) } diff --git a/cmd/containerd-shim-lcow-v1/service/service_sandbox_internal.go b/cmd/containerd-shim-lcow-v2/service/service_sandbox_internal.go similarity index 52% rename from cmd/containerd-shim-lcow-v1/service/service_sandbox_internal.go rename to cmd/containerd-shim-lcow-v2/service/service_sandbox_internal.go index 6c95837169..8742cada62 100644 --- a/cmd/containerd-shim-lcow-v1/service/service_sandbox_internal.go +++ b/cmd/containerd-shim-lcow-v2/service/service_sandbox_internal.go @@ -4,27 +4,25 @@ package service import ( "context" + "encoding/json" "fmt" "os" "path/filepath" "time" - runhcsoptions "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/options" "github.com/Microsoft/hcsshim/internal/builder/vm/lcow" "github.com/Microsoft/hcsshim/internal/controller/vm" "github.com/Microsoft/hcsshim/internal/gcs/prot" "github.com/Microsoft/hcsshim/internal/log" - "github.com/Microsoft/hcsshim/internal/logfields" "github.com/Microsoft/hcsshim/internal/protocol/guestresource" "github.com/Microsoft/hcsshim/internal/vm/vmutils" - vmsandbox "github.com/Microsoft/hcsshim/sandbox-spec-v2/vm" - "github.com/containerd/typeurl/v2" - "github.com/sirupsen/logrus" + vmsandbox "github.com/Microsoft/hcsshim/sandbox-spec/vm/v2" 
"github.com/Microsoft/go-winio" "github.com/containerd/containerd/api/runtime/sandbox/v1" "github.com/containerd/containerd/api/types" "github.com/containerd/errdefs" + "github.com/containerd/typeurl/v2" "golang.org/x/sys/windows" "google.golang.org/protobuf/types/known/timestamppb" ) @@ -44,38 +42,56 @@ const ( // It enforces that only one sandbox can exist per shim instance (this shim // follows a one-sandbox-per-shim model). It builds the HCS compute-system // document from the sandbox spec and delegates VM creation to vmController. -func (s *Service) createSandboxInternal(ctx context.Context, sandboxID string, bundlePath string, sandboxSpec vmsandbox.Spec, options *runhcsoptions.Options) (*sandbox.CreateSandboxResponse, error) { - s.mu.Lock() - if s.sandboxID != "" { - return nil, fmt.Errorf("failed to create sandbox: sandbox already exists with ID %s", s.sandboxID) +func (s *Service) createSandboxInternal(ctx context.Context, request *sandbox.CreateSandboxRequest) (*sandbox.CreateSandboxResponse, error) { + // Decode the Sandbox spec passed along from CRI. + var sandboxSpec vmsandbox.Spec + f, err := os.Open(filepath.Join(request.BundlePath, "config.json")) + if err != nil { + return nil, err + } + if err := json.NewDecoder(f).Decode(&sandboxSpec); err != nil { + _ = f.Close() + return nil, err } + _ = f.Close() - ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.UVMID, sandboxID)) + // Decode the runtime options. + shimOpts, err := vmutils.UnmarshalRuntimeOptions(ctx, request.Options) + if err != nil { + return nil, err + } - // By setting the sandboxID here, we ensure that any parallel calls for CreateSandbox - // will fail with an error. - s.sandboxID = sandboxID - s.mu.Unlock() + // We take a lock at this point so that if there are multiple parallel calls to CreateSandbox, + // only one will succeed in creating the sandbox. 
The successful caller will set the sandboxID, + // which will cause the other call(s) to fail with an error indicating that a sandbox already exists. + s.mu.Lock() + defer s.mu.Unlock() - // Use the shim binary name as the HCS owner, matching the convention used elsewhere in hcsshim. - owner := filepath.Base(os.Args[0]) + if s.sandboxID != "" { + return nil, fmt.Errorf("failed to create sandbox: sandbox already exists with ID %s", s.sandboxID) + } - hcsDocument, sandboxOptions, err := lcow.BuildSandboxConfig(ctx, owner, bundlePath, options, sandboxSpec.Annotations, sandboxSpec.Devices) //vmbuilder.ParseSpecs(ctx, owner, sandboxSpec) + hcsDocument, sandboxOptions, err := lcow.BuildSandboxConfig(ctx, request.BundlePath, shimOpts, &sandboxSpec) if err != nil { return nil, fmt.Errorf("failed to parse sandbox spec: %w", err) } - s.vmHcsDocument = hcsDocument s.sandboxOptions = sandboxOptions err = s.vmController.CreateVM(ctx, &vm.CreateOptions{ - ID: fmt.Sprintf("%s@vm", sandboxID), + ID: fmt.Sprintf("%s@vm", request.SandboxID), HCSDocument: hcsDocument, }) if err != nil { return nil, fmt.Errorf("failed to create VM: %w", err) } + // By setting the sandboxID here, we ensure that any parallel calls for CreateSandbox + // will fail with an error. + // Also, setting it here acts as a synchronization point - we know that if sandboxID is set, + // then the VM has been created successfully and sandboxOptions has been populated. + s.sandboxID = request.SandboxID + return &sandbox.CreateSandboxResponse{}, nil } @@ -84,15 +100,15 @@ func (s *Service) createSandboxInternal(ctx context.Context, sandboxID string, b // It instructs the vmController to start the VM. If the // sandbox was created with confidential settings, confidential options are // applied to the VM after starting. 
-func (s *Service) startSandboxInternal(ctx context.Context, sandboxID string) (*sandbox.StartSandboxResponse, error) { - if s.sandboxID != sandboxID { - return nil, fmt.Errorf("failed to start sandbox: sandbox ID mismatch, expected %s, got %s", s.sandboxID, sandboxID) +func (s *Service) startSandboxInternal(ctx context.Context, request *sandbox.StartSandboxRequest) (*sandbox.StartSandboxResponse, error) { + if s.sandboxID != request.SandboxID { + return nil, fmt.Errorf("failed to start sandbox: sandbox ID mismatch, expected %s, got %s", s.sandboxID, request.SandboxID) } - ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.UVMID, sandboxID)) - + // If we successfully got past the above check, it means the sandbox was created and + // the sandboxOptions should be populated. var confidentialOpts *guestresource.ConfidentialOptions - if s.sandboxOptions.ConfidentialConfig != nil { + if s.sandboxOptions != nil && s.sandboxOptions.ConfidentialConfig != nil { uvmReferenceInfoEncoded, err := vmutils.ParseUVMReferenceInfo( ctx, vmutils.DefaultLCOWOSBootFilesPath(), @@ -124,17 +140,15 @@ func (s *Service) startSandboxInternal(ctx context.Context, sandboxID string) (* // platformInternal is the implementation for Platform. // -// It returns the guest OS and CPU architecture for the running sandbox. -// An error is returned if the sandbox is not currently in the running state. -func (s *Service) platformInternal(ctx context.Context, sandboxID string) (*sandbox.PlatformResponse, error) { - if s.sandboxID != sandboxID { - return nil, fmt.Errorf("failed to get platform: sandbox ID mismatch, expected %s, got %s", s.sandboxID, sandboxID) +// It returns the guest OS and CPU architecture for the sandbox. +// An error is returned if the sandbox is not currently in the created state. 
+func (s *Service) platformInternal(_ context.Context, request *sandbox.PlatformRequest) (*sandbox.PlatformResponse, error) { + if s.sandboxID != request.SandboxID { + return nil, fmt.Errorf("failed to get platform: sandbox ID mismatch, expected %s, got %s", s.sandboxID, request.SandboxID) } - ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.UVMID, sandboxID)) - - if s.vmController.State() != vm.StateRunning { - return nil, fmt.Errorf("failed to get platform: sandbox is not running (state: %s)", s.vmController.State()) + if s.vmController.State() == vm.StateNotCreated { + return nil, fmt.Errorf("failed to get platform: sandbox has not been created (state: %s)", s.vmController.State()) } return &sandbox.PlatformResponse{ @@ -148,59 +162,49 @@ func (s *Service) platformInternal(ctx context.Context, sandboxID string) (*sand // stopSandboxInternal is the implementation for StopSandbox. // // It terminates the VM and performs any cleanup, if needed. -func (s *Service) stopSandboxInternal(ctx context.Context, sandboxID string) (*sandbox.StopSandboxResponse, error) { - if s.sandboxID != sandboxID { - return nil, fmt.Errorf("failed to stop sandbox: sandbox ID mismatch, expected %s, got %s", s.sandboxID, sandboxID) +func (s *Service) stopSandboxInternal(ctx context.Context, request *sandbox.StopSandboxRequest) (*sandbox.StopSandboxResponse, error) { + if s.sandboxID != request.SandboxID { + return nil, fmt.Errorf("failed to stop sandbox: sandbox ID mismatch, expected %s, got %s", s.sandboxID, request.SandboxID) } - ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.UVMID, sandboxID)) - err := s.vmController.TerminateVM(ctx) if err != nil { return nil, fmt.Errorf("failed to terminate VM: %w", err) } - if s.vmHcsDocument.VirtualMachine.GuestState != nil { - if err := os.Remove(s.vmHcsDocument.VirtualMachine.GuestState.GuestStateFilePath); err != nil { - log.G(ctx).WithField("VMGS File", s.vmHcsDocument.VirtualMachine.GuestState.GuestStateFilePath). 
- WithError(err).Error("failed to remove VMGS file") - } - } - return &sandbox.StopSandboxResponse{}, nil } // waitSandboxInternal is the implementation for WaitSandbox. // -// It blocks until the underlying VM has stopped, then maps the stopped status +// It blocks until the underlying VM has been terminated, then maps the exit status // to a sandbox exit code. -func (s *Service) waitSandboxInternal(ctx context.Context, sandboxID string) (*sandbox.WaitSandboxResponse, error) { - if s.sandboxID != sandboxID { - return nil, fmt.Errorf("failed to wait for sandbox: sandbox ID mismatch, expected %s, got %s", s.sandboxID, sandboxID) +func (s *Service) waitSandboxInternal(ctx context.Context, request *sandbox.WaitSandboxRequest) (*sandbox.WaitSandboxResponse, error) { + if s.sandboxID != request.SandboxID { + return nil, fmt.Errorf("failed to wait for sandbox: sandbox ID mismatch, expected %s, got %s", s.sandboxID, request.SandboxID) } - ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.UVMID, sandboxID)) - - // Wait for the VM to stop, then return the exit code. + // Wait for the VM to be terminated, then return the exit code. + // This is a blocking call that will wait until the VM is stopped. err := s.vmController.Wait(ctx) if err != nil { return nil, fmt.Errorf("failed to wait for VM: %w", err) } - stoppedStatus, err := s.vmController.StoppedStatus() + exitStatus, err := s.vmController.ExitStatus() if err != nil { - return nil, fmt.Errorf("failed to get sandbox stopped status: %w", err) + return nil, fmt.Errorf("failed to get sandbox exit status: %w", err) } - exitStatus := 0 + exitStatusCode := 0 // If there was an exit error, set a non-zero exit status. 
- if stoppedStatus.Err != nil { - exitStatus = int(windows.ERROR_INTERNAL_ERROR) + if exitStatus.Err != nil { + exitStatusCode = int(windows.ERROR_INTERNAL_ERROR) } return &sandbox.WaitSandboxResponse{ - ExitStatus: uint32(exitStatus), - ExitedAt: timestamppb.New(stoppedStatus.StoppedTime), + ExitStatus: uint32(exitStatusCode), + ExitedAt: timestamppb.New(exitStatus.StoppedTime), }, nil } @@ -209,38 +213,37 @@ func (s *Service) waitSandboxInternal(ctx context.Context, sandboxID string) (*s // It synthesizes a status response from the current vmController state. // When verbose is true, the response may be extended with additional // diagnostic information. -func (s *Service) sandboxStatusInternal(_ context.Context, sandboxID string, verbose bool) (*sandbox.SandboxStatusResponse, error) { - if s.sandboxID != sandboxID { - return nil, fmt.Errorf("failed to get sandbox status: sandbox ID mismatch, expected %s, got %s", s.sandboxID, sandboxID) +func (s *Service) sandboxStatusInternal(_ context.Context, request *sandbox.SandboxStatusRequest) (*sandbox.SandboxStatusResponse, error) { + if s.sandboxID != request.SandboxID { + return nil, fmt.Errorf("failed to get sandbox status: sandbox ID mismatch, expected %s, got %s", s.sandboxID, request.SandboxID) } resp := &sandbox.SandboxStatusResponse{ - SandboxID: sandboxID, - State: SandboxStateNotReady, + SandboxID: request.SandboxID, } - if s.vmController.State() == vm.StateNotCreated || s.vmController.State() == vm.StateCreated { + switch vmState := s.vmController.State(); vmState { + case vm.StateNotCreated, vm.StateCreated, vm.StateInvalid: + // VM has not started yet or is in invalid state; return the default not-ready response. + resp.State = SandboxStateNotReady return resp, nil - } - - resp.CreatedAt = timestamppb.New(s.vmController.StartTime()) - - if s.vmController.State() == vm.StateRunning { + case vm.StateRunning: + // VM is running, so we can report the created time and ready state. 
resp.State = SandboxStateReady - } - - if s.vmController.State() == vm.StateStopped { - stoppedStatus, err := s.vmController.StoppedStatus() + resp.CreatedAt = timestamppb.New(s.vmController.StartTime()) + case vm.StateTerminated: + // VM has stopped, so we can report the created time, exited time, and not-ready state. + resp.State = SandboxStateNotReady + resp.CreatedAt = timestamppb.New(s.vmController.StartTime()) + stoppedStatus, err := s.vmController.ExitStatus() if err != nil { return nil, fmt.Errorf("failed to get sandbox stopped status: %w", err) } resp.ExitedAt = timestamppb.New(stoppedStatus.StoppedTime) } - if verbose { - // Add compat info and any other detail - // resp.Info map[string]string - // resp.Extra any + if request.Verbose { //nolint:staticcheck + // TODO: Add compat info and any other details. } return resp, nil @@ -249,7 +252,7 @@ func (s *Service) sandboxStatusInternal(_ context.Context, sandboxID string, ver // pingSandboxInternal is the implementation for PingSandbox. // // Ping is not yet implemented for this shim. -func (s *Service) pingSandboxInternal(_ context.Context, _ string) (*sandbox.PingResponse, error) { +func (s *Service) pingSandboxInternal(_ context.Context, _ *sandbox.PingRequest) (*sandbox.PingResponse, error) { // This functionality is not yet applicable for this shim. // Best scenario, we can return true if the VM is running. return nil, errdefs.ErrNotImplemented @@ -259,20 +262,27 @@ func (s *Service) pingSandboxInternal(_ context.Context, _ string) (*sandbox.Pin // a shutdown request from containerd. // // The sandbox must already be in the stopped state before shutdown is accepted. 
-func (s *Service) shutdownSandboxInternal(ctx context.Context, sandboxID string) (*sandbox.ShutdownSandboxResponse, error) { - if sandboxID != s.sandboxID { - return &sandbox.ShutdownSandboxResponse{}, fmt.Errorf("failed to shutdown sandbox: sandbox ID mismatch, expected %s, got %s", s.sandboxID, sandboxID) +func (s *Service) shutdownSandboxInternal(ctx context.Context, request *sandbox.ShutdownSandboxRequest) (*sandbox.ShutdownSandboxResponse, error) { + if s.sandboxID != request.SandboxID { + return &sandbox.ShutdownSandboxResponse{}, fmt.Errorf("failed to shutdown sandbox: sandbox ID mismatch, expected %s, got %s", s.sandboxID, request.SandboxID) } - if s.vmController.State() != vm.StateStopped { - return &sandbox.ShutdownSandboxResponse{}, fmt.Errorf("failed to shutdown sandbox: sandbox is not stopped (state: %s)", s.vmController.State()) + // Ensure the VM is terminated. If the VM is already terminated, + // TerminateVM is a no-op, so this is safe to call regardless of the current VM state. + if state := s.vmController.State(); state != vm.StateTerminated { + err := s.vmController.TerminateVM(ctx) + if err != nil { + // Just log the error instead of returning it since this is a best-effort cleanup. + log.G(ctx).WithError(err).Error("failed to terminate VM during shutdown") + } } - ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.UVMID, sandboxID)) - - // Use a goroutine to wait for the context to be done. - // This allows us to return the response of the shutdown call prior to - // the server being shut down. + // With gRPC/TTRPC, the transport layer creates a child context for each incoming request, + // and cancels that context when the handler returns or the client-side connection is dropped. + // For the shutdown request, if we call shutdown.Shutdown() directly, the shim process exits + // prior to the response being sent back to containerd, which causes the shutdown call to fail.
+ // Therefore, use a goroutine to wait for the RPC context to be done after which + // we can safely call shutdown.Shutdown() without risking an early process exit. go func() { <-ctx.Done() time.Sleep(20 * time.Millisecond) // tiny cushion to avoid edge races @@ -286,9 +296,9 @@ func (s *Service) shutdownSandboxInternal(ctx context.Context, sandboxID string) // sandboxMetricsInternal is the implementation for SandboxMetrics. // // It collects and returns runtime statistics from the vmController. -func (s *Service) sandboxMetricsInternal(ctx context.Context, sandboxID string) (*sandbox.SandboxMetricsResponse, error) { - if sandboxID != s.sandboxID { - return &sandbox.SandboxMetricsResponse{}, fmt.Errorf("failed to get sandbox metrics: sandbox ID mismatch, expected %s, got %s", s.sandboxID, sandboxID) +func (s *Service) sandboxMetricsInternal(ctx context.Context, request *sandbox.SandboxMetricsRequest) (*sandbox.SandboxMetricsResponse, error) { + if s.sandboxID != request.SandboxID { + return &sandbox.SandboxMetricsResponse{}, fmt.Errorf("failed to get sandbox metrics: sandbox ID mismatch, expected %s, got %s", s.sandboxID, request.SandboxID) } stats, err := s.vmController.Stats(ctx) @@ -304,7 +314,7 @@ func (s *Service) sandboxMetricsInternal(ctx context.Context, sandboxID string) return &sandbox.SandboxMetricsResponse{ Metrics: &types.Metric{ Timestamp: timestamppb.Now(), - ID: sandboxID, + ID: request.SandboxID, Data: typeurl.MarshalProto(anyStat), }, }, nil diff --git a/cmd/containerd-shim-lcow-v1/service/service_shimdiag.go b/cmd/containerd-shim-lcow-v2/service/service_shimdiag.go similarity index 61% rename from cmd/containerd-shim-lcow-v1/service/service_shimdiag.go rename to cmd/containerd-shim-lcow-v2/service/service_shimdiag.go index 74c240ce6a..503982d59e 100644 --- a/cmd/containerd-shim-lcow-v1/service/service_shimdiag.go +++ b/cmd/containerd-shim-lcow-v2/service/service_shimdiag.go @@ -7,6 +7,7 @@ import ( "os" "strings" + 
"github.com/Microsoft/hcsshim/internal/logfields" "github.com/Microsoft/hcsshim/internal/oc" "github.com/Microsoft/hcsshim/internal/shimdiag" @@ -18,61 +19,65 @@ import ( var _ shimdiag.ShimDiagService = &Service{} // DiagExecInHost executes a process in the host namespace for diagnostic purposes. +// This method is part of the instrumentation layer and business logic is included in diagExecInHostInternal. func (s *Service) DiagExecInHost(ctx context.Context, request *shimdiag.ExecProcessRequest) (resp *shimdiag.ExecProcessResponse, err error) { ctx, span := oc.StartSpan(ctx, "DiagExecInHost") defer span.End() defer func() { oc.SetSpanStatus(span, err) }() span.AddAttributes( - trace.StringAttribute("sandbox-id", s.sandboxID), - trace.StringAttribute("args", strings.Join(request.Args, " ")), - trace.StringAttribute("workdir", request.Workdir), - trace.BoolAttribute("terminal", request.Terminal), - trace.StringAttribute("stdin", request.Stdin), - trace.StringAttribute("stdout", request.Stdout), - trace.StringAttribute("stderr", request.Stderr)) + trace.StringAttribute(logfields.SandboxID, s.sandboxID), + trace.StringAttribute(logfields.Args, strings.Join(request.Args, " ")), + trace.StringAttribute(logfields.Workdir, request.Workdir), + trace.BoolAttribute(logfields.Terminal, request.Terminal), + trace.StringAttribute(logfields.Stdin, request.Stdin), + trace.StringAttribute(logfields.Stdout, request.Stdout), + trace.StringAttribute(logfields.Stderr, request.Stderr)) r, e := s.diagExecInHostInternal(ctx, request) return r, errgrpc.ToGRPC(e) } // DiagTasks returns information about all tasks in the shim. +// This method is part of the instrumentation layer and business logic is included in diagTasksInternal. 
func (s *Service) DiagTasks(ctx context.Context, request *shimdiag.TasksRequest) (resp *shimdiag.TasksResponse, err error) { ctx, span := oc.StartSpan(ctx, "DiagTasks") defer span.End() defer func() { oc.SetSpanStatus(span, err) }() span.AddAttributes( - trace.StringAttribute("sandbox-id", s.sandboxID), - trace.BoolAttribute("execs", request.Execs)) + trace.StringAttribute(logfields.SandboxID, s.sandboxID), + trace.BoolAttribute(logfields.Execs, request.Execs)) r, e := s.diagTasksInternal(ctx, request) return r, errgrpc.ToGRPC(e) } // DiagShare shares a directory from the host into the sandbox. +// This method is part of the instrumentation layer and business logic is included in diagShareInternal. func (s *Service) DiagShare(ctx context.Context, request *shimdiag.ShareRequest) (resp *shimdiag.ShareResponse, err error) { ctx, span := oc.StartSpan(ctx, "DiagShare") defer span.End() defer func() { oc.SetSpanStatus(span, err) }() span.AddAttributes( - trace.StringAttribute("sandbox-id", s.sandboxID), - trace.StringAttribute("host-path", request.HostPath), - trace.StringAttribute("uvm-path", request.UvmPath), - trace.BoolAttribute("readonly", request.ReadOnly)) + trace.StringAttribute(logfields.SandboxID, s.sandboxID), + trace.StringAttribute(logfields.HostPath, request.HostPath), + trace.StringAttribute(logfields.UVMPath, request.UvmPath), + trace.BoolAttribute(logfields.ReadOnly, request.ReadOnly)) r, e := s.diagShareInternal(ctx, request) return r, errgrpc.ToGRPC(e) } // DiagStacks returns the stack traces of all goroutines in the shim. +// This method is part of the instrumentation layer and business logic is included in diagStacksInternal. 
func (s *Service) DiagStacks(ctx context.Context, request *shimdiag.StacksRequest) (resp *shimdiag.StacksResponse, err error) { ctx, span := oc.StartSpan(ctx, "DiagStacks") defer span.End() defer func() { oc.SetSpanStatus(span, err) }() - span.AddAttributes(trace.StringAttribute("sandbox-id", s.sandboxID)) + span.AddAttributes(trace.StringAttribute(logfields.SandboxID, s.sandboxID)) r, e := s.diagStacksInternal(ctx, request) return r, errgrpc.ToGRPC(e) @@ -80,11 +85,11 @@ func (s *Service) DiagStacks(ctx context.Context, request *shimdiag.StacksReques // DiagPid returns the process ID (PID) of the shim for diagnostic purposes. func (s *Service) DiagPid(ctx context.Context, _ *shimdiag.PidRequest) (resp *shimdiag.PidResponse, err error) { - ctx, span := oc.StartSpan(ctx, "DiagPid") + _, span := oc.StartSpan(ctx, "DiagPid") defer span.End() defer func() { oc.SetSpanStatus(span, err) }() - span.AddAttributes(trace.StringAttribute("sandbox-id", s.sandboxID)) + span.AddAttributes(trace.StringAttribute(logfields.SandboxID, s.sandboxID)) return &shimdiag.PidResponse{ Pid: int32(os.Getpid()), diff --git a/cmd/containerd-shim-lcow-v1/service/service_shimdiag_internal.go b/cmd/containerd-shim-lcow-v2/service/service_shimdiag_internal.go similarity index 76% rename from cmd/containerd-shim-lcow-v1/service/service_shimdiag_internal.go rename to cmd/containerd-shim-lcow-v2/service/service_shimdiag_internal.go index 1ef881351a..3a6dc34b67 100644 --- a/cmd/containerd-shim-lcow-v1/service/service_shimdiag_internal.go +++ b/cmd/containerd-shim-lcow-v2/service/service_shimdiag_internal.go @@ -6,7 +6,6 @@ import ( "context" "fmt" - "github.com/Microsoft/hcsshim/internal/controller/vm" "github.com/Microsoft/hcsshim/internal/shimdiag" "github.com/containerd/errdefs" ) @@ -15,14 +14,6 @@ import ( // // It is used to create an exec session into the hosting UVM. 
func (s *Service) diagExecInHostInternal(ctx context.Context, request *shimdiag.ExecProcessRequest) (*shimdiag.ExecProcessResponse, error) { - if request.Terminal && request.Stderr != "" { - return nil, fmt.Errorf("if using terminal, stderr must be empty: %w", errdefs.ErrFailedPrecondition) - } - - if s.vmController.State() != vm.StateRunning { - return nil, fmt.Errorf("cannot exec in host when vm is not running: %w", errdefs.ErrFailedPrecondition) - } - ec, err := s.vmController.ExecIntoHost(ctx, request) if err != nil { return nil, fmt.Errorf("failed to exec into host: %w", err) diff --git a/cmd/containerd-shim-lcow-v1/service/service_task.go b/cmd/containerd-shim-lcow-v2/service/service_task.go similarity index 52% rename from cmd/containerd-shim-lcow-v1/service/service_task.go rename to cmd/containerd-shim-lcow-v2/service/service_task.go index c75b37076b..e41eeb330b 100644 --- a/cmd/containerd-shim-lcow-v1/service/service_task.go +++ b/cmd/containerd-shim-lcow-v2/service/service_task.go @@ -5,6 +5,7 @@ package service import ( "context" + "github.com/Microsoft/hcsshim/internal/logfields" "github.com/Microsoft/hcsshim/internal/oc" "github.com/containerd/containerd/api/runtime/task/v3" @@ -17,304 +18,321 @@ import ( var _ task.TTRPCTaskService = &Service{} // State returns the current state of a task or process. +// This method is part of the instrumentation layer and business logic is included in stateInternal. 
func (s *Service) State(ctx context.Context, request *task.StateRequest) (resp *task.StateResponse, err error) { ctx, span := oc.StartSpan(ctx, "State") defer span.End() defer func() { if resp != nil { span.AddAttributes( - trace.StringAttribute("status", resp.Status.String()), - trace.Int64Attribute("exit-status", int64(resp.ExitStatus)), - trace.StringAttribute("exited-at", resp.ExitedAt.String())) + trace.StringAttribute(logfields.Status, resp.Status.String()), + trace.Int64Attribute(logfields.ExitStatus, int64(resp.ExitStatus)), + trace.StringAttribute(logfields.ExitedAt, resp.ExitedAt.String())) } oc.SetSpanStatus(span, err) }() span.AddAttributes( - trace.StringAttribute("sandbox-id", s.sandboxID), - trace.StringAttribute("id", request.ID), - trace.StringAttribute("exec-id", request.ExecID)) + trace.StringAttribute(logfields.SandboxID, s.sandboxID), + trace.StringAttribute(logfields.ID, request.ID), + trace.StringAttribute(logfields.ExecSpanID, request.ExecID)) r, e := s.stateInternal(ctx, request) return r, errgrpc.ToGRPC(e) } // Create creates a new task. +// This method is part of the instrumentation layer and business logic is included in createInternal. 
func (s *Service) Create(ctx context.Context, request *task.CreateTaskRequest) (resp *task.CreateTaskResponse, err error) { ctx, span := oc.StartSpan(ctx, "Create") defer span.End() defer func() { if resp != nil { - span.AddAttributes(trace.Int64Attribute("pid", int64(resp.Pid))) + span.AddAttributes(trace.Int64Attribute(logfields.ProcessID, int64(resp.Pid))) } oc.SetSpanStatus(span, err) }() span.AddAttributes( - trace.StringAttribute("sandbox-id", s.sandboxID), - trace.StringAttribute("id", request.ID), - trace.StringAttribute("bundle", request.Bundle), - trace.BoolAttribute("terminal", request.Terminal), - trace.StringAttribute("stdin", request.Stdin), - trace.StringAttribute("stdout", request.Stdout), - trace.StringAttribute("stderr", request.Stderr), - trace.StringAttribute("checkpoint", request.Checkpoint), - trace.StringAttribute("parent-checkpoint", request.ParentCheckpoint)) + trace.StringAttribute(logfields.SandboxID, s.sandboxID), + trace.StringAttribute(logfields.ID, request.ID), + trace.StringAttribute(logfields.Bundle, request.Bundle), + trace.BoolAttribute(logfields.Terminal, request.Terminal), + trace.StringAttribute(logfields.Stdin, request.Stdin), + trace.StringAttribute(logfields.Stdout, request.Stdout), + trace.StringAttribute(logfields.Stderr, request.Stderr), + trace.StringAttribute(logfields.Checkpoint, request.Checkpoint), + trace.StringAttribute(logfields.ParentCheckpoint, request.ParentCheckpoint)) r, e := s.createInternal(ctx, request) return r, errgrpc.ToGRPC(e) } // Start starts a previously created task. +// This method is part of the instrumentation layer and business logic is included in startInternal. 
func (s *Service) Start(ctx context.Context, request *task.StartRequest) (resp *task.StartResponse, err error) { ctx, span := oc.StartSpan(ctx, "Start") defer span.End() defer func() { if resp != nil { - span.AddAttributes(trace.Int64Attribute("pid", int64(resp.Pid))) + span.AddAttributes(trace.Int64Attribute(logfields.ProcessID, int64(resp.Pid))) } oc.SetSpanStatus(span, err) }() span.AddAttributes( - trace.StringAttribute("sandbox-id", s.sandboxID), - trace.StringAttribute("id", request.ID), - trace.StringAttribute("exec-id", request.ExecID)) + trace.StringAttribute(logfields.SandboxID, s.sandboxID), + trace.StringAttribute(logfields.ID, request.ID), + trace.StringAttribute(logfields.ExecSpanID, request.ExecID)) r, e := s.startInternal(ctx, request) return r, errgrpc.ToGRPC(e) } // Delete deletes a task and returns its exit status. +// This method is part of the instrumentation layer and business logic is included in deleteInternal. func (s *Service) Delete(ctx context.Context, request *task.DeleteRequest) (resp *task.DeleteResponse, err error) { ctx, span := oc.StartSpan(ctx, "Delete") defer span.End() defer func() { if resp != nil { span.AddAttributes( - trace.Int64Attribute("pid", int64(resp.Pid)), - trace.Int64Attribute("exit-status", int64(resp.ExitStatus)), - trace.StringAttribute("exited-at", resp.ExitedAt.String())) + trace.Int64Attribute(logfields.ProcessID, int64(resp.Pid)), + trace.Int64Attribute(logfields.ExitStatus, int64(resp.ExitStatus)), + trace.StringAttribute(logfields.ExitedAt, resp.ExitedAt.String())) } oc.SetSpanStatus(span, err) }() span.AddAttributes( - trace.StringAttribute("sandbox-id", s.sandboxID), - trace.StringAttribute("id", request.ID), - trace.StringAttribute("exec-id", request.ExecID)) + trace.StringAttribute(logfields.SandboxID, s.sandboxID), + trace.StringAttribute(logfields.ID, request.ID), + trace.StringAttribute(logfields.ExecSpanID, request.ExecID)) r, e := s.deleteInternal(ctx, request) return r, errgrpc.ToGRPC(e) } // Pids 
returns all process IDs for a task. +// This method is part of the instrumentation layer and business logic is included in pidsInternal. func (s *Service) Pids(ctx context.Context, request *task.PidsRequest) (resp *task.PidsResponse, err error) { ctx, span := oc.StartSpan(ctx, "Pids") defer span.End() defer func() { oc.SetSpanStatus(span, err) }() span.AddAttributes( - trace.StringAttribute("sandbox-id", s.sandboxID), - trace.StringAttribute("id", request.ID)) + trace.StringAttribute(logfields.SandboxID, s.sandboxID), + trace.StringAttribute(logfields.ID, request.ID)) r, e := s.pidsInternal(ctx, request) return r, errgrpc.ToGRPC(e) } // Pause pauses a task. +// This method is part of the instrumentation layer and business logic is included in pauseInternal. func (s *Service) Pause(ctx context.Context, request *task.PauseRequest) (resp *emptypb.Empty, err error) { ctx, span := oc.StartSpan(ctx, "Pause") defer span.End() defer func() { oc.SetSpanStatus(span, err) }() span.AddAttributes( - trace.StringAttribute("sandbox-id", s.sandboxID), - trace.StringAttribute("id", request.ID)) + trace.StringAttribute(logfields.SandboxID, s.sandboxID), + trace.StringAttribute(logfields.ID, request.ID)) r, e := s.pauseInternal(ctx, request) return r, errgrpc.ToGRPC(e) } // Resume resumes a previously paused task. +// This method is part of the instrumentation layer and business logic is included in resumeInternal. func (s *Service) Resume(ctx context.Context, request *task.ResumeRequest) (resp *emptypb.Empty, err error) { ctx, span := oc.StartSpan(ctx, "Resume") defer span.End() defer func() { oc.SetSpanStatus(span, err) }() span.AddAttributes( - trace.StringAttribute("sandbox-id", s.sandboxID), - trace.StringAttribute("id", request.ID)) + trace.StringAttribute(logfields.SandboxID, s.sandboxID), + trace.StringAttribute(logfields.ID, request.ID)) r, e := s.resumeInternal(ctx, request) return r, errgrpc.ToGRPC(e) } // Checkpoint creates a checkpoint of a task. 
+// This method is part of the instrumentation layer and business logic is included in checkpointInternal. func (s *Service) Checkpoint(ctx context.Context, request *task.CheckpointTaskRequest) (resp *emptypb.Empty, err error) { ctx, span := oc.StartSpan(ctx, "Checkpoint") defer span.End() defer func() { oc.SetSpanStatus(span, err) }() span.AddAttributes( - trace.StringAttribute("sandbox-id", s.sandboxID), - trace.StringAttribute("id", request.ID), - trace.StringAttribute("path", request.Path)) + trace.StringAttribute(logfields.SandboxID, s.sandboxID), + trace.StringAttribute(logfields.ID, request.ID), + trace.StringAttribute(logfields.Path, request.Path)) r, e := s.checkpointInternal(ctx, request) return r, errgrpc.ToGRPC(e) } // Kill sends a signal to a task or process. +// This method is part of the instrumentation layer and business logic is included in killInternal. func (s *Service) Kill(ctx context.Context, request *task.KillRequest) (resp *emptypb.Empty, err error) { ctx, span := oc.StartSpan(ctx, "Kill") defer span.End() defer func() { oc.SetSpanStatus(span, err) }() span.AddAttributes( - trace.StringAttribute("sandbox-id", s.sandboxID), - trace.StringAttribute("id", request.ID), - trace.StringAttribute("exec-id", request.ExecID), - trace.Int64Attribute("signal", int64(request.Signal)), - trace.BoolAttribute("all", request.All)) + trace.StringAttribute(logfields.SandboxID, s.sandboxID), + trace.StringAttribute(logfields.ID, request.ID), + trace.StringAttribute(logfields.ExecSpanID, request.ExecID), + trace.Int64Attribute(logfields.Signal, int64(request.Signal)), + trace.BoolAttribute(logfields.All, request.All)) r, e := s.killInternal(ctx, request) return r, errgrpc.ToGRPC(e) } // Exec executes an additional process inside a task. +// This method is part of the instrumentation layer and business logic is included in execInternal. 
func (s *Service) Exec(ctx context.Context, request *task.ExecProcessRequest) (resp *emptypb.Empty, err error) { ctx, span := oc.StartSpan(ctx, "Exec") defer span.End() defer func() { oc.SetSpanStatus(span, err) }() span.AddAttributes( - trace.StringAttribute("sandbox-id", s.sandboxID), - trace.StringAttribute("id", request.ID), - trace.StringAttribute("exec-id", request.ExecID), - trace.BoolAttribute("terminal", request.Terminal), - trace.StringAttribute("stdin", request.Stdin), - trace.StringAttribute("stdout", request.Stdout), - trace.StringAttribute("stderr", request.Stderr)) + trace.StringAttribute(logfields.SandboxID, s.sandboxID), + trace.StringAttribute(logfields.ID, request.ID), + trace.StringAttribute(logfields.ExecSpanID, request.ExecID), + trace.BoolAttribute(logfields.Terminal, request.Terminal), + trace.StringAttribute(logfields.Stdin, request.Stdin), + trace.StringAttribute(logfields.Stdout, request.Stdout), + trace.StringAttribute(logfields.Stderr, request.Stderr)) r, e := s.execInternal(ctx, request) return r, errgrpc.ToGRPC(e) } // ResizePty resizes the terminal of a process. +// This method is part of the instrumentation layer and business logic is included in resizePtyInternal. 
func (s *Service) ResizePty(ctx context.Context, request *task.ResizePtyRequest) (resp *emptypb.Empty, err error) { ctx, span := oc.StartSpan(ctx, "ResizePty") defer span.End() defer func() { oc.SetSpanStatus(span, err) }() span.AddAttributes( - trace.StringAttribute("sandbox-id", s.sandboxID), - trace.StringAttribute("id", request.ID), - trace.StringAttribute("exec-id", request.ExecID), - trace.Int64Attribute("width", int64(request.Width)), - trace.Int64Attribute("height", int64(request.Height))) + trace.StringAttribute(logfields.SandboxID, s.sandboxID), + trace.StringAttribute(logfields.ID, request.ID), + trace.StringAttribute(logfields.ExecSpanID, request.ExecID), + trace.Int64Attribute(logfields.Width, int64(request.Width)), + trace.Int64Attribute(logfields.Height, int64(request.Height))) r, e := s.resizePtyInternal(ctx, request) return r, errgrpc.ToGRPC(e) } // CloseIO closes the IO for a process. +// This method is part of the instrumentation layer and business logic is included in closeIOInternal. func (s *Service) CloseIO(ctx context.Context, request *task.CloseIORequest) (resp *emptypb.Empty, err error) { ctx, span := oc.StartSpan(ctx, "CloseIO") defer span.End() defer func() { oc.SetSpanStatus(span, err) }() span.AddAttributes( - trace.StringAttribute("sandbox-id", s.sandboxID), - trace.StringAttribute("id", request.ID), - trace.StringAttribute("exec-id", request.ExecID), - trace.BoolAttribute("stdin", request.Stdin)) + trace.StringAttribute(logfields.SandboxID, s.sandboxID), + trace.StringAttribute(logfields.ID, request.ID), + trace.StringAttribute(logfields.ExecSpanID, request.ExecID), + trace.BoolAttribute(logfields.Stdin, request.Stdin)) r, e := s.closeIOInternal(ctx, request) return r, errgrpc.ToGRPC(e) } // Update updates a running task with new resource constraints. +// This method is part of the instrumentation layer and business logic is included in updateInternal. 
func (s *Service) Update(ctx context.Context, request *task.UpdateTaskRequest) (resp *emptypb.Empty, err error) { ctx, span := oc.StartSpan(ctx, "Update") defer span.End() defer func() { oc.SetSpanStatus(span, err) }() span.AddAttributes( - trace.StringAttribute("sandbox-id", s.sandboxID), - trace.StringAttribute("id", request.ID)) + trace.StringAttribute(logfields.SandboxID, s.sandboxID), + trace.StringAttribute(logfields.ID, request.ID)) r, e := s.updateInternal(ctx, request) return r, errgrpc.ToGRPC(e) } // Wait waits for a task or process to exit. +// This method is part of the instrumentation layer and business logic is included in waitInternal. func (s *Service) Wait(ctx context.Context, request *task.WaitRequest) (resp *task.WaitResponse, err error) { ctx, span := oc.StartSpan(ctx, "Wait") defer span.End() defer func() { if resp != nil { span.AddAttributes( - trace.Int64Attribute("exit-status", int64(resp.ExitStatus)), - trace.StringAttribute("exited-at", resp.ExitedAt.String())) + trace.Int64Attribute(logfields.ExitStatus, int64(resp.ExitStatus)), + trace.StringAttribute(logfields.ExitedAt, resp.ExitedAt.String())) } oc.SetSpanStatus(span, err) }() span.AddAttributes( - trace.StringAttribute("sandbox-id", s.sandboxID), - trace.StringAttribute("id", request.ID), - trace.StringAttribute("exec-id", request.ExecID)) + trace.StringAttribute(logfields.SandboxID, s.sandboxID), + trace.StringAttribute(logfields.ID, request.ID), + trace.StringAttribute(logfields.ExecSpanID, request.ExecID)) r, e := s.waitInternal(ctx, request) return r, errgrpc.ToGRPC(e) } // Stats returns resource usage statistics for a task. +// This method is part of the instrumentation layer and business logic is included in statsInternal. 
func (s *Service) Stats(ctx context.Context, request *task.StatsRequest) (resp *task.StatsResponse, err error) { ctx, span := oc.StartSpan(ctx, "Stats") defer span.End() defer func() { oc.SetSpanStatus(span, err) }() span.AddAttributes( - trace.StringAttribute("sandbox-id", s.sandboxID), - trace.StringAttribute("id", request.ID)) + trace.StringAttribute(logfields.SandboxID, s.sandboxID), + trace.StringAttribute(logfields.ID, request.ID)) r, e := s.statsInternal(ctx, request) return r, errgrpc.ToGRPC(e) } // Connect reconnects to a running task. +// This method is part of the instrumentation layer and business logic is included in connectInternal. func (s *Service) Connect(ctx context.Context, request *task.ConnectRequest) (resp *task.ConnectResponse, err error) { ctx, span := oc.StartSpan(ctx, "Connect") defer span.End() defer func() { if resp != nil { span.AddAttributes( - trace.Int64Attribute("shim-pid", int64(resp.ShimPid)), - trace.Int64Attribute("task-pid", int64(resp.TaskPid)), - trace.StringAttribute("version", resp.Version)) + trace.Int64Attribute(logfields.ShimPid, int64(resp.ShimPid)), + trace.Int64Attribute(logfields.TaskPid, int64(resp.TaskPid)), + trace.StringAttribute(logfields.Version, resp.Version)) } oc.SetSpanStatus(span, err) }() span.AddAttributes( - trace.StringAttribute("sandbox-id", s.sandboxID), - trace.StringAttribute("id", request.ID)) + trace.StringAttribute(logfields.SandboxID, s.sandboxID), + trace.StringAttribute(logfields.ID, request.ID)) r, e := s.connectInternal(ctx, request) return r, errgrpc.ToGRPC(e) } // Shutdown gracefully shuts down the Service. +// This method is part of the instrumentation layer and business logic is included in shutdownInternal. 
func (s *Service) Shutdown(ctx context.Context, request *task.ShutdownRequest) (resp *emptypb.Empty, err error) { ctx, span := oc.StartSpan(ctx, "Shutdown") defer span.End() defer func() { oc.SetSpanStatus(span, err) }() span.AddAttributes( - trace.StringAttribute("sandbox-id", s.sandboxID), - trace.StringAttribute("id", request.ID)) + trace.StringAttribute(logfields.SandboxID, s.sandboxID), + trace.StringAttribute(logfields.ID, request.ID)) r, e := s.shutdownInternal(ctx, request) return r, errgrpc.ToGRPC(e) diff --git a/cmd/containerd-shim-lcow-v1/service/service_task_internal.go b/cmd/containerd-shim-lcow-v2/service/service_task_internal.go similarity index 100% rename from cmd/containerd-shim-lcow-v1/service/service_task_internal.go rename to cmd/containerd-shim-lcow-v2/service/service_task_internal.go diff --git a/cmd/containerd-shim-lcow-v1/versioninfo.json b/cmd/containerd-shim-lcow-v2/versioninfo.json similarity index 88% rename from cmd/containerd-shim-lcow-v1/versioninfo.json rename to cmd/containerd-shim-lcow-v2/versioninfo.json index e450a4fe60..11316902d5 100644 --- a/cmd/containerd-shim-lcow-v1/versioninfo.json +++ b/cmd/containerd-shim-lcow-v2/versioninfo.json @@ -26,7 +26,7 @@ "InternalName": "", "LegalCopyright": "", "LegalTrademarks": "", - "OriginalFilename": "containerd-shim-lcow-v1.exe", + "OriginalFilename": "containerd-shim-lcow-v2.exe", "PrivateBuild": "", "ProductName": "lcow shim", "ProductVersion": "v1.0.0.0", @@ -39,6 +39,6 @@ } }, "IconPath": "", - "ManifestPath": "containerd-shim-lcow-v1.exe.manifest" + "ManifestPath": "containerd-shim-lcow-v2.exe.manifest" } diff --git a/internal/builder/vm/lcow/specs.go b/internal/builder/vm/lcow/specs.go index ff67ab6a74..0b28d151c6 100644 --- a/internal/builder/vm/lcow/specs.go +++ b/internal/builder/vm/lcow/specs.go @@ -24,11 +24,13 @@ import ( "github.com/sirupsen/logrus" ) +// shimName is the name of lcow shim implementation. 
+const shimName = "containerd-shim-lcow-v2" + // BuildSandboxConfig is the primary entry point for generating the HCS ComputeSystem // document used to create an LCOW Utility VM. func BuildSandboxConfig( ctx context.Context, - owner string, bundlePath string, opts *runhcsoptions.Options, spec *vm.Spec, @@ -240,7 +242,7 @@ func BuildSandboxConfig( // Build the document. doc := &hcsschema.ComputeSystem{ - Owner: owner, + Owner: shimName, SchemaVersion: schema, // Terminate the UVM when the last handle is closed. // To support impactless updates this will need to be configurable. diff --git a/internal/builder/vm/lcow/specs_test.go b/internal/builder/vm/lcow/specs_test.go index 54a917b030..d232892d4c 100644 --- a/internal/builder/vm/lcow/specs_test.go +++ b/internal/builder/vm/lcow/specs_test.go @@ -50,7 +50,7 @@ func runTestCases(t *testing.T, ctx context.Context, defaultOpts *runhcsoptions. // Use a temp dir as bundlePath for confidential VM tests bundlePath := t.TempDir() - doc, sandboxOpts, err := BuildSandboxConfig(ctx, "test-owner", bundlePath, opts, spec) + doc, sandboxOpts, err := BuildSandboxConfig(ctx, bundlePath, opts, spec) if tt.wantErr { if err == nil { @@ -1990,7 +1990,7 @@ func TestBuildSandboxConfig_NUMA_OldWindows(t *testing.T) { validBootFilesPath := newBootFilesPath(t) - doc, _, err := BuildSandboxConfig(ctx, "test-owner", t.TempDir(), &runhcsoptions.Options{ + doc, _, err := BuildSandboxConfig(ctx, t.TempDir(), &runhcsoptions.Options{ SandboxPlatform: "linux/amd64", BootFilesRootPath: validBootFilesPath, }, &vm.Spec{ @@ -2018,7 +2018,7 @@ func TestBuildSandboxConfig_CPUClamping(t *testing.T) { hostCount := hostProcessorCount(t) requestedCount := hostCount * 2 - doc, _, err := BuildSandboxConfig(ctx, "test-owner", t.TempDir(), &runhcsoptions.Options{ + doc, _, err := BuildSandboxConfig(ctx, t.TempDir(), &runhcsoptions.Options{ SandboxPlatform: "linux/amd64", BootFilesRootPath: validBootFilesPath, }, &vm.Spec{ diff --git 
a/internal/controller/vm/doc.go b/internal/controller/vm/doc.go index af6373f282..304e117157 100644 --- a/internal/controller/vm/doc.go +++ b/internal/controller/vm/doc.go @@ -9,19 +9,44 @@ // // # Lifecycle // -// A VM progresses through the following states: +// A VM follows the state machine below. // -// [StateNotCreated] → [StateCreated] → [StateRunning] → [StateStopped] +// ┌─────────────────┐ +// │ StateNotCreated │ +// └────────┬────────┘ +// │ CreateVM ok +// ▼ +// ┌─────────────────┐ StartVM fails / +// │ StateCreated │──────── TerminateVM fails ──────┐ +// └──┬─────┬────────┘ │ +// │ │ StartVM ok ▼ +// │ ▼ ┌───────────────┐ +// │ ┌─────────────────┐ TerminateVM │ StateInvalid │ +// │ │ StateRunning │───── fails ──────►│ │ +// │ └────────┬────────┘ └───────┬───────┘ +// │ │ VM exits / │ TerminateVM ok +// TerminateVM ok │ TerminateVM ok │ +// │ ▼ ▼ +// │ ┌─────────────────────────────────────────────────┐ +// └─►│ StateTerminated │ +// └─────────────────────────────────────────────────┘ // -// - [StateNotCreated]: initial state after [NewController] is called. -// - [StateCreated]: after [Controller.CreateVM] succeeds; the VM process exists but has not started. -// - [StateRunning]: after [Controller.StartVM] succeeds; the guest OS is up and the -// Guest Compute Service (GCS) connection is established. -// - [StateStopped]: terminal state reached after the VM exits or [Controller.TerminateVM] is called. +// State descriptions: +// +// - [StateNotCreated]: initial state after [NewController] is called. +// - [StateCreated]: after [Controller.CreateVM] succeeds; the VM exists but has not started. +// - [StateRunning]: after [Controller.StartVM] succeeds; the guest OS is up and the +// Guest Compute Service (GCS) connection is established. +// - [StateTerminated]: terminal state reached after the VM exits naturally or +// [Controller.TerminateVM] completes successfully. 
+// - [StateInvalid]: error state entered when [Controller.StartVM] fails after the underlying +// HCS VM has already started, or when [Controller.TerminateVM] fails during uvm.Close +// (from either [StateCreated] or [StateRunning]). +// A VM in this state can only be cleaned up by calling [Controller.TerminateVM]. // // # Platform Variants // -// Certain behaviours differ between LCOW and WCOW guests and are implemented in +// Certain behaviors differ between LCOW and WCOW guests and are implemented in // platform-specific source files selected via build tags (default for lcow shim and "wcow" tag for wcow shim). // // # Usage diff --git a/internal/controller/vm/interface.go b/internal/controller/vm/interface.go index ac42b38722..3629e21e33 100644 --- a/internal/controller/vm/interface.go +++ b/internal/controller/vm/interface.go @@ -11,15 +11,11 @@ import ( "github.com/Microsoft/hcsshim/internal/protocol/guestresource" "github.com/Microsoft/hcsshim/internal/shimdiag" "github.com/Microsoft/hcsshim/internal/vm/guestmanager" - "github.com/Microsoft/hcsshim/internal/vm/vmmanager" "github.com/Microsoft/go-winio/pkg/guid" ) type Controller interface { - // Host returns the vm manager instance for this VM. - Host() *vmmanager.UtilityVM - // Guest returns the guest manager instance for this VM. Guest() *guestmanager.Guest @@ -35,12 +31,12 @@ type Controller interface { // guest-host communication, and transitions the VM to StateRunning. StartVM(context.Context, *StartOptions) error - // AddGuestDrivers adds the specified drivers to the VM. - AddGuestDrivers(ctx context.Context, drivers []string) error - // ExecIntoHost executes a command in the running UVM. ExecIntoHost(ctx context.Context, request *shimdiag.ExecProcessRequest) (int, error) + // DumpStacks dumps the GCS stacks associated with the VM. + DumpStacks(ctx context.Context) (string, error) + // Wait blocks until the VM exits or the context is cancelled. // It also waits for log output processing to complete. 
Wait(ctx context.Context) error @@ -50,36 +46,12 @@ type Controller interface { TerminateVM(context.Context) error // StartTime returns the timestamp when the VM was started. - // Returns zero value of time.time, if the VM is not in StateRunning or StateStopped. + // Returns zero value of time.Time, if the VM is not in StateRunning or StateTerminated. StartTime() time.Time - // StoppedStatus returns information about the stopped VM, including when it - // stopped and any exit error. Returns an error if the VM is not in StateStopped. - StoppedStatus() (*StoppedStatus, error) -} - -// Handle is the subset of Controller that grants a consumer access to the -// VM's host and guest surfaces, and the ability to wait for the VM to exit. -// Accepting this narrow interface instead of the full Controller keeps callers -// (e.g. pod.Controller) decoupled from VM lifecycle management concerns they -// do not own. -type Handle interface { - // Host returns the vm manager instance for this VM. - // It can be used to interact with and modify the UVM host state. - Host() *vmmanager.UtilityVM - - // Guest returns the guest manager instance for this VM. - // It can be used to perform actions within the guest. - Guest() *guestmanager.Guest - - // AddGuestDrivers adds the specified drivers to the VM. - AddGuestDrivers(ctx context.Context, drivers []string) error - - // State returns the current VM state. - State() State - - // Wait blocks until the VM exits or the context is cancelled. - Wait(ctx context.Context) error + // ExitStatus returns information about the stopped VM, including when it + // stopped and any exit error. Returns an error if the VM is not in StateTerminated. + ExitStatus() (*ExitStatus, error) } // CreateOptions contains the configuration needed to create a new VM. @@ -105,8 +77,8 @@ type StartOptions struct { ConfidentialOptions *guestresource.ConfidentialOptions } -// StoppedStatus contains information about a stopped VM's final state.
-type StoppedStatus struct { +// ExitStatus contains information about a stopped VM's final state. +type ExitStatus struct { // StoppedTime is the timestamp when the VM stopped. StoppedTime time.Time diff --git a/internal/controller/vm/state.go b/internal/controller/vm/state.go new file mode 100644 index 0000000000..6e98eb4ae1 --- /dev/null +++ b/internal/controller/vm/state.go @@ -0,0 +1,78 @@ +//go:build windows + +package vm + +// State represents the current state of the VM lifecycle. +// +// The normal progression is: +// +// StateNotCreated → StateCreated → StateRunning → StateTerminated +// +// If an unrecoverable error occurs during [Controller.StartVM] or +// [Controller.TerminateVM], the VM transitions to [StateInvalid] instead. +// A VM in [StateInvalid] can only be cleaned up via [Controller.TerminateVM]. +// +// Full state-transition table: +// +// Current State │ Trigger │ Next State +// ─────────────────┼────────────────────────────────────┼───────────────── +// StateNotCreated │ CreateVM succeeds │ StateCreated +// StateCreated │ StartVM succeeds │ StateRunning +// StateCreated │ TerminateVM succeeds │ StateTerminated +// StateCreated │ StartVM fails │ StateInvalid +// StateCreated │ TerminateVM fails │ StateInvalid +// StateRunning │ VM exits or TerminateVM succeeds │ StateTerminated +// StateRunning │ TerminateVM fails (uvm.Close) │ StateInvalid +// StateInvalid │ TerminateVM called │ StateTerminated +// StateTerminated │ (terminal — no further transitions)│ — +type State int32 + +const ( + // StateNotCreated indicates the VM has not been created yet. + // This is the initial state when a Controller is first instantiated via [NewController]. + // Valid transitions: StateNotCreated → StateCreated (via [Controller.CreateVM]) + StateNotCreated State = iota + + // StateCreated indicates the VM has been created but not yet started. 
+ // Valid transitions: + // - StateCreated → StateRunning (via [Controller.StartVM], on success) + // - StateCreated → StateTerminated (via [Controller.TerminateVM], on success) + // - StateCreated → StateInvalid (via [Controller.StartVM], on failure) + StateCreated + + // StateRunning indicates the VM has been started and is running. + // The guest OS is up and the Guest Compute Service (GCS) connection is established. + // Valid transitions: + // - StateRunning → StateTerminated (VM exits naturally or [Controller.TerminateVM] succeeds) + // - StateRunning → StateInvalid ([Controller.TerminateVM] fails during uvm.Close) + StateRunning + + // StateTerminated indicates the VM has exited or been successfully terminated. + // This is a terminal state — once reached, no further state transitions are possible. + StateTerminated + + // StateInvalid indicates that an unrecoverable error has occurred. + // The VM transitions to this state when: + // - [Controller.StartVM] fails after the underlying HCS VM has already started, or + // - [Controller.TerminateVM] fails during uvm.Close (from either [StateCreated] or [StateRunning]). + // A VM in this state can only be cleaned up by calling [Controller.TerminateVM]. + StateInvalid +) + +// String returns a human-readable string representation of the VM State. +func (s State) String() string { + switch s { + case StateNotCreated: + return "NotCreated" + case StateCreated: + return "Created" + case StateRunning: + return "Running" + case StateTerminated: + return "Terminated" + case StateInvalid: + return "Invalid" + default: + return "Unknown" + } +} diff --git a/internal/controller/vm/status.go b/internal/controller/vm/status.go deleted file mode 100644 index f1569ba1af..0000000000 --- a/internal/controller/vm/status.go +++ /dev/null @@ -1,79 +0,0 @@ -//go:build windows - -package vm - -import ( - "fmt" - "sync/atomic" -) - -// State represents the current state of the VM lifecycle. 
-// The VM progresses through states in the following order: -// StateNotCreated -> StateCreated -> StateRunning -> StateStopped -type State int32 - -const ( - // StateNotCreated indicates the VM has not been created yet. - // This is the initial state when a Controller is first instantiated. - // Valid transitions: StateNotCreated -> StateCreated (via CreateVM) - StateNotCreated State = iota - - // StateCreated indicates the VM has been created but not started. - // Valid transitions: StateCreated -> StateRunning (via StartVM) - StateCreated - - // StateRunning indicates the VM has been started and is running. - // The guest OS is running and the Guest Compute Service (GCS) connection - // is established. - // Valid transitions: StateRunning -> StateStopped (when VM exits or is terminated) - StateRunning - - // StateStopped indicates the VM has exited or been terminated. - // This is a terminal state - once stopped, the VM cannot be restarted. - // No further state transitions are possible. - StateStopped -) - -// String returns a human-readable string representation of the VM State. -func (s State) String() string { - switch s { - case StateNotCreated: - return "NotCreated" - case StateCreated: - return "Created" - case StateRunning: - return "Running" - case StateStopped: - return "Stopped" - default: - return "Unknown" - } -} - -// atomicState is a concurrency-safe VM state holder backed by an atomic int32. -// All reads and writes go through atomic operations, so no mutex is required -// for state itself. -type atomicState struct { - v atomic.Int32 -} - -// load returns the current State with an atomic read. -func (a *atomicState) load() State { - return State(a.v.Load()) -} - -// store unconditionally sets the state with an atomic write. -func (a *atomicState) store(s State) { - a.v.Store(int32(s)) -} - -// transition atomically moves from `from` to `to` using a compare-and-swap. 
-// It returns an error if the current state is not `from`, leaving the state -// unchanged. This prevents two concurrent callers from both believing they -// performed the same transition. -func (a *atomicState) transition(from, to State) error { - if !a.v.CompareAndSwap(int32(from), int32(to)) { - return fmt.Errorf("unexpected VM state: want %s, got %s", from, a.load()) - } - return nil -} diff --git a/internal/controller/vm/vm.go b/internal/controller/vm/vm.go index 4aef86e390..02e6850b76 100644 --- a/internal/controller/vm/vm.go +++ b/internal/controller/vm/vm.go @@ -21,6 +21,7 @@ import ( "github.com/Microsoft/hcsshim/internal/vm/vmmanager" "github.com/Microsoft/hcsshim/internal/vm/vmutils" iwin "github.com/Microsoft/hcsshim/internal/windows" + "github.com/containerd/errdefs" "github.com/Microsoft/go-winio/pkg/process" "github.com/sirupsen/logrus" @@ -36,7 +37,8 @@ type Manager struct { guest *guestmanager.Guest // vmState tracks the current state of the VM lifecycle. - vmState atomicState + // Access must be guarded by mu. + vmState State // mu guards the concurrent access to the Manager's fields and operations. mu sync.Mutex @@ -57,23 +59,13 @@ type Manager struct { // Ensure both the Controller, and it's subset Handle are implemented by Manager. var _ Controller = (*Manager)(nil) -var _ Handle = (*Manager)(nil) // NewController creates a new Manager instance in the [StateNotCreated] state. func NewController() *Manager { - m := &Manager{ + return &Manager{ logOutputDone: make(chan struct{}), + vmState: StateNotCreated, } - // Default of vmState would always be 0 and hence StateNotCreated, - // but setting it here explicitly for clarity. - m.vmState.store(StateNotCreated) - return m -} - -// Host returns the vm manager instance for this VM. -// It can be used to interact with and modify the UVM host state. -func (c *Manager) Host() *vmmanager.UtilityVM { - return c.uvm } // Guest returns the guest manager instance for this VM. 
@@ -84,7 +76,10 @@ func (c *Manager) Guest() *guestmanager.Guest { // State returns the current VM state. func (c *Manager) State() State { - return c.vmState.load() + c.mu.Lock() + defer c.mu.Unlock() + + return c.vmState } // CreateVM creates the VM using the HCS document and initializes device state. @@ -94,11 +89,9 @@ func (c *Manager) CreateVM(ctx context.Context, opts *CreateOptions) error { c.mu.Lock() defer c.mu.Unlock() - if c.vmState.load() == StateCreated { - return nil - } - if c.vmState.load() != StateNotCreated { - return fmt.Errorf("cannot create VM: VM is already in state %s", c.vmState.load()) + // In case of duplicate CreateVM call for the same controller, we want to fail. + if c.vmState != StateNotCreated { + return fmt.Errorf("cannot create VM: VM is in incorrect state %s", c.vmState) } // Create the VM via vmmanager. @@ -106,6 +99,8 @@ func (c *Manager) CreateVM(ctx context.Context, opts *CreateOptions) error { if err != nil { return fmt.Errorf("failed to create VM: %w", err) } + + // Set the Manager parameters after successful creation. c.vmID = opts.ID c.uvm = uvm // Determine if the VM is physically backed based on the HCS document configuration. @@ -116,42 +111,43 @@ func (c *Manager) CreateVM(ctx context.Context, opts *CreateOptions) error { // We will create the guest connection via GuestManager during StartVM. c.guest = guestmanager.New(ctx, uvm) - c.vmState.store(StateCreated) + c.vmState = StateCreated return nil } // StartVM starts the VM that was previously created via CreateVM. // It starts the underlying HCS VM, establishes the GCS connection, // and transitions the VM to [StateRunning]. -// On any failure the VM is transitioned to [StateStopped]. +// On any failure the VM is transitioned to [StateInvalid]. 
func (c *Manager) StartVM(ctx context.Context, opts *StartOptions) (err error) { ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.Operation, "StartVM")) - if c.uvm == nil || c.guest == nil { - return errors.New("VM has not been created") - } - c.mu.Lock() defer c.mu.Unlock() - if c.vmState.load() == StateRunning { + // If the VM is already running, we can skip the start operation and just return. + // This makes StartVM idempotent in the case of duplicate calls. + if c.vmState == StateRunning { return nil } - if c.vmState.load() != StateCreated { - return fmt.Errorf("cannot start VM: VM is already in state %s", c.vmState.load()) + // However, if the VM is in any other state than Created, + // we should fail as StartVM is only valid on a created VM. + if c.vmState != StateCreated { + return fmt.Errorf("cannot start VM: VM is in incorrect state %s", c.vmState) } defer func() { if err != nil { - // If there was an error starting the VM, transition to Stopped. - c.vmState.store(StateStopped) + // If starting the VM fails, we transition to Invalid state to prevent any further operations on the VM. + // The VM can be terminated by invoking TerminateVM. + c.vmState = StateInvalid } }() - // save parent context, without timeout to use in terminate + // save parent context, without timeout to use for wait. pCtx := ctx // For remaining operations, we expect them to complete within the GCS connection timeout, - // otherwise we want to fail and cleanup. + // otherwise we want to fail. 
ctx, cancel := context.WithTimeout(pCtx, timeout.GCSConnectionTimeout) log.G(ctx).Debugf("using gcs connection timeout: %s\n", timeout.GCSConnectionTimeout) @@ -170,22 +166,12 @@ func (c *Manager) StartVM(ctx context.Context, opts *StartOptions) (err error) { err = c.uvm.Start(ctx) if err != nil { - // use parent context, to prevent 2 minute timout (set above) from overridding terminate operation's - // timeout and erroring out prematurely - _ = c.uvm.Terminate(pCtx) return fmt.Errorf("failed to start VM: %w", err) } // Start waiting on the utility VM in the background. // This goroutine will complete when the VM exits. - go func() { - // the original context may have timeout or propagate a cancellation - // copy the original to prevent it affecting the background wait go routine - cCtx := context.WithoutCancel(pCtx) - _ = c.uvm.Wait(cCtx) - // Once the VM has exited, atomically record the stopped state. - c.vmState.store(StateStopped) - }() + go c.waitForVMExit(pCtx) // Collect any errors from writing entropy or establishing the log // connection. @@ -210,34 +196,48 @@ func (c *Manager) StartVM(ctx context.Context, opts *StartOptions) (err error) { } } - c.vmState.store(StateRunning) + // If all goes well, we can transition the VM to Running state. + c.vmState = StateRunning return nil } -func (c *Manager) AddGuestDrivers(ctx context.Context, drivers []string) error { - ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.Operation, "AddGuestDrivers")) - - if c.uvm == nil { - return errors.New("VM has not been created") +// waitForVMExit blocks until the VM exits and then transitions the VM state to [StateTerminated]. +// This is called in StartVM in a background goroutine. 
+func (c *Manager) waitForVMExit(ctx context.Context) { + // The original context may carry a timeout or propagate a cancellation; + // copy the original to prevent it from affecting the background wait goroutine + ctx = context.WithoutCancel(ctx) + _ = c.uvm.Wait(ctx) + // Once the VM has exited, attempt to transition to Terminated. + // This may be a no-op if TerminateVM already ran concurrently and + // transitioned the state first — log the skipped transition at debug + // level so that concurrent-termination races remain observable. + c.mu.Lock() + if c.vmState != StateTerminated { + c.vmState = StateTerminated + } else { + log.G(ctx).WithField("currentState", c.vmState).Debug("waitForVMExit: state transition to Terminated was a no-op") + } + c.mu.Unlock() +} - c.mu.Lock() - defer c.mu.Unlock() +// ExecIntoHost executes a command in the running UVM. +func (c *Manager) ExecIntoHost(ctx context.Context, request *shimdiag.ExecProcessRequest) (int, error) { + ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.Operation, "ExecIntoHost")) - if c.vmState.load() != StateRunning { - return fmt.Errorf("cannot add guest drivers: VM is in state %s", c.vmState.load()) + if request.Terminal && request.Stderr != "" { + return -1, fmt.Errorf("if using terminal, stderr must be empty: %w", errdefs.ErrFailedPrecondition) } - for _, driver := range drivers { - _ = driver + // Validate that the VM is running before allowing exec into it. + c.mu.Lock() + if c.vmState != StateRunning { + c.mu.Unlock() + return -1, fmt.Errorf("cannot exec into VM: VM is in incorrect state %s", c.vmState) } + c.mu.Unlock() - return nil -} - -// ExecIntoHost executes a command in the running UVM. -func (c *Manager) ExecIntoHost(ctx context.Context, request *shimdiag.ExecProcessRequest) (int, error) { // Keep a count of active exec sessions. // This will be used to disallow LM with existing exec sessions, // as that can lead to orphaned processes within UVM.
@@ -255,18 +255,41 @@ func (c *Manager) ExecIntoHost(ctx context.Context, request *shimdiag.ExecProces return c.guest.ExecIntoUVM(ctx, cmdReq) } +// DumpStacks dumps the GCS stacks associated with the VM +func (c *Manager) DumpStacks(ctx context.Context) (string, error) { + ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.Operation, "DumpStacks")) + + // Validate that the VM is running before sending dump stacks request to GCS. + c.mu.Lock() + if c.vmState != StateRunning { + c.mu.Unlock() + return "", fmt.Errorf("cannot dump stacks: VM is in incorrect state %s", c.vmState) + } + c.mu.Unlock() + + if c.guest.Capabilities().IsDumpStacksSupported() { + return c.guest.DumpStacks(ctx) + } + + return "", nil +} + // Wait blocks until the VM exits and all log output processing has completed. func (c *Manager) Wait(ctx context.Context) error { ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.Operation, "Wait")) - if c.uvm == nil { - return errors.New("VM has not been created") + // Validate that the VM has been created and can be waited on. + // Terminated VMs can also be waited on where we return immediately. + c.mu.Lock() + if c.vmState == StateNotCreated { + c.mu.Unlock() + return fmt.Errorf("cannot wait on VM: VM is in incorrect state %s", c.vmState) } + c.mu.Unlock() - var err error // Wait for the utility VM to exit. // This will be unblocked when the VM exits or if the context is cancelled. - err = c.uvm.Wait(ctx) + err := c.uvm.Wait(ctx) // Wait for the log output processing to complete, // which ensures all logs are processed before we return. 
@@ -285,32 +308,20 @@ func (c *Manager) Wait(ctx context.Context) error { func (c *Manager) Stats(ctx context.Context) (*stats.VirtualMachineStatistics, error) { ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.Operation, "Stats")) - if c.uvm == nil { - return nil, errors.New("VM has not been created") - } + c.mu.Lock() + defer c.mu.Unlock() - if c.vmState.load() != StateRunning { - return nil, fmt.Errorf("cannot get stats: VM is in state %s", c.vmState.load()) + if c.vmState != StateRunning { + return nil, fmt.Errorf("cannot get stats: VM is in incorrect state %s", c.vmState) } - // Initialization of vmmemProcess with double-checked locking - // to prevent concurrent lookups. + // Initialization of vmmemProcess to calculate stats properly for VA-backed UVMs. if c.vmmemProcess == 0 { - // At this point in workflow, we are in Running state and - // therefore, c.mu is expected to be uncontended and used only - // in Terminate workflow. - c.mu.Lock() - // Check again after acquiring lock in case another goroutine - // already initialized it - if c.vmmemProcess == 0 { - vmmemHandle, err := vmutils.LookupVMMEM(ctx, c.uvm.RuntimeID(), &iwin.WinAPI{}) - if err != nil { - c.mu.Unlock() - return nil, fmt.Errorf("cannot get stats: %w", err) - } - c.vmmemProcess = vmmemHandle + vmmemHandle, err := vmutils.LookupVMMEM(ctx, c.uvm.RuntimeID(), &iwin.WinAPI{}) + if err != nil { + return nil, fmt.Errorf("cannot get stats: %w", err) } - c.mu.Unlock() + c.vmmemProcess = vmmemHandle } s := &stats.VirtualMachineStatistics{} @@ -358,23 +369,19 @@ func (c *Manager) Stats(ctx context.Context) (*stats.VirtualMachineStatistics, e // and releases HCS resources. // // The context is used for all operations, including waits, so timeouts/cancellations may prevent -// proper uVM cleanup. +// proper UVM cleanup. 
func (c *Manager) TerminateVM(ctx context.Context) (err error) { ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.Operation, "TerminateVM")) - if c.uvm == nil { - return errors.New("VM has not been created") - } - c.mu.Lock() defer c.mu.Unlock() - if c.vmState.load() == StateStopped { + // If the VM has already terminated, we can skip termination and just return. + // Alternatively, if the VM was never created, we can also skip termination. + // This makes the TerminateVM operation idempotent. + if c.vmState == StateTerminated || c.vmState == StateNotCreated { return nil } - if c.vmState.load() != StateRunning { - return fmt.Errorf("cannot terminate VM: VM is in state %s", c.vmState.load()) - } // Best effort attempt to clean up the open vmmem handle. _ = windows.Close(c.vmmemProcess) @@ -387,43 +394,42 @@ func (c *Manager) TerminateVM(ctx context.Context) (err error) { err = c.uvm.Close(ctx) if err != nil { + // Transition to Invalid so no further active operations can be performed on the VM. + c.vmState = StateInvalid return fmt.Errorf("failed to close utility VM: %w", err) } - // We set the Stopped status at the end and therefore, if any error is encountered during the termination - // or the context was canceled, the VM will not be marked as Stopped. - // In such a case, caller can retry the termination. - c.vmState.store(StateStopped) + // Set the Terminated status at the end. + c.vmState = StateTerminated return nil } // StartTime returns the timestamp when the VM was started. -// Returns zero value of time.time, if the VM is not in StateRunning or StateStopped. +// Returns zero value of time.Time if the VM has not yet reached +// [StateRunning] or [StateTerminated]. 
func (c *Manager) StartTime() (startTime time.Time) { - if c.uvm == nil { - return startTime - } + c.mu.Lock() + defer c.mu.Unlock() - if c.vmState.load() == StateNotCreated || c.vmState.load() == StateCreated { - return startTime + if c.vmState == StateRunning || c.vmState == StateTerminated { + return c.uvm.StartedTime() } - return c.uvm.StartedTime() + return startTime } -// StoppedStatus returns the final status of the VM once it has reached -// [StateStopped], including the time it stopped and any exit error. +// ExitStatus returns the final status of the VM once it has reached +// [StateTerminated], including the time it stopped and any exit error. // Returns an error if the VM has not yet stopped. -func (c *Manager) StoppedStatus() (*StoppedStatus, error) { - if c.uvm == nil { - return nil, errors.New("VM has not been created") - } +func (c *Manager) ExitStatus() (*ExitStatus, error) { + c.mu.Lock() + defer c.mu.Unlock() - if c.vmState.load() != StateStopped { - return nil, fmt.Errorf("cannot get stopped status: VM is in state %s", c.vmState.load()) + if c.vmState != StateTerminated { + return nil, fmt.Errorf("cannot get exit status: VM is in incorrect state %s", c.vmState) } - return &StoppedStatus{ + return &ExitStatus{ StoppedTime: c.uvm.StoppedTime(), Err: c.uvm.ExitError(), }, nil diff --git a/internal/logfields/fields.go b/internal/logfields/fields.go index cceb3e2d18..3e3e230293 100644 --- a/internal/logfields/fields.go +++ b/internal/logfields/fields.go @@ -8,12 +8,12 @@ const ( Operation = "operation" ID = "id" - SandboxID = "sid" ContainerID = "cid" ExecID = "eid" ProcessID = "pid" TaskID = "tid" UVMID = "uvm-id" + SandboxID = "sandbox-id" // networking and IO @@ -50,6 +50,41 @@ const ( Uint32 = "uint32" Uint64 = "uint64" + // task / process lifecycle + + Bundle = "bundle" + Terminal = "terminal" + Stdin = "stdin" + Stdout = "stdout" + Stderr = "stderr" + Checkpoint = "checkpoint" + ParentCheckpoint = "parent-checkpoint" + Status = "status" + 
ExitStatus = "exit-status" + ExitedAt = "exited-at" + Signal = "signal" + All = "all" + Width = "width" + Height = "height" + Version = "version" + ShimPid = "shim-pid" + TaskPid = "task-pid" + ExecSpanID = "exec-id" + + // sandbox + + NetNsPath = "net-ns-path" + Verbose = "verbose" + + // shimdiag + + Args = "args" + Workdir = "workdir" + HostPath = "host-path" + UVMPath = "uvm-path" + ReadOnly = "readonly" + Execs = "execs" + // runhcs VMShimOperation = "vmshim-op" diff --git a/internal/vm/vmutils/doc.go b/internal/vm/vmutils/doc.go index e78e4a5809..31ffb541ca 100644 --- a/internal/vm/vmutils/doc.go +++ b/internal/vm/vmutils/doc.go @@ -7,6 +7,6 @@ // (internal/controller). Functions in this package are designed to be decoupled from // specific UVM implementations. // -// This allows different shims (containerd-shim-runhcs-v1, containerd-shim-lcow-v1) +// This allows different shims (containerd-shim-runhcs-v1, containerd-shim-lcow-v2) // to share common logic while maintaining their own orchestration patterns. package vmutils diff --git a/internal/vm/vmutils/utils.go b/internal/vm/vmutils/utils.go index cd710a6bc3..f609f975ff 100644 --- a/internal/vm/vmutils/utils.go +++ b/internal/vm/vmutils/utils.go @@ -9,7 +9,12 @@ import ( "os" "path/filepath" + runhcsoptions "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/options" "github.com/Microsoft/hcsshim/internal/log" + + "github.com/containerd/typeurl/v2" + "github.com/sirupsen/logrus" + "google.golang.org/protobuf/types/known/anypb" ) // ParseUVMReferenceInfo reads the UVM reference info file, and base64 encodes the content if it exists. @@ -30,3 +35,29 @@ func ParseUVMReferenceInfo(ctx context.Context, referenceRoot, referenceName str return base64.StdEncoding.EncodeToString(content), nil } + +// UnmarshalRuntimeOptions decodes the runtime options into runhcsoptions.Options. +// When no options are provided (options == nil) it returns a non-nil, +// zero-value Options struct. 
+func UnmarshalRuntimeOptions(ctx context.Context, options *anypb.Any) (*runhcsoptions.Options, error) { + opts := &runhcsoptions.Options{} + if options == nil { + return opts, nil + } + + v, err := typeurl.UnmarshalAny(options) + if err != nil { + return nil, fmt.Errorf("failed to unmarshal options: %w", err) + } + + shimOpts, ok := v.(*runhcsoptions.Options) + if !ok { + return nil, fmt.Errorf("failed to unmarshal runtime options: expected *runhcsoptions.Options, got %T", v) + } + + if entry := log.G(ctx); entry.Logger.IsLevelEnabled(logrus.DebugLevel) { + entry.WithField("options", log.Format(ctx, shimOpts)).Debug("parsed runtime options") + } + + return shimOpts, nil +} From 49b2b6646d26c0f12fd03622ee6a68a245f7c3d7 Mon Sep 17 00:00:00 2001 From: Harsh Rawat Date: Tue, 17 Mar 2026 02:39:36 +0530 Subject: [PATCH 3/6] address review comments: 2 Signed-off-by: Harsh Rawat --- cmd/containerd-shim-lcow-v2/main.go | 10 +-- .../service/service_sandbox_internal.go | 2 +- .../service/service_shimdiag_internal.go | 12 +--- .../service/service_task.go | 16 ++--- .../service/service_task_internal.go | 68 +++++-------------- internal/builder/vm/lcow/specs.go | 6 +- internal/builder/vm/lcow/specs_test.go | 6 +- internal/controller/vm/vm.go | 32 ++++----- internal/logfields/fields.go | 1 - internal/vm/vmutils/constants.go | 3 + 10 files changed, 56 insertions(+), 100 deletions(-) diff --git a/cmd/containerd-shim-lcow-v2/main.go b/cmd/containerd-shim-lcow-v2/main.go index 44a3b093ad..7bb297810f 100644 --- a/cmd/containerd-shim-lcow-v2/main.go +++ b/cmd/containerd-shim-lcow-v2/main.go @@ -15,17 +15,13 @@ import ( "github.com/Microsoft/hcsshim/internal/log" "github.com/Microsoft/hcsshim/internal/oc" "github.com/Microsoft/hcsshim/internal/shim" + "github.com/Microsoft/hcsshim/internal/vm/vmutils" "github.com/containerd/errdefs" "github.com/sirupsen/logrus" "go.opencensus.io/trace" ) -const ( - // name is the name of lcow shim implementation. 
- name = "containerd-shim-lcow-v2" -) - // Add a manifest to get proper Windows version detection. //go:generate go tool github.com/josephspurrier/goversioninfo/cmd/goversioninfo -platform-specific @@ -42,13 +38,13 @@ func main() { // Set the log configuration. // If we encounter an error, we exit with non-zero code. if err := setLogConfiguration(); err != nil { - _, _ = fmt.Fprintf(os.Stderr, "%s: %s", name, err) + _, _ = fmt.Fprintf(os.Stderr, "%s: %s", vmutils.LCOWShimName, err) os.Exit(1) } // Start the shim manager event loop. The manager is responsible for // handling containerd start/stop lifecycle calls for the shim process. - shim.Run(context.Background(), newShimManager(name), func(c *shim.Config) { + shim.Run(context.Background(), newShimManager(vmutils.LCOWShimName), func(c *shim.Config) { // We don't want the shim package to set up logging options. c.NoSetupLogger = true }) diff --git a/cmd/containerd-shim-lcow-v2/service/service_sandbox_internal.go b/cmd/containerd-shim-lcow-v2/service/service_sandbox_internal.go index 8742cada62..364c5807d9 100644 --- a/cmd/containerd-shim-lcow-v2/service/service_sandbox_internal.go +++ b/cmd/containerd-shim-lcow-v2/service/service_sandbox_internal.go @@ -71,7 +71,7 @@ func (s *Service) createSandboxInternal(ctx context.Context, request *sandbox.Cr return nil, fmt.Errorf("failed to create sandbox: sandbox already exists with ID %s", s.sandboxID) } - hcsDocument, sandboxOptions, err := lcow.BuildSandboxConfig(ctx, request.BundlePath, shimOpts, &sandboxSpec) + hcsDocument, sandboxOptions, err := lcow.BuildSandboxConfig(ctx, vmutils.LCOWShimName, request.BundlePath, shimOpts, &sandboxSpec) if err != nil { return nil, fmt.Errorf("failed to parse sandbox spec: %w", err) } diff --git a/cmd/containerd-shim-lcow-v2/service/service_shimdiag_internal.go b/cmd/containerd-shim-lcow-v2/service/service_shimdiag_internal.go index 3a6dc34b67..a835ade320 100644 --- a/cmd/containerd-shim-lcow-v2/service/service_shimdiag_internal.go 
+++ b/cmd/containerd-shim-lcow-v2/service/service_shimdiag_internal.go @@ -22,20 +22,14 @@ func (s *Service) diagExecInHostInternal(ctx context.Context, request *shimdiag. return &shimdiag.ExecProcessResponse{ExitCode: int32(ec)}, nil } -func (s *Service) diagTasksInternal(ctx context.Context, request *shimdiag.TasksRequest) (*shimdiag.TasksResponse, error) { - _ = ctx - _ = request +func (s *Service) diagTasksInternal(_ context.Context, _ *shimdiag.TasksRequest) (*shimdiag.TasksResponse, error) { return nil, errdefs.ErrNotImplemented } -func (s *Service) diagShareInternal(ctx context.Context, request *shimdiag.ShareRequest) (*shimdiag.ShareResponse, error) { - _ = ctx - _ = request +func (s *Service) diagShareInternal(_ context.Context, _ *shimdiag.ShareRequest) (*shimdiag.ShareResponse, error) { return nil, errdefs.ErrNotImplemented } -func (s *Service) diagStacksInternal(ctx context.Context, request *shimdiag.StacksRequest) (*shimdiag.StacksResponse, error) { - _ = ctx - _ = request +func (s *Service) diagStacksInternal(_ context.Context, _ *shimdiag.StacksRequest) (*shimdiag.StacksResponse, error) { return nil, errdefs.ErrNotImplemented } diff --git a/cmd/containerd-shim-lcow-v2/service/service_task.go b/cmd/containerd-shim-lcow-v2/service/service_task.go index e41eeb330b..f7f7dda5af 100644 --- a/cmd/containerd-shim-lcow-v2/service/service_task.go +++ b/cmd/containerd-shim-lcow-v2/service/service_task.go @@ -35,7 +35,7 @@ func (s *Service) State(ctx context.Context, request *task.StateRequest) (resp * span.AddAttributes( trace.StringAttribute(logfields.SandboxID, s.sandboxID), trace.StringAttribute(logfields.ID, request.ID), - trace.StringAttribute(logfields.ExecSpanID, request.ExecID)) + trace.StringAttribute(logfields.ExecID, request.ExecID)) r, e := s.stateInternal(ctx, request) return r, errgrpc.ToGRPC(e) @@ -83,7 +83,7 @@ func (s *Service) Start(ctx context.Context, request *task.StartRequest) (resp * span.AddAttributes( 
trace.StringAttribute(logfields.SandboxID, s.sandboxID), trace.StringAttribute(logfields.ID, request.ID), - trace.StringAttribute(logfields.ExecSpanID, request.ExecID)) + trace.StringAttribute(logfields.ExecID, request.ExecID)) r, e := s.startInternal(ctx, request) return r, errgrpc.ToGRPC(e) @@ -107,7 +107,7 @@ func (s *Service) Delete(ctx context.Context, request *task.DeleteRequest) (resp span.AddAttributes( trace.StringAttribute(logfields.SandboxID, s.sandboxID), trace.StringAttribute(logfields.ID, request.ID), - trace.StringAttribute(logfields.ExecSpanID, request.ExecID)) + trace.StringAttribute(logfields.ExecID, request.ExecID)) r, e := s.deleteInternal(ctx, request) return r, errgrpc.ToGRPC(e) @@ -184,7 +184,7 @@ func (s *Service) Kill(ctx context.Context, request *task.KillRequest) (resp *em span.AddAttributes( trace.StringAttribute(logfields.SandboxID, s.sandboxID), trace.StringAttribute(logfields.ID, request.ID), - trace.StringAttribute(logfields.ExecSpanID, request.ExecID), + trace.StringAttribute(logfields.ExecID, request.ExecID), trace.Int64Attribute(logfields.Signal, int64(request.Signal)), trace.BoolAttribute(logfields.All, request.All)) @@ -202,7 +202,7 @@ func (s *Service) Exec(ctx context.Context, request *task.ExecProcessRequest) (r span.AddAttributes( trace.StringAttribute(logfields.SandboxID, s.sandboxID), trace.StringAttribute(logfields.ID, request.ID), - trace.StringAttribute(logfields.ExecSpanID, request.ExecID), + trace.StringAttribute(logfields.ExecID, request.ExecID), trace.BoolAttribute(logfields.Terminal, request.Terminal), trace.StringAttribute(logfields.Stdin, request.Stdin), trace.StringAttribute(logfields.Stdout, request.Stdout), @@ -222,7 +222,7 @@ func (s *Service) ResizePty(ctx context.Context, request *task.ResizePtyRequest) span.AddAttributes( trace.StringAttribute(logfields.SandboxID, s.sandboxID), trace.StringAttribute(logfields.ID, request.ID), - trace.StringAttribute(logfields.ExecSpanID, request.ExecID), + 
trace.StringAttribute(logfields.ExecID, request.ExecID), trace.Int64Attribute(logfields.Width, int64(request.Width)), trace.Int64Attribute(logfields.Height, int64(request.Height))) @@ -240,7 +240,7 @@ func (s *Service) CloseIO(ctx context.Context, request *task.CloseIORequest) (re span.AddAttributes( trace.StringAttribute(logfields.SandboxID, s.sandboxID), trace.StringAttribute(logfields.ID, request.ID), - trace.StringAttribute(logfields.ExecSpanID, request.ExecID), + trace.StringAttribute(logfields.ExecID, request.ExecID), trace.BoolAttribute(logfields.Stdin, request.Stdin)) r, e := s.closeIOInternal(ctx, request) @@ -279,7 +279,7 @@ func (s *Service) Wait(ctx context.Context, request *task.WaitRequest) (resp *ta span.AddAttributes( trace.StringAttribute(logfields.SandboxID, s.sandboxID), trace.StringAttribute(logfields.ID, request.ID), - trace.StringAttribute(logfields.ExecSpanID, request.ExecID)) + trace.StringAttribute(logfields.ExecID, request.ExecID)) r, e := s.waitInternal(ctx, request) return r, errgrpc.ToGRPC(e) diff --git a/cmd/containerd-shim-lcow-v2/service/service_task_internal.go b/cmd/containerd-shim-lcow-v2/service/service_task_internal.go index b68ddff470..254199873b 100644 --- a/cmd/containerd-shim-lcow-v2/service/service_task_internal.go +++ b/cmd/containerd-shim-lcow-v2/service/service_task_internal.go @@ -10,104 +10,70 @@ import ( "google.golang.org/protobuf/types/known/emptypb" ) -func (s *Service) stateInternal(ctx context.Context, request *task.StateRequest) (*task.StateResponse, error) { - _ = ctx - _ = request +func (s *Service) stateInternal(_ context.Context, _ *task.StateRequest) (*task.StateResponse, error) { return nil, errdefs.ErrNotImplemented } -func (s *Service) createInternal(ctx context.Context, request *task.CreateTaskRequest) (*task.CreateTaskResponse, error) { - _ = ctx - _ = request +func (s *Service) createInternal(_ context.Context, _ *task.CreateTaskRequest) (*task.CreateTaskResponse, error) { return nil, 
errdefs.ErrNotImplemented } -func (s *Service) startInternal(ctx context.Context, request *task.StartRequest) (*task.StartResponse, error) { - _ = ctx - _ = request +func (s *Service) startInternal(_ context.Context, _ *task.StartRequest) (*task.StartResponse, error) { return nil, errdefs.ErrNotImplemented } -func (s *Service) deleteInternal(ctx context.Context, request *task.DeleteRequest) (*task.DeleteResponse, error) { - _ = ctx - _ = request +func (s *Service) deleteInternal(_ context.Context, _ *task.DeleteRequest) (*task.DeleteResponse, error) { return nil, errdefs.ErrNotImplemented } -func (s *Service) pidsInternal(ctx context.Context, request *task.PidsRequest) (*task.PidsResponse, error) { - _ = ctx - _ = request +func (s *Service) pidsInternal(_ context.Context, _ *task.PidsRequest) (*task.PidsResponse, error) { return nil, errdefs.ErrNotImplemented } -func (s *Service) pauseInternal(ctx context.Context, request *task.PauseRequest) (*emptypb.Empty, error) { - _ = ctx - _ = request +func (s *Service) pauseInternal(_ context.Context, _ *task.PauseRequest) (*emptypb.Empty, error) { return nil, errdefs.ErrNotImplemented } -func (s *Service) resumeInternal(ctx context.Context, request *task.ResumeRequest) (*emptypb.Empty, error) { - _ = ctx - _ = request +func (s *Service) resumeInternal(_ context.Context, _ *task.ResumeRequest) (*emptypb.Empty, error) { return nil, errdefs.ErrNotImplemented } -func (s *Service) checkpointInternal(ctx context.Context, request *task.CheckpointTaskRequest) (*emptypb.Empty, error) { - _ = ctx - _ = request +func (s *Service) checkpointInternal(_ context.Context, _ *task.CheckpointTaskRequest) (*emptypb.Empty, error) { return nil, errdefs.ErrNotImplemented } -func (s *Service) killInternal(ctx context.Context, request *task.KillRequest) (*emptypb.Empty, error) { - _ = ctx - _ = request +func (s *Service) killInternal(_ context.Context, _ *task.KillRequest) (*emptypb.Empty, error) { return nil, errdefs.ErrNotImplemented } -func (s 
*Service) execInternal(ctx context.Context, request *task.ExecProcessRequest) (*emptypb.Empty, error) { - _ = ctx - _ = request +func (s *Service) execInternal(_ context.Context, _ *task.ExecProcessRequest) (*emptypb.Empty, error) { return nil, errdefs.ErrNotImplemented } -func (s *Service) resizePtyInternal(ctx context.Context, request *task.ResizePtyRequest) (*emptypb.Empty, error) { - _ = ctx - _ = request +func (s *Service) resizePtyInternal(_ context.Context, _ *task.ResizePtyRequest) (*emptypb.Empty, error) { return nil, errdefs.ErrNotImplemented } -func (s *Service) closeIOInternal(ctx context.Context, request *task.CloseIORequest) (*emptypb.Empty, error) { - _ = ctx - _ = request +func (s *Service) closeIOInternal(_ context.Context, _ *task.CloseIORequest) (*emptypb.Empty, error) { return nil, errdefs.ErrNotImplemented } -func (s *Service) updateInternal(ctx context.Context, request *task.UpdateTaskRequest) (*emptypb.Empty, error) { - _ = ctx - _ = request +func (s *Service) updateInternal(_ context.Context, _ *task.UpdateTaskRequest) (*emptypb.Empty, error) { return nil, errdefs.ErrNotImplemented } -func (s *Service) waitInternal(ctx context.Context, request *task.WaitRequest) (*task.WaitResponse, error) { - _ = ctx - _ = request +func (s *Service) waitInternal(_ context.Context, _ *task.WaitRequest) (*task.WaitResponse, error) { return nil, errdefs.ErrNotImplemented } -func (s *Service) statsInternal(ctx context.Context, request *task.StatsRequest) (*task.StatsResponse, error) { - _ = ctx - _ = request +func (s *Service) statsInternal(_ context.Context, _ *task.StatsRequest) (*task.StatsResponse, error) { return nil, errdefs.ErrNotImplemented } -func (s *Service) connectInternal(ctx context.Context, request *task.ConnectRequest) (*task.ConnectResponse, error) { - _ = ctx - _ = request +func (s *Service) connectInternal(_ context.Context, _ *task.ConnectRequest) (*task.ConnectResponse, error) { return nil, errdefs.ErrNotImplemented } -func (s *Service) 
shutdownInternal(ctx context.Context, request *task.ShutdownRequest) (*emptypb.Empty, error) { - _ = ctx - _ = request +func (s *Service) shutdownInternal(_ context.Context, _ *task.ShutdownRequest) (*emptypb.Empty, error) { return nil, errdefs.ErrNotImplemented } diff --git a/internal/builder/vm/lcow/specs.go b/internal/builder/vm/lcow/specs.go index 0b28d151c6..ff67ab6a74 100644 --- a/internal/builder/vm/lcow/specs.go +++ b/internal/builder/vm/lcow/specs.go @@ -24,13 +24,11 @@ import ( "github.com/sirupsen/logrus" ) -// shimName is the name of lcow shim implementation. -const shimName = "containerd-shim-lcow-v2" - // BuildSandboxConfig is the primary entry point for generating the HCS ComputeSystem // document used to create an LCOW Utility VM. func BuildSandboxConfig( ctx context.Context, + owner string, bundlePath string, opts *runhcsoptions.Options, spec *vm.Spec, @@ -242,7 +240,7 @@ func BuildSandboxConfig( // Build the document. doc := &hcsschema.ComputeSystem{ - Owner: shimName, + Owner: owner, SchemaVersion: schema, // Terminate the UVM when the last handle is closed. // To support impactless updates this will need to be configurable. diff --git a/internal/builder/vm/lcow/specs_test.go b/internal/builder/vm/lcow/specs_test.go index d232892d4c..54a917b030 100644 --- a/internal/builder/vm/lcow/specs_test.go +++ b/internal/builder/vm/lcow/specs_test.go @@ -50,7 +50,7 @@ func runTestCases(t *testing.T, ctx context.Context, defaultOpts *runhcsoptions. 
// Use a temp dir as bundlePath for confidential VM tests bundlePath := t.TempDir() - doc, sandboxOpts, err := BuildSandboxConfig(ctx, bundlePath, opts, spec) + doc, sandboxOpts, err := BuildSandboxConfig(ctx, "test-owner", bundlePath, opts, spec) if tt.wantErr { if err == nil { @@ -1990,7 +1990,7 @@ func TestBuildSandboxConfig_NUMA_OldWindows(t *testing.T) { validBootFilesPath := newBootFilesPath(t) - doc, _, err := BuildSandboxConfig(ctx, t.TempDir(), &runhcsoptions.Options{ + doc, _, err := BuildSandboxConfig(ctx, "test-owner", t.TempDir(), &runhcsoptions.Options{ SandboxPlatform: "linux/amd64", BootFilesRootPath: validBootFilesPath, }, &vm.Spec{ @@ -2018,7 +2018,7 @@ func TestBuildSandboxConfig_CPUClamping(t *testing.T) { hostCount := hostProcessorCount(t) requestedCount := hostCount * 2 - doc, _, err := BuildSandboxConfig(ctx, t.TempDir(), &runhcsoptions.Options{ + doc, _, err := BuildSandboxConfig(ctx, "test-owner", t.TempDir(), &runhcsoptions.Options{ SandboxPlatform: "linux/amd64", BootFilesRootPath: validBootFilesPath, }, &vm.Spec{ diff --git a/internal/controller/vm/vm.go b/internal/controller/vm/vm.go index 02e6850b76..a63b6c2d89 100644 --- a/internal/controller/vm/vm.go +++ b/internal/controller/vm/vm.go @@ -41,7 +41,7 @@ type Manager struct { vmState State // mu guards the concurrent access to the Manager's fields and operations. - mu sync.Mutex + mu sync.RWMutex // logOutputDone is closed when the GCS log output processing goroutine completes. logOutputDone chan struct{} @@ -76,8 +76,8 @@ func (c *Manager) Guest() *guestmanager.Guest { // State returns the current VM state. func (c *Manager) State() State { - c.mu.Lock() - defer c.mu.Unlock() + c.mu.RLock() + defer c.mu.RUnlock() return c.vmState } @@ -231,12 +231,12 @@ func (c *Manager) ExecIntoHost(ctx context.Context, request *shimdiag.ExecProces } // Validate that the VM is running before allowing exec into it. 
- c.mu.Lock() + c.mu.RLock() if c.vmState != StateRunning { - c.mu.Unlock() + c.mu.RUnlock() return -1, fmt.Errorf("cannot exec into VM: VM is in incorrect state %s", c.vmState) } - c.mu.Unlock() + c.mu.RUnlock() // Keep a count of active exec sessions. // This will be used to disallow LM with existing exec sessions, @@ -259,13 +259,13 @@ func (c *Manager) ExecIntoHost(ctx context.Context, request *shimdiag.ExecProces func (c *Manager) DumpStacks(ctx context.Context) (string, error) { ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.Operation, "DumpStacks")) - // Validate that the VM is running before sending dump stacks request to GCS. c.mu.Lock() + defer c.mu.Unlock() + + // Validate that the VM is running before sending dump stacks request to GCS. if c.vmState != StateRunning { - c.mu.Unlock() return "", fmt.Errorf("cannot dump stacks: VM is in incorrect state %s", c.vmState) } - c.mu.Unlock() if c.guest.Capabilities().IsDumpStacksSupported() { return c.guest.DumpStacks(ctx) @@ -280,12 +280,12 @@ func (c *Manager) Wait(ctx context.Context) error { // Validate that the VM has been created and can be waited on. // Terminated VMs can also be waited on where we return immediately. - c.mu.Lock() + c.mu.RLock() if c.vmState == StateNotCreated { - c.mu.Unlock() + c.mu.RUnlock() return fmt.Errorf("cannot wait on VM: VM is in incorrect state %s", c.vmState) } - c.mu.Unlock() + c.mu.RUnlock() // Wait for the utility VM to exit. // This will be unblocked when the VM exits or if the context is cancelled. @@ -408,8 +408,8 @@ func (c *Manager) TerminateVM(ctx context.Context) (err error) { // Returns zero value of time.Time if the VM has not yet reached // [StateRunning] or [StateTerminated]. 
func (c *Manager) StartTime() (startTime time.Time) { - c.mu.Lock() - defer c.mu.Unlock() + c.mu.RLock() + defer c.mu.RUnlock() if c.vmState == StateRunning || c.vmState == StateTerminated { return c.uvm.StartedTime() @@ -422,8 +422,8 @@ func (c *Manager) StartTime() (startTime time.Time) { // [StateTerminated], including the time it stopped and any exit error. // Returns an error if the VM has not yet stopped. func (c *Manager) ExitStatus() (*ExitStatus, error) { - c.mu.Lock() - defer c.mu.Unlock() + c.mu.RLock() + defer c.mu.RUnlock() if c.vmState != StateTerminated { return nil, fmt.Errorf("cannot get exit status: VM is in incorrect state %s", c.vmState) diff --git a/internal/logfields/fields.go b/internal/logfields/fields.go index 3e3e230293..dac5a708e5 100644 --- a/internal/logfields/fields.go +++ b/internal/logfields/fields.go @@ -69,7 +69,6 @@ const ( Version = "version" ShimPid = "shim-pid" TaskPid = "task-pid" - ExecSpanID = "exec-id" // sandbox diff --git a/internal/vm/vmutils/constants.go b/internal/vm/vmutils/constants.go index a332fb6a99..6276a17595 100644 --- a/internal/vm/vmutils/constants.go +++ b/internal/vm/vmutils/constants.go @@ -10,6 +10,9 @@ import ( ) const ( + // LCOWShimName is the name of the LCOW shim implementation. + LCOWShimName = "containerd-shim-lcow-v2" + // MaxVPMEMCount is the maximum number of VPMem devices that may be added to an LCOW // utility VM. 
MaxVPMEMCount = 128 From fb7c159148dcfb7f0f0c0a8d3e06b512765397f7 Mon Sep 17 00:00:00 2001 From: Harsh Rawat Date: Tue, 17 Mar 2026 15:58:14 +0530 Subject: [PATCH 4/6] address review comments: 3 Signed-off-by: Harsh Rawat --- cmd/containerd-shim-lcow-v2/manager.go | 3 +-- internal/controller/vm/vm.go | 14 ++++++++++---- internal/controller/vm/vm_wcow.go | 12 ++++++++---- internal/uvm/start.go | 5 ++--- 4 files changed, 21 insertions(+), 13 deletions(-) diff --git a/cmd/containerd-shim-lcow-v2/manager.go b/cmd/containerd-shim-lcow-v2/manager.go index 8fd37a96f3..e0a997dcc5 100644 --- a/cmd/containerd-shim-lcow-v2/manager.go +++ b/cmd/containerd-shim-lcow-v2/manager.go @@ -7,7 +7,6 @@ import ( "errors" "fmt" "io" - "log" "os" "os/exec" "path/filepath" @@ -148,7 +147,7 @@ func (m *shimManager) Start(ctx context.Context, id string, opts shim.StartOpts) // Create the named event handle, err := windows.CreateEvent(nil, 0, 0, eventName) if err != nil { - log.Fatalf("Failed to create event: %v", err) + return params, fmt.Errorf("failed to create event: %w", err) } defer func() { _ = windows.CloseHandle(handle) diff --git a/internal/controller/vm/vm.go b/internal/controller/vm/vm.go index a63b6c2d89..e4c8c42736 100644 --- a/internal/controller/vm/vm.go +++ b/internal/controller/vm/vm.go @@ -259,8 +259,11 @@ func (c *Manager) ExecIntoHost(ctx context.Context, request *shimdiag.ExecProces func (c *Manager) DumpStacks(ctx context.Context) (string, error) { ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.Operation, "DumpStacks")) - c.mu.Lock() - defer c.mu.Unlock() + // Take read lock at this place. + // The state change cannot happen until we release the lock, + // so we are sure that the state remains consistent throughout the method. + c.mu.RLock() + defer c.mu.RUnlock() // Validate that the VM is running before sending dump stacks request to GCS. 
if c.vmState != StateRunning { @@ -308,8 +311,11 @@ func (c *Manager) Wait(ctx context.Context) error { func (c *Manager) Stats(ctx context.Context) (*stats.VirtualMachineStatistics, error) { ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.Operation, "Stats")) - c.mu.Lock() - defer c.mu.Unlock() + // Take read lock at this place. + // The state change cannot happen until we release the lock, + // so we are sure that the state remains consistent throughout the method. + c.mu.RLock() + defer c.mu.RUnlock() if c.vmState != StateRunning { return nil, fmt.Errorf("cannot get stats: VM is in incorrect state %s", c.vmState) diff --git a/internal/controller/vm/vm_wcow.go b/internal/controller/vm/vm_wcow.go index 62afd94f6a..de6053be8e 100644 --- a/internal/controller/vm/vm_wcow.go +++ b/internal/controller/vm/vm_wcow.go @@ -48,7 +48,13 @@ func (c *Manager) setupLoggingListener(ctx context.Context, _ *errgroup.Group) { ServiceID: prot.WindowsLoggingHvsockServiceID, }) if err != nil { - logrus.WithError(err).Fatal("failed to listen for windows logging connections") + // Close the output done channel to signal that logging setup + // has failed and no logs will be processed. + close(c.logOutputDone) + logrus.WithError(err).Error("failed to listen for windows logging connections") + + // Return early due to error. + return } // Use a WaitGroup to track active log processing goroutines. @@ -84,9 +90,7 @@ func (c *Manager) setupLoggingListener(ctx context.Context, _ *errgroup.Group) { wg.Wait() // Signal that log output processing has completed. 
- if _, ok := <-c.logOutputDone; ok { - close(c.logOutputDone) - } + close(c.logOutputDone) }() } diff --git a/internal/uvm/start.go b/internal/uvm/start.go index c6ba805304..2c18db56b9 100644 --- a/internal/uvm/start.go +++ b/internal/uvm/start.go @@ -129,9 +129,8 @@ func (uvm *UtilityVM) Start(ctx context.Context) (err error) { e.Info("uvm output handler finished") } wg.Wait() - if _, ok := <-uvm.outputProcessingDone; ok { - close(uvm.outputProcessingDone) - } + // Signal that log output processing has completed. + close(uvm.outputProcessingDone) }() default: // Default handling From 75c9b3110539bd9c79be2452705aefb9a9b1c955 Mon Sep 17 00:00:00 2001 From: Harsh Rawat Date: Tue, 17 Mar 2026 22:35:57 +0530 Subject: [PATCH 5/6] address review comments: 4 Signed-off-by: Harsh Rawat --- cmd/containerd-shim-lcow-v2/main.go | 6 +++--- .../service/service.go | 5 +++++ .../service/service_sandbox.go | 20 +++++++++---------- .../service/service_sandbox_internal.go | 20 +++++++++---------- internal/controller/vm/vm_lcow.go | 2 ++ internal/uvm/start.go | 5 +++-- internal/vm/vmutils/constants.go | 3 --- 7 files changed, 33 insertions(+), 28 deletions(-) diff --git a/cmd/containerd-shim-lcow-v2/main.go b/cmd/containerd-shim-lcow-v2/main.go index 7bb297810f..9951144a8a 100644 --- a/cmd/containerd-shim-lcow-v2/main.go +++ b/cmd/containerd-shim-lcow-v2/main.go @@ -10,12 +10,12 @@ import ( "io" "os" + "github.com/Microsoft/hcsshim/cmd/containerd-shim-lcow-v2/service" _ "github.com/Microsoft/hcsshim/cmd/containerd-shim-lcow-v2/service/plugin" runhcsopts "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/options" "github.com/Microsoft/hcsshim/internal/log" "github.com/Microsoft/hcsshim/internal/oc" "github.com/Microsoft/hcsshim/internal/shim" - "github.com/Microsoft/hcsshim/internal/vm/vmutils" "github.com/containerd/errdefs" "github.com/sirupsen/logrus" @@ -38,13 +38,13 @@ func main() { // Set the log configuration. // If we encounter an error, we exit with non-zero code. 
if err := setLogConfiguration(); err != nil { - _, _ = fmt.Fprintf(os.Stderr, "%s: %s", vmutils.LCOWShimName, err) + _, _ = fmt.Fprintf(os.Stderr, "%s: %s", service.ShimName, err) os.Exit(1) } // Start the shim manager event loop. The manager is responsible for // handling containerd start/stop lifecycle calls for the shim process. - shim.Run(context.Background(), newShimManager(vmutils.LCOWShimName), func(c *shim.Config) { + shim.Run(context.Background(), newShimManager(service.ShimName), func(c *shim.Config) { // We don't want the shim package to set up logging options. c.NoSetupLogger = true }) diff --git a/cmd/containerd-shim-lcow-v2/service/service.go b/cmd/containerd-shim-lcow-v2/service/service.go index bd9b33cd76..3c2968c53f 100644 --- a/cmd/containerd-shim-lcow-v2/service/service.go +++ b/cmd/containerd-shim-lcow-v2/service/service.go @@ -20,6 +20,11 @@ import ( "github.com/containerd/ttrpc" ) +const ( + // ShimName is the name of the LCOW shim implementation. + ShimName = "containerd-shim-lcow-v2" +) + // Service is the shared Service struct that implements all TTRPC Service interfaces. // All Service methods (sandbox, task, and shimdiag) operate on this shared struct. 
type Service struct { diff --git a/cmd/containerd-shim-lcow-v2/service/service_sandbox.go b/cmd/containerd-shim-lcow-v2/service/service_sandbox.go index a82aa6b467..3e2a3ea437 100644 --- a/cmd/containerd-shim-lcow-v2/service/service_sandbox.go +++ b/cmd/containerd-shim-lcow-v2/service/service_sandbox.go @@ -10,7 +10,7 @@ import ( "github.com/Microsoft/hcsshim/internal/oc" "github.com/containerd/containerd/api/runtime/sandbox/v1" - errdefs2 "github.com/containerd/errdefs/pkg/errgrpc" + "github.com/containerd/errdefs/pkg/errgrpc" "github.com/sirupsen/logrus" "go.opencensus.io/trace" ) @@ -37,7 +37,7 @@ func (s *Service) CreateSandbox(ctx context.Context, request *sandbox.CreateSand ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.SandboxID, request.SandboxID)) r, e := s.createSandboxInternal(ctx, request) - return r, errdefs2.ToGRPC(e) + return r, errgrpc.ToGRPC(e) } // StartSandbox transitions a previously created sandbox to the "running" state. @@ -53,7 +53,7 @@ func (s *Service) StartSandbox(ctx context.Context, request *sandbox.StartSandbo ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.SandboxID, request.SandboxID)) r, e := s.startSandboxInternal(ctx, request) - return r, errdefs2.ToGRPC(e) + return r, errgrpc.ToGRPC(e) } // Platform returns the platform details for the sandbox ("windows/amd64" or "linux/amd64"). @@ -66,7 +66,7 @@ func (s *Service) Platform(ctx context.Context, request *sandbox.PlatformRequest span.AddAttributes(trace.StringAttribute(logfields.SandboxID, request.SandboxID)) r, e := s.platformInternal(ctx, request) - return r, errdefs2.ToGRPC(e) + return r, errgrpc.ToGRPC(e) } // StopSandbox attempts a graceful stop of the sandbox within the specified timeout. 
@@ -83,7 +83,7 @@ func (s *Service) StopSandbox(ctx context.Context, request *sandbox.StopSandboxR ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.SandboxID, request.SandboxID)) r, e := s.stopSandboxInternal(ctx, request) - return r, errdefs2.ToGRPC(e) + return r, errgrpc.ToGRPC(e) } // WaitSandbox blocks until the sandbox reaches a terminal state (stopped/errored) and returns the outcome. @@ -99,7 +99,7 @@ func (s *Service) WaitSandbox(ctx context.Context, request *sandbox.WaitSandboxR ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.SandboxID, request.SandboxID)) r, e := s.waitSandboxInternal(ctx, request) - return r, errdefs2.ToGRPC(e) + return r, errgrpc.ToGRPC(e) } // SandboxStatus returns current status for the sandbox, optionally verbose. @@ -116,7 +116,7 @@ func (s *Service) SandboxStatus(ctx context.Context, request *sandbox.SandboxSta ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.SandboxID, request.SandboxID)) r, e := s.sandboxStatusInternal(ctx, request) - return r, errdefs2.ToGRPC(e) + return r, errgrpc.ToGRPC(e) } // PingSandbox performs a minimal liveness check on the sandbox and returns quickly. 
@@ -132,7 +132,7 @@ func (s *Service) PingSandbox(ctx context.Context, request *sandbox.PingRequest) ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.SandboxID, request.SandboxID)) r, e := s.pingSandboxInternal(ctx, request) - return r, errdefs2.ToGRPC(e) + return r, errgrpc.ToGRPC(e) } // ShutdownSandbox requests a full shim + sandbox shutdown (stronger than StopSandbox), @@ -149,7 +149,7 @@ func (s *Service) ShutdownSandbox(ctx context.Context, request *sandbox.Shutdown ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.SandboxID, request.SandboxID)) r, e := s.shutdownSandboxInternal(ctx, request) - return r, errdefs2.ToGRPC(e) + return r, errgrpc.ToGRPC(e) } // SandboxMetrics returns runtime metrics for the sandbox (e.g., CPU/memory/IO), @@ -166,5 +166,5 @@ func (s *Service) SandboxMetrics(ctx context.Context, request *sandbox.SandboxMe ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.SandboxID, request.SandboxID)) r, e := s.sandboxMetricsInternal(ctx, request) - return r, errdefs2.ToGRPC(e) + return r, errgrpc.ToGRPC(e) } diff --git a/cmd/containerd-shim-lcow-v2/service/service_sandbox_internal.go b/cmd/containerd-shim-lcow-v2/service/service_sandbox_internal.go index 364c5807d9..f8cd3dfd97 100644 --- a/cmd/containerd-shim-lcow-v2/service/service_sandbox_internal.go +++ b/cmd/containerd-shim-lcow-v2/service/service_sandbox_internal.go @@ -68,10 +68,10 @@ func (s *Service) createSandboxInternal(ctx context.Context, request *sandbox.Cr defer s.mu.Unlock() if s.sandboxID != "" { - return nil, fmt.Errorf("failed to create sandbox: sandbox already exists with ID %s", s.sandboxID) + return nil, fmt.Errorf("sandbox already exists with ID %s", s.sandboxID) } - hcsDocument, sandboxOptions, err := lcow.BuildSandboxConfig(ctx, vmutils.LCOWShimName, request.BundlePath, shimOpts, &sandboxSpec) + hcsDocument, sandboxOptions, err := lcow.BuildSandboxConfig(ctx, ShimName, request.BundlePath, shimOpts, &sandboxSpec) if err != nil { return nil, 
fmt.Errorf("failed to parse sandbox spec: %w", err) } @@ -102,7 +102,7 @@ func (s *Service) createSandboxInternal(ctx context.Context, request *sandbox.Cr // applied to the VM after starting. func (s *Service) startSandboxInternal(ctx context.Context, request *sandbox.StartSandboxRequest) (*sandbox.StartSandboxResponse, error) { if s.sandboxID != request.SandboxID { - return nil, fmt.Errorf("failed to start sandbox: sandbox ID mismatch, expected %s, got %s", s.sandboxID, request.SandboxID) + return nil, fmt.Errorf("sandbox ID mismatch, expected %s, got %s", s.sandboxID, request.SandboxID) } // If we successfully got past the above check, it means the sandbox was created and @@ -144,11 +144,11 @@ func (s *Service) startSandboxInternal(ctx context.Context, request *sandbox.Sta // An error is returned if the sandbox is not currently in the created state. func (s *Service) platformInternal(_ context.Context, request *sandbox.PlatformRequest) (*sandbox.PlatformResponse, error) { if s.sandboxID != request.SandboxID { - return nil, fmt.Errorf("failed to get platform: sandbox ID mismatch, expected %s, got %s", s.sandboxID, request.SandboxID) + return nil, fmt.Errorf("sandbox ID mismatch, expected %s, got %s", s.sandboxID, request.SandboxID) } if s.vmController.State() == vm.StateNotCreated { - return nil, fmt.Errorf("failed to get platform: sandbox has not been created (state: %s)", s.vmController.State()) + return nil, fmt.Errorf("sandbox has not been created (state: %s)", s.vmController.State()) } return &sandbox.PlatformResponse{ @@ -164,7 +164,7 @@ func (s *Service) platformInternal(_ context.Context, request *sandbox.PlatformR // It terminates the VM and performs any cleanup, if needed. 
func (s *Service) stopSandboxInternal(ctx context.Context, request *sandbox.StopSandboxRequest) (*sandbox.StopSandboxResponse, error) { if s.sandboxID != request.SandboxID { - return nil, fmt.Errorf("failed to stop sandbox: sandbox ID mismatch, expected %s, got %s", s.sandboxID, request.SandboxID) + return nil, fmt.Errorf("sandbox ID mismatch, expected %s, got %s", s.sandboxID, request.SandboxID) } err := s.vmController.TerminateVM(ctx) @@ -181,7 +181,7 @@ func (s *Service) stopSandboxInternal(ctx context.Context, request *sandbox.Stop // to a sandbox exit code. func (s *Service) waitSandboxInternal(ctx context.Context, request *sandbox.WaitSandboxRequest) (*sandbox.WaitSandboxResponse, error) { if s.sandboxID != request.SandboxID { - return nil, fmt.Errorf("failed to wait for sandbox: sandbox ID mismatch, expected %s, got %s", s.sandboxID, request.SandboxID) + return nil, fmt.Errorf("sandbox ID mismatch, expected %s, got %s", s.sandboxID, request.SandboxID) } // Wait for the VM to be terminated, then return the exit code. @@ -215,7 +215,7 @@ func (s *Service) waitSandboxInternal(ctx context.Context, request *sandbox.Wait // diagnostic information. func (s *Service) sandboxStatusInternal(_ context.Context, request *sandbox.SandboxStatusRequest) (*sandbox.SandboxStatusResponse, error) { if s.sandboxID != request.SandboxID { - return nil, fmt.Errorf("failed to get sandbox status: sandbox ID mismatch, expected %s, got %s", s.sandboxID, request.SandboxID) + return nil, fmt.Errorf("sandbox ID mismatch, expected %s, got %s", s.sandboxID, request.SandboxID) } resp := &sandbox.SandboxStatusResponse{ @@ -264,7 +264,7 @@ func (s *Service) pingSandboxInternal(_ context.Context, _ *sandbox.PingRequest) // The sandbox must already be in the stopped state before shutdown is accepted. 
func (s *Service) shutdownSandboxInternal(ctx context.Context, request *sandbox.ShutdownSandboxRequest) (*sandbox.ShutdownSandboxResponse, error) { if s.sandboxID != request.SandboxID { - return &sandbox.ShutdownSandboxResponse{}, fmt.Errorf("failed to shutdown sandbox: sandbox ID mismatch, expected %s, got %s", s.sandboxID, request.SandboxID) + return &sandbox.ShutdownSandboxResponse{}, fmt.Errorf("sandbox ID mismatch, expected %s, got %s", s.sandboxID, request.SandboxID) } // Ensure the VM is terminated. If the VM is already terminated, @@ -298,7 +298,7 @@ func (s *Service) shutdownSandboxInternal(ctx context.Context, request *sandbox. // It collects and returns runtime statistics from the vmController. func (s *Service) sandboxMetricsInternal(ctx context.Context, request *sandbox.SandboxMetricsRequest) (*sandbox.SandboxMetricsResponse, error) { if s.sandboxID != request.SandboxID { - return &sandbox.SandboxMetricsResponse{}, fmt.Errorf("failed to get sandbox metrics: sandbox ID mismatch, expected %s, got %s", s.sandboxID, request.SandboxID) + return &sandbox.SandboxMetricsResponse{}, fmt.Errorf("sandbox ID mismatch, expected %s, got %s", s.sandboxID, request.SandboxID) } stats, err := s.vmController.Stats(ctx) diff --git a/internal/controller/vm/vm_lcow.go b/internal/controller/vm/vm_lcow.go index 269871a316..e8c5b51194 100644 --- a/internal/controller/vm/vm_lcow.go +++ b/internal/controller/vm/vm_lcow.go @@ -66,12 +66,14 @@ func (c *Manager) setupLoggingListener(ctx context.Context, group *errgroup.Grou ServiceID: winio.VsockServiceID(vmutils.LinuxLogVsockPort), }) if err != nil { + close(c.logOutputDone) return fmt.Errorf("failed to listen on hvSocket for logs: %w", err) } // Accept the connection from the GCS. 
conn, err := vmmanager.AcceptConnection(ctx, c.uvm, logConn, true) if err != nil { + close(c.logOutputDone) return fmt.Errorf("failed to accept connection on hvSocket for logs: %w", err) } diff --git a/internal/uvm/start.go b/internal/uvm/start.go index 2c18db56b9..c6ba805304 100644 --- a/internal/uvm/start.go +++ b/internal/uvm/start.go @@ -129,8 +129,9 @@ func (uvm *UtilityVM) Start(ctx context.Context) (err error) { e.Info("uvm output handler finished") } wg.Wait() - // Signal that log output processing has completed. - close(uvm.outputProcessingDone) + if _, ok := <-uvm.outputProcessingDone; ok { + close(uvm.outputProcessingDone) + } }() default: // Default handling diff --git a/internal/vm/vmutils/constants.go b/internal/vm/vmutils/constants.go index 6276a17595..a332fb6a99 100644 --- a/internal/vm/vmutils/constants.go +++ b/internal/vm/vmutils/constants.go @@ -10,9 +10,6 @@ import ( ) const ( - // LCOWShimName is the name of the LCOW shim implementation. - LCOWShimName = "containerd-shim-lcow-v2" - // MaxVPMEMCount is the maximum number of VPMem devices that may be added to an LCOW // utility VM. MaxVPMEMCount = 128 From 1ac30a53e74930af4968dbe6745b35595982cd3f Mon Sep 17 00:00:00 2001 From: Harsh Rawat Date: Thu, 19 Mar 2026 21:03:52 +0530 Subject: [PATCH 6/6] address review comments: 5 Signed-off-by: Harsh Rawat --- cmd/containerd-shim-lcow-v2/service/plugin/plugin.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/containerd-shim-lcow-v2/service/plugin/plugin.go b/cmd/containerd-shim-lcow-v2/service/plugin/plugin.go index 560b8de316..dd7c1124ae 100644 --- a/cmd/containerd-shim-lcow-v2/service/plugin/plugin.go +++ b/cmd/containerd-shim-lcow-v2/service/plugin/plugin.go @@ -23,7 +23,7 @@ import ( const ( // etwProviderName is the ETW provider name for lcow shim. 
- etwProviderName = "Microsoft.Virtualization.RunHCSLCOW" + etwProviderName = "Microsoft.Virtualization.containerd-shim-lcow-v2" ) // svc holds the single Service instance created during plugin initialization.