From 94e13cd5d4d619d2924422fbb58cca5afbaafcaa Mon Sep 17 00:00:00 2001 From: Emanuel Oprea <2664342+EmanuelOprea@users.noreply.github.com> Date: Thu, 2 Jul 2026 13:39:07 +0300 Subject: [PATCH] cmd/containerd-shim-runhcs-v1: serialize confidential container bring-up Concurrently starting the containers of a multi-container container group in a single confidential WCOW UVM reliably crashes the guest: the guest resets and the GCS bridge drops ("bridge closed: use of closed network connection"), and all containers end up Exited. The root cause is concurrent container bring-up into one confidential UVM. createContainer runs on its own goroutine per container, so two containers' hcsoci.CreateContainer calls overlap. CreateContainer performs the container bring-up: block-CIM mount, scratch SCSI attach, CombineLayers/hive-merge, and the guest container create. The host-side device hot-adds (uvm.modify -> hcsSystem.Modify) go straight to the VM worker and do not travel over the GCS bridge, so nothing serializes them; overlapping mount/device operations into the confidential guest put it into a bad state and it resets. Serialize the bring-up with a per-UVM lock held across CreateContainer, taken only for confidential UVMs (HasConfidentialPolicy). This makes container starts effectively one at a time for a given confidential UVM. createContainer is shared with LCOW, hence the generic name and the confidential-only guard. Validated on a confidential WCOW UVM (VBS): concurrent multi-container groups that previously crashed every time (2x nanoserver, 2x mount-host, 1x mount-host + 2x nanoserver) now come up cleanly and repeatably; removing the lock reproduces the crash on the same image. 
Signed-off-by: Emanuel Oprea <2664342+EmanuelOprea@users.noreply.github.com> --- cmd/containerd-shim-runhcs-v1/task_hcs.go | 12 ++++++++++++ internal/uvm/combine_layers.go | 16 ++++++++++++++++ internal/uvm/types.go | 9 +++++++++ 3 files changed, 37 insertions(+) diff --git a/cmd/containerd-shim-runhcs-v1/task_hcs.go b/cmd/containerd-shim-runhcs-v1/task_hcs.go index 0e00292e70..87b922b494 100644 --- a/cmd/containerd-shim-runhcs-v1/task_hcs.go +++ b/cmd/containerd-shim-runhcs-v1/task_hcs.go @@ -171,6 +171,18 @@ func createContainer( if shimOpts != nil { opts.ScaleCPULimitsToSandbox = shimOpts.ScaleCpuLimitsToSandbox } + // Serialize the per-container bring-up (layer mount + hive-merge + guest + // container create) into a single confidential WCOW UVM. Concurrent + // container starts in a multi-container CG otherwise issue overlapping + // guest mount/create operations that the confidential guest cannot + // handle, triggering a guest reset that drops the GCS bridge + // ("bridge closed: use of closed network connection"). Serializing makes + // the starts effectively one-at-a-time, which is reliable. Only taken for + // confidential containers; createContainer is shared with LCOW. + if parent != nil && parent.HasConfidentialPolicy() { + parent.LockContainerCreate() + defer parent.UnlockContainerCreate() + } container, resources, err = hcsoci.CreateContainer(ctx, opts) if err != nil { return nil, nil, err diff --git a/internal/uvm/combine_layers.go b/internal/uvm/combine_layers.go index 760f20dbc1..bca882d8f7 100644 --- a/internal/uvm/combine_layers.go +++ b/internal/uvm/combine_layers.go @@ -10,6 +10,22 @@ import ( "github.com/Microsoft/hcsshim/internal/protocol/guestresource" ) +// LockContainerCreate acquires the per-UVM lock that serializes a container's +// bring-up (block-CIM mount + scratch SCSI attach + CombineLayers/hive-merge + +// guest container create). 
It is intended for confidential containers: concurrent +// container starts otherwise issue overlapping guest mount/create operations into +// the single confidential UVM, which the guest cannot handle and responds to with +// a guest reset that drops the GCS bridge. +// Callers must pair this with UnlockContainerCreate (typically via defer). +func (uvm *UtilityVM) LockContainerCreate() { + uvm.containerCreateLock.Lock() +} + +// UnlockContainerCreate releases the lock acquired by LockContainerCreate. +func (uvm *UtilityVM) UnlockContainerCreate() { + uvm.containerCreateLock.Unlock() +} + // CombineLayersWCOW combines `layerPaths` with `containerRootPath` into the // container file system. // diff --git a/internal/uvm/types.go b/internal/uvm/types.go index 45755b8643..d7938ed3bd 100644 --- a/internal/uvm/types.go +++ b/internal/uvm/types.go @@ -144,6 +144,15 @@ type UtilityVM struct { blockCIMMounts map[string]*UVMMountedBlockCIMs blockCIMMountLock sync.Mutex + // containerCreateLock serializes the per-container bring-up (block-CIM + // mount + scratch SCSI attach + CombineLayers/hive-merge + guest container + // create) into a single UVM. It is only taken for confidential containers + // (see createContainer): concurrent container starts in a confidential WCOW + // UVM otherwise issue overlapping guest mount/create operations that the + // guest cannot handle, causing a guest reset that drops the GCS bridge + // ("bridge closed: use of closed network connection"). + containerCreateLock sync.Mutex + logForwardingEnabled bool // Indicates whether to forward logs from the UVM to the host defaultLogSourcesEnabled bool // Specifies whether addition of default list of ETW providers should be disabled logSources string // ETW providers to enable for log forwarding