diff --git a/cmd/containerd-shim-runhcs-v1/task_hcs.go b/cmd/containerd-shim-runhcs-v1/task_hcs.go index 0e00292e70..87b922b494 100644 --- a/cmd/containerd-shim-runhcs-v1/task_hcs.go +++ b/cmd/containerd-shim-runhcs-v1/task_hcs.go @@ -171,6 +171,18 @@ func createContainer( if shimOpts != nil { opts.ScaleCPULimitsToSandbox = shimOpts.ScaleCpuLimitsToSandbox } + // Serialize the per-container bring-up (layer mount + hive-merge + guest + // container create) into a single confidential WCOW UVM. Concurrent + // container starts in a multi-container CG otherwise issue overlapping + // guest mount/create operations that the confidential guest cannot + // handle, triggering a guest reset that drops the GCS bridge + // ("bridge closed: use of closed network connection"). Serializing makes + // the starts effectively one-at-a-time, which is reliable. The deferred unlock runs when createContainer returns, so the lock is held for the rest of this function, not only for the hcsoci.CreateContainer call. + // Only taken when parent has a confidential policy. NOTE(review): the guard does not check the guest OS and createContainer is shared with LCOW; confirm whether a confidential LCOW parent is meant to serialize here as well. + if parent != nil && parent.HasConfidentialPolicy() { + parent.LockContainerCreate() + defer parent.UnlockContainerCreate() + } container, resources, err = hcsoci.CreateContainer(ctx, opts) if err != nil { return nil, nil, err diff --git a/internal/uvm/combine_layers.go b/internal/uvm/combine_layers.go index 760f20dbc1..bca882d8f7 100644 --- a/internal/uvm/combine_layers.go +++ b/internal/uvm/combine_layers.go @@ -10,6 +10,22 @@ import ( "github.com/Microsoft/hcsshim/internal/protocol/guestresource" ) +// LockContainerCreate acquires the per-UVM lock that serializes a container's +// bring-up (block-CIM mount + scratch SCSI attach + CombineLayers/hive-merge + +// guest container create). It is intended for confidential containers: concurrent +// container starts otherwise issue overlapping guest mount/create operations into +// the single confidential UVM, which the guest cannot handle; it responds with a +// guest reset that drops the GCS bridge. The lock is a plain sync.Mutex, so this call blocks until the lock is free, takes no context for cancellation, and is not reentrant. +// Callers must pair this with UnlockContainerCreate (typically via defer).
+func (uvm *UtilityVM) LockContainerCreate() { + uvm.containerCreateLock.Lock() +} + +// UnlockContainerCreate releases the lock acquired by LockContainerCreate. It must only be called while that lock is held: unlocking an unlocked sync.Mutex is a fatal runtime error. +func (uvm *UtilityVM) UnlockContainerCreate() { + uvm.containerCreateLock.Unlock() +} + // CombineLayersWCOW combines `layerPaths` with `containerRootPath` into the // container file system. // diff --git a/internal/uvm/types.go b/internal/uvm/types.go index 45755b8643..d7938ed3bd 100644 --- a/internal/uvm/types.go +++ b/internal/uvm/types.go @@ -144,6 +144,15 @@ type UtilityVM struct { blockCIMMounts map[string]*UVMMountedBlockCIMs blockCIMMountLock sync.Mutex + // containerCreateLock serializes the per-container bring-up (block-CIM + // mount + scratch SCSI attach + CombineLayers/hive-merge + guest container + // create) into a single UVM. It is taken via LockContainerCreate and released via UnlockContainerCreate, and only when the UVM has a confidential policy + // (see createContainer): concurrent container starts in a confidential WCOW + // UVM otherwise issue overlapping guest mount/create operations that the + // guest cannot handle, causing a guest reset that drops the GCS bridge + // ("bridge closed: use of closed network connection"). + containerCreateLock sync.Mutex + logForwardingEnabled bool // Indicates whether to forward logs from the UVM to the host defaultLogSourcesEnabled bool // Specifies whether addition of default list of ETW providers should be disabled logSources string // ETW providers to enable for log forwarding