From 04d2930d6eab004ac2fef9051fe28eb62dccaae3 Mon Sep 17 00:00:00 2001 From: Daniil Antoshin Date: Tue, 14 Apr 2026 15:56:51 +0200 Subject: [PATCH 1/6] fix(module): add idempotency for usb cleanup in virtualization-dra Signed-off-by: Daniil Antoshin --- .../internal/usb-gateway/attach_record.go | 25 ++++++++++++++++++- .../internal/usb-gateway/usbgateway.go | 8 +++++- .../virtualization-dra/internal/usb/store.go | 18 +++++++++---- 3 files changed, 44 insertions(+), 7 deletions(-) diff --git a/images/virtualization-dra/internal/usb-gateway/attach_record.go b/images/virtualization-dra/internal/usb-gateway/attach_record.go index fd7ec836d0..8371483497 100644 --- a/images/virtualization-dra/internal/usb-gateway/attach_record.go +++ b/images/virtualization-dra/internal/usb-gateway/attach_record.go @@ -163,8 +163,31 @@ func (r *attachRecordManager) AddEntry(e AttachEntry) error { newEntries := slices.Clone(r.record.Entries) newEntries = append(newEntries, e) - record := attachRecord{Entries: newEntries} + return r.storeLocked(attachRecord{Entries: newEntries}) +} + +func (r *attachRecordManager) RemoveEntryByDeviceName(deviceName string) error { + r.mu.Lock() + defer r.mu.Unlock() + + newEntries := make([]AttachEntry, 0, len(r.record.Entries)) + removed := false + for _, entry := range r.record.Entries { + if entry.DeviceName == deviceName { + removed = true + continue + } + newEntries = append(newEntries, entry) + } + + if !removed { + return nil + } + + return r.storeLocked(attachRecord{Entries: newEntries}) +} +func (r *attachRecordManager) storeLocked(record attachRecord) error { b, err := json.Marshal(record) if err != nil { return err diff --git a/images/virtualization-dra/internal/usb-gateway/usbgateway.go b/images/virtualization-dra/internal/usb-gateway/usbgateway.go index 2bb329f5ed..984b17faf7 100644 --- a/images/virtualization-dra/internal/usb-gateway/usbgateway.go +++ b/images/virtualization-dra/internal/usb-gateway/usbgateway.go @@ -101,7 +101,9 @@ func (c *USBGatewayController) Detach(deviceName string) error { } entry := c.findEntry(deviceName) - if entry != nil { + if entry == nil { + log.Info("Device is already detached") + } else { log.Info("Detaching USB device") err = c.usbIP.Detach(entry.Rhport) if err != nil { @@ -115,6 +117,10 @@ func (c *USBGatewayController) Detach(deviceName string) error { return fmt.Errorf("failed to unexport device %s: %w", deviceName, err) } + if err := c.attachRecordManager.RemoveEntryByDeviceName(deviceName); err != nil { + return fmt.Errorf("failed to remove attach record for device %s: %w", deviceName, err) + } + return nil } diff --git a/images/virtualization-dra/internal/usb/store.go b/images/virtualization-dra/internal/usb/store.go index 58eaaf6fb2..938ff3a8ca 100644 --- a/images/virtualization-dra/internal/usb/store.go +++ b/images/virtualization-dra/internal/usb/store.go @@ -436,22 +436,30 @@ func (s *AllocationStore) Unprepare(_ context.Context, claimUID types.UID) error return fmt.Errorf("unprepare called before synchronize NRI Hook") } + allocatedDevices, exists := s.resourceClaimAllocations[claimUID] + if !exists || len(allocatedDevices) == 0 { + s.log.Info("Claim is already unprepared", slog.String("claimUID", string(claimUID))) + return nil + } + usbGatewayEnabled := featuregates.Default().USBGatewayEnabled() - allocatedDevices := s.resourceClaimAllocations[claimUID] s.log.Info("Unpreparing devices", slog.Any("devices", allocatedDevices), slog.String("claimUID", string(claimUID))) for _, device := range allocatedDevices { if usbGatewayEnabled { - count := s.usbipAllocatedDevicesCount[device] + count, hasCount := s.usbipAllocatedDevicesCount[device] s.log.Info("Device attached by USBGateway", slog.String("device", device), slog.Int("count", count)) - if count <= 1 { - s.log.Info("Detaching device because has only one consumer", slog.String("device", device)) + switch { + case !hasCount || count <= 0: + s.log.Info("Device is already detached or has no consumers", slog.String("device", device)) + case count == 1: + s.log.Info("Detaching device because it has the last consumer", slog.String("device", device)) if err := s.usbGateway.Detach(device); err != nil { return fmt.Errorf("failed to detach device %s: %w", device, err) } delete(s.usbipAllocatedDevicesCount, device) - } else { + default: s.log.Info("Decrementing device consumer count", slog.String("device", device), slog.Int("newCount", count-1)) s.usbipAllocatedDevicesCount[device]-- } From 413d2b5d7157d1e365b4a558f5f112893d49af25 Mon Sep 17 00:00:00 2001 From: Daniil Antoshin Date: Tue, 14 Apr 2026 17:31:18 +0200 Subject: [PATCH 2/6] fix(module): make usbip unexport idempotent Signed-off-by: Daniil Antoshin --- images/virtualization-dra/pkg/usbip/exporter.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/images/virtualization-dra/pkg/usbip/exporter.go b/images/virtualization-dra/pkg/usbip/exporter.go index 9956a0efff..f70d576cb0 100644 --- a/images/virtualization-dra/pkg/usbip/exporter.go +++ b/images/virtualization-dra/pkg/usbip/exporter.go @@ -98,11 +98,12 @@ func (e *usbExporter) Unexport(host, busID string, port int) error { return fmt.Errorf("unsupported USBIP version: %d", unExportReply.Version) } - if unExportReply.Status != protocol.OpStatusOk { + switch unExportReply.Status { + case protocol.OpStatusOk, protocol.OpStatusNoDev: + return nil + default: return fmt.Errorf("reply failed: %s", unExportReply.Status.String()) } - - return nil } func (e *usbExporter) usbipNetTCPConnect(host string, port int) (*net.TCPConn, error) { From decde0a811b5449c759837c4e8236a87ce2545d6 Mon Sep 17 00:00:00 2001 From: Daniil Antoshin Date: Tue, 14 Apr 2026 17:46:36 +0200 Subject: [PATCH 3/6] fix(core): persist usb gateway attach record cleanup Signed-off-by: Daniil Antoshin --- .../internal/usb-gateway/attach_record.go | 34 ++++--------------- .../internal/usb-gateway/usbgateway.go | 4 +-- 2 files changed, 9 insertions(+), 29 deletions(-) diff --git a/images/virtualization-dra/internal/usb-gateway/attach_record.go b/images/virtualization-dra/internal/usb-gateway/attach_record.go index 8371483497..c7fac2fe78 100644 --- a/images/virtualization-dra/internal/usb-gateway/attach_record.go +++ b/images/virtualization-dra/internal/usb-gateway/attach_record.go @@ -118,19 +118,20 @@ func (r *attachRecordManager) Refresh() error { return err } - // keep only real entries - var newEntries []AttachEntry + newEntries := make([]AttachEntry, 0, len(record.Entries)) for _, e := range record.Entries { if _, ok := ports[e.Rhport]; ok { newEntries = append(newEntries, e) } } - record.Entries = newEntries - - r.record = record + newRecord := attachRecord{Entries: newEntries} + if slices.Equal(record.Entries, newRecord.Entries) { + r.record = newRecord + return nil + } - return nil + return r.storeLocked(newRecord) } func (r *attachRecordManager) GetEntries() []AttachEntry { @@ -166,27 +167,6 @@ func (r *attachRecordManager) AddEntry(e AttachEntry) error { return r.storeLocked(attachRecord{Entries: newEntries}) } -func (r *attachRecordManager) RemoveEntryByDeviceName(deviceName string) error { - r.mu.Lock() - defer r.mu.Unlock() - - newEntries := make([]AttachEntry, 0, len(r.record.Entries)) - removed := false - for _, entry := range r.record.Entries { - if entry.DeviceName == deviceName { - removed = true - continue - } - newEntries = append(newEntries, entry) - } - - if !removed { - return nil - } - - return r.storeLocked(attachRecord{Entries: newEntries}) -} - func (r *attachRecordManager) storeLocked(record attachRecord) error { b, err := json.Marshal(record) if err != nil { diff --git a/images/virtualization-dra/internal/usb-gateway/usbgateway.go b/images/virtualization-dra/internal/usb-gateway/usbgateway.go index 984b17faf7..f85f6706f0 100644 --- a/images/virtualization-dra/internal/usb-gateway/usbgateway.go +++ b/images/virtualization-dra/internal/usb-gateway/usbgateway.go @@ -117,8 +117,8 @@ func (c *USBGatewayController) Detach(deviceName string) error { return fmt.Errorf("failed to unexport device %s: %w", deviceName, err) } - if err := c.attachRecordManager.RemoveEntryByDeviceName(deviceName); err != nil { - return fmt.Errorf("failed to remove attach record for device %s: %w", deviceName, err) + if err := c.attachRecordManager.Refresh(); err != nil { + return fmt.Errorf("failed to Refresh attach record after detach for device %s: %w", deviceName, err) } return nil From 3fe992f3c7c50e52fd17122332e2cdaa41e9560c Mon Sep 17 00:00:00 2001 From: Daniil Antoshin Date: Tue, 14 Apr 2026 17:51:38 +0200 Subject: [PATCH 4/6] fix(usb): harden usb unprepare cleanup Signed-off-by: Daniil Antoshin --- .../virtualization-dra/internal/usb/store.go | 49 +++++++++---------- 1 file changed, 23 insertions(+), 26 deletions(-) diff --git a/images/virtualization-dra/internal/usb/store.go b/images/virtualization-dra/internal/usb/store.go index 938ff3a8ca..bf33e4d395 100644 --- a/images/virtualization-dra/internal/usb/store.go +++ b/images/virtualization-dra/internal/usb/store.go @@ -438,35 +438,32 @@ func (s *AllocationStore) Unprepare(_ context.Context, claimUID types.UID) error allocatedDevices, exists := s.resourceClaimAllocations[claimUID] if !exists || len(allocatedDevices) == 0 { - s.log.Info("Claim is already unprepared", slog.String("claimUID", string(claimUID))) - return nil - } - - usbGatewayEnabled := featuregates.Default().USBGatewayEnabled() - - s.log.Info("Unpreparing devices", slog.Any("devices", allocatedDevices), slog.String("claimUID", string(claimUID))) - - for _, device := range allocatedDevices { - if usbGatewayEnabled { - count, hasCount := s.usbipAllocatedDevicesCount[device] - s.log.Info("Device attached by USBGateway", slog.String("device", device), slog.Int("count", count)) - switch { - case !hasCount || count <= 0: - s.log.Info("Device is already detached or has no consumers", slog.String("device", device)) - case count == 1: - s.log.Info("Detaching device because it has the last consumer", slog.String("device", device)) - if err := s.usbGateway.Detach(device); err != nil { - return fmt.Errorf("failed to detach device %s: %w", device, err) + s.log.Info("Claim has no tracked allocations, skipping device cleanup", slog.String("claimUID", string(claimUID))) + } else { + usbGatewayEnabled := featuregates.Default().USBGatewayEnabled() + + s.log.Info("Unpreparing devices", slog.Any("devices", allocatedDevices), slog.String("claimUID", string(claimUID))) + + for _, device := range allocatedDevices { + if usbGatewayEnabled { + count, hasCount := s.usbipAllocatedDevicesCount[device] + s.log.Info("Device attached by USBGateway", slog.String("device", device), slog.Int("count", count)) + switch { + case !hasCount || count <= 1: + s.log.Info("Device has no tracked consumers, attempting detach cleanup", slog.String("device", device), slog.Int("count", count)) + if err := s.usbGateway.Detach(device); err != nil { + return fmt.Errorf("failed to detach device %s: %w", device, err) + } + delete(s.usbipAllocatedDevicesCount, device) + default: + s.log.Info("Decrementing device consumer count", slog.String("device", device), slog.Int("newCount", count-1)) + s.usbipAllocatedDevicesCount[device]-- } - delete(s.usbipAllocatedDevicesCount, device) - default: - s.log.Info("Decrementing device consumer count", slog.String("device", device), slog.Int("newCount", count-1)) - s.usbipAllocatedDevicesCount[device]-- } - } - s.log.Info("Deleting device from allocated devices", slog.String("device", device)) - s.allocatedDevices.Delete(device) + s.log.Info("Deleting device from allocated devices", slog.String("device", device)) + s.allocatedDevices.Delete(device) + } } s.log.Info("Deleting CDI claim spec file", slog.String("claimUID", string(claimUID))) From b0cbfc7b6ceea25259d5bd111c756002bff54ad9 Mon Sep 17 00:00:00 2001 From: Daniil Antoshin Date: Thu, 16 Apr 2026 17:04:57 +0200 Subject: [PATCH 5/6] fix(core): log resource claim unprepare errors in dra driver Signed-off-by: Daniil Antoshin --- images/virtualization-dra/internal/plugin/driver.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/images/virtualization-dra/internal/plugin/driver.go b/images/virtualization-dra/internal/plugin/driver.go index 0179b10180..ffff51afe0 100644 --- a/images/virtualization-dra/internal/plugin/driver.go +++ b/images/virtualization-dra/internal/plugin/driver.go @@ -205,6 +205,8 @@ func (d *Driver) UnprepareResourceClaims(ctx context.Context, claims []kubeletpl func (d *Driver) unprepareResourceClaim(ctx context.Context, claim kubeletplugin.NamespacedObject) error { if err := d.allocator.Unprepare(ctx, claim.UID); err != nil { + d.log.Error("error unpreparing devices for claim", slog.Any("error", err), slog.String("uid", string(claim.UID))) + return fmt.Errorf("error unpreparing devices for claim %v: %w", claim.UID, err) } From c3e8b505609ef62992d5c3b917b749e46bf5fd24 Mon Sep 17 00:00:00 2001 From: Daniil Antoshin Date: Wed, 22 Apr 2026 13:00:51 +0200 Subject: [PATCH 6/6] refactor(virtualization-dra): inline attach record persistence Signed-off-by: Daniil Antoshin --- .../internal/usb-gateway/attach_record.go | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/images/virtualization-dra/internal/usb-gateway/attach_record.go b/images/virtualization-dra/internal/usb-gateway/attach_record.go index c7fac2fe78..07b4900079 100644 --- a/images/virtualization-dra/internal/usb-gateway/attach_record.go +++ b/images/virtualization-dra/internal/usb-gateway/attach_record.go @@ -118,6 +118,7 @@ func (r *attachRecordManager) Refresh() error { return err } + // keep only real entries newEntries := make([]AttachEntry, 0, len(record.Entries)) for _, e := range record.Entries { if _, ok := ports[e.Rhport]; ok { @@ -131,7 +132,17 @@ func (r *attachRecordManager) Refresh() error { return nil } - return r.storeLocked(newRecord) + b, err = json.Marshal(newRecord) + if err != nil { + return err + } + + if err = os.WriteFile(r.recordFile, b, 0o600); err != nil { + return err + } + + r.record = newRecord + return nil } func (r *attachRecordManager) GetEntries() []AttachEntry { @@ -164,10 +175,8 @@ func (r *attachRecordManager) AddEntry(e AttachEntry) error { newEntries := slices.Clone(r.record.Entries) newEntries = append(newEntries, e) - return r.storeLocked(attachRecord{Entries: newEntries}) -} + record := attachRecord{Entries: newEntries} -func (r *attachRecordManager) storeLocked(record attachRecord) error { b, err := json.Marshal(record) if err != nil { return err