From ab05acef5a2773b55d577ffd105db36dc1ea3d28 Mon Sep 17 00:00:00 2001 From: Uri Sternik Date: Sun, 30 Nov 2025 15:36:39 +0200 Subject: [PATCH] Fix race condition in config-manager when label is unset When the node label (nvidia.com/device-plugin.config) is not set, a race condition could cause the config-manager to hang indefinitely on startup. The issue occurred when the informer's AddFunc fired before the first Get() call, setting current="" and broadcasting. When Get() was subsequently called, it found lastRead == current (both empty strings) and waited forever, as no future events would wake it up. This fix changes SyncableConfig.lastRead from a string to a *string, where nil means that no value has been read yet, to ensure the first Get() call never waits, regardless of timing. Subsequent Get() calls still wait properly when the value hasn't changed. Signed-off-by: Uri Sternik --- cmd/config-manager/main.go | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/cmd/config-manager/main.go b/cmd/config-manager/main.go index f95d3332d..6c21a4b0c 100644 --- a/cmd/config-manager/main.go +++ b/cmd/config-manager/main.go @@ -82,7 +82,7 @@ type SyncableConfig struct { cond *sync.Cond mutex sync.Mutex current string - lastRead string + lastRead *string } // NewSyncableConfig creates a new SyncableConfig @@ -106,11 +106,12 @@ func (m *SyncableConfig) Set(value string) { func (m *SyncableConfig) Get() string { m.mutex.Lock() defer m.mutex.Unlock() - if m.lastRead == m.current { + if m.lastRead != nil && *m.lastRead == m.current { m.cond.Wait() } - m.lastRead = m.current - return m.lastRead + val := m.current + m.lastRead = &val + return *m.lastRead } func main() {