diff --git a/src/control/server/engine/config.go b/src/control/server/engine/config.go index e8c16ee15fb..b3224bde361 100644 --- a/src/control/server/engine/config.go +++ b/src/control/server/engine/config.go @@ -31,8 +31,9 @@ const ( envLogDbgStreams = "DD_MASK" envLogSubsystems = "DD_SUBSYS" - minABTThreadStackSizeDCPM = 20480 - minABTThreadStackSizeUCX = 32768 + minABTThreadStackSizeDCPM = 20480 + minABTThreadStackSizeUCX = 32768 + minABTThreadStackSizeMdOnSsd = 24576 ) // FabricConfig encapsulates networking fabric configuration. @@ -419,6 +420,37 @@ func (c *Config) UpdatePMDKEnvars() error { return nil } +// UpdateMdOnSsdStackSize adds ABT_THREAD_STACKSIZE=24576 when GetEnvVar fails, else errors on invalid or lower values. +func (c *Config) UpdateMdOnSsdStackSize() error { + stackSizeStr, err := c.GetEnvVar("ABT_THREAD_STACKSIZE") + if err != nil { + c.EnvVars = append(c.EnvVars, fmt.Sprintf("ABT_THREAD_STACKSIZE=%d", + minABTThreadStackSizeMdOnSsd)) + return nil + } + // An existing value is only validated, never raised: below the 24KiB minimum is an error. + stackSizeValue, err := strconv.Atoi(stackSizeStr) + if err != nil { + return errors.Errorf("env_var ABT_THREAD_STACKSIZE has invalid value: %s", + stackSizeStr) + } + if stackSizeValue < minABTThreadStackSizeMdOnSsd { + return errors.Errorf("env_var ABT_THREAD_STACKSIZE should be >= %d "+ + "for MD on SSD, found %d", minABTThreadStackSizeMdOnSsd, + stackSizeValue) + } + return nil +} + +// UpdateABTEnvarsMdOnSsd runs UpdateMdOnSsdStackSize only if c.Storage.Tiers.HasBdevRoleMeta() is true. +func (c *Config) UpdateABTEnvarsMdOnSsd() error { + + if c.Storage.Tiers.HasBdevRoleMeta() { + return c.UpdateMdOnSsdStackSize() + } + return nil +} + // Increase ABT stack size for UCX provider. 
func (c *Config) UpdateABTEnvarsUCX() error { diff --git a/src/control/server/engine/config_test.go b/src/control/server/engine/config_test.go index 48c29a7b794..1c771cfe8b5 100644 --- a/src/control/server/engine/config_test.go +++ b/src/control/server/engine/config_test.go @@ -1110,7 +1110,7 @@ func TestConfig_UpdatePMDKEnvarsStackSizeDCPM(t *testing.T) { validConfig := func() *Config { return MockConfig().WithStorage( storage.NewTierConfig(). - WithStorageClass("dcpm")) + WithStorageClass(storage.ClassDcpm.String())) } for name, tc := range map[string]struct { @@ -1162,6 +1162,76 @@ func TestConfig_UpdatePMDKEnvarsStackSizeDCPM(t *testing.T) { } } +func TestConfig_UpdateMdOnSsdStackSize(t *testing.T) { + validConfig := func() *Config { + return MockConfig().WithStorage( + storage.NewTierConfig(). + WithStorageClass(storage.ClassRam.String()), + storage.NewTierConfig(). + WithStorageClass(storage.ClassNvme.String()). + WithBdevDeviceRoles(storage.BdevRoleMeta)) + } + for name, tc := range map[string]struct { + cfg *Config + expErr error + expABTthreadStackSize int + }{ + "empty config should not set ABT_THREAD_STACKSIZE": { + cfg: MockConfig(), + expABTthreadStackSize: 0, + }, + "non-md_on_ssd config should not set ABT_THREAD_STACKSIZE": { + cfg: MockConfig().WithStorage( + storage.NewTierConfig(). + WithStorageClass(storage.ClassRam.String())), + expABTthreadStackSize: 0, + }, + "valid config for md_on_ssd should set ABT_THREAD_STACKSIZE": { + cfg: validConfig(). + WithEnvVarAbtThreadStackSize(minABTThreadStackSizeMdOnSsd), + expABTthreadStackSize: minABTThreadStackSizeMdOnSsd, + }, + "config for md_on_ssd without thread size should set ABT_THREAD_STACKSIZE": { + cfg: validConfig(), + expABTthreadStackSize: minABTThreadStackSizeMdOnSsd, + }, + "config for md_on_ssd with stack size big enough should not change ABT_THREAD_STACKSIZE": { + cfg: validConfig(). 
+ WithEnvVarAbtThreadStackSize(minABTThreadStackSizeMdOnSsd + 1), + expABTthreadStackSize: minABTThreadStackSizeMdOnSsd + 1, + }, + "config for md_on_ssd with stack size too small should fail": { + cfg: validConfig(). + WithEnvVarAbtThreadStackSize(minABTThreadStackSizeMdOnSsd - 1), + expErr: errors.New(fmt.Sprintf("env_var ABT_THREAD_STACKSIZE "+ + "should be >= %d for MD on SSD, found %d", + minABTThreadStackSizeMdOnSsd, minABTThreadStackSizeMdOnSsd-1)), + }, + "config for md_on_ssd with invalid ABT_THREAD_STACKSIZE value should fail": { + cfg: validConfig().WithEnvVars("ABT_THREAD_STACKSIZE=foo_bar"), + expErr: errors.New("env_var ABT_THREAD_STACKSIZE has invalid value: foo_bar"), + }, + } { + t.Run(name, func(t *testing.T) { + err := tc.cfg.UpdateABTEnvarsMdOnSsd() + test.CmpErr(t, tc.expErr, err) + if err != nil { + return + } + stackSizeStr, err := tc.cfg.GetEnvVar("ABT_THREAD_STACKSIZE") + if tc.expABTthreadStackSize == 0 { + test.AssertTrue(t, err != nil, "Unexpected env var ABT_THREAD_STACKSIZE") + return + } + test.AssertTrue(t, err == nil, "Missing env var ABT_THREAD_STACKSIZE") + stackSizeVal, err := strconv.Atoi(stackSizeStr) + test.AssertTrue(t, err == nil, "Invalid env var ABT_THREAD_STACKSIZE") + test.AssertEqual(t, tc.expABTthreadStackSize, stackSizeVal, + "Invalid ABT_THREAD_STACKSIZE value") + }) + } +} + func TestConfig_UpdateABTEnvarsUCX(t *testing.T) { validConfig := func() *Config { return MockConfig(). 
@@ -1223,7 +1293,7 @@ func TestConfig_UpdateABTEnvarsUCX(t *testing.T) { func TestConfig_UpdatePMDKEnvarsPMemobjConfDCPM(t *testing.T) { validConfig := func() *Config { return MockConfig().WithStorage( - storage.NewTierConfig().WithStorageClass("dcpm")) + storage.NewTierConfig().WithStorageClass(storage.ClassDcpm.String())) } for name, tc := range map[string]struct { diff --git a/src/control/server/server.go b/src/control/server/server.go index 4ea72603f3d..a35b83be964 100644 --- a/src/control/server/server.go +++ b/src/control/server/server.go @@ -109,12 +109,14 @@ func processConfig(log logging.Logger, cfg *config.Server, fis *hardware.FabricI if err := ec.UpdateABTEnvarsUCX(); err != nil { return err } - } - for _, ec := range cfg.Engines { if err := ec.UpdatePMDKEnvars(); err != nil { return err } + + if err := ec.UpdateABTEnvarsMdOnSsd(); err != nil { + return err + } } return nil