From 62ddd0c9c12dc3a82a2cd6896b07a1b6af4a8ddf Mon Sep 17 00:00:00 2001 From: Tomasz Gromadzki Date: Wed, 11 Mar 2026 06:39:43 +0100 Subject: [PATCH 1/7] DAOS-18495 md: 24k ABT stack size for md_on_ssd Increase min ABT stack size to 24k for md_on_ssd Signed-off-by: Tomasz Gromadzki Priority: 2 --- src/control/server/engine/config.go | 42 ++++++++++++++++++++++-- src/control/server/engine/config_test.go | 4 +-- src/control/server/server.go | 6 ++++ 3 files changed, 48 insertions(+), 4 deletions(-) diff --git a/src/control/server/engine/config.go b/src/control/server/engine/config.go index e8c16ee15fb..759ec47e985 100644 --- a/src/control/server/engine/config.go +++ b/src/control/server/engine/config.go @@ -31,8 +31,9 @@ const ( envLogDbgStreams = "DD_MASK" envLogSubsystems = "DD_SUBSYS" - minABTThreadStackSizeDCPM = 20480 - minABTThreadStackSizeUCX = 32768 + minABTThreadStackSizeDCPM = 20480 + minABTThreadStackSizeUCX = 32768 + minABTThreadStackSizeMdOnSsd = 24576 ) // FabricConfig encapsulates networking fabric configuration. @@ -367,6 +368,28 @@ func (c *Config) UpdatePMDKEnvarsStackSizeDCPM() error { return nil } +// Ensure at least 24KiB ABT stack size for md_on_ssd. +func (c *Config) UpdateMdOnSsdStackSize() error { + stackSizeStr, err := c.GetEnvVar("ABT_THREAD_STACKSIZE") + if err != nil { + c.EnvVars = append(c.EnvVars, fmt.Sprintf("ABT_THREAD_STACKSIZE=%d", + minABTThreadStackSizeMdOnSsd)) + return nil + } + // Ensure at least 24KiB ABT stack size for an engine in md_on_ssd mode. + stackSizeValue, err := strconv.Atoi(stackSizeStr) + if err != nil { + return errors.Errorf("env_var ABT_THREAD_STACKSIZE has invalid value: %s", + stackSizeStr) + } + if stackSizeValue < minABTThreadStackSizeMdOnSsd { + return errors.Errorf("env_var ABT_THREAD_STACKSIZE should be >= %d "+ + "for MD on SSD, found %d", minABTThreadStackSizeMdOnSsd, + stackSizeValue) + } + return nil +} + // Ensure proper configuration of shutdown (SDS) state func (c *Config) UpdatePMDKEnvarsPMemobjConf(isDCPM bool) error { pmemobjConfStr, pmemobjConfErr := c.GetEnvVar("PMEMOBJ_CONF") @@ -419,6 +442,21 @@ func (c *Config) UpdatePMDKEnvars() error { return nil } +// Ensure 24k for md_on_ssd configuration +func (c *Config) UpdateABTEnvarsMdOnSsd() error { + + if len(c.Storage.Tiers) == 0 { + return errors.New("Invalid config - no tier 0 defined") + } + + isDCPM := c.Storage.Tiers[0].Class == storage.ClassDcpm + + if !isDCPM { + return c.UpdateMdOnSsdStackSize() + } + return nil +} + // Increase ABT stack size for UCX provider. func (c *Config) UpdateABTEnvarsUCX() error { diff --git a/src/control/server/engine/config_test.go b/src/control/server/engine/config_test.go index 48c29a7b794..d8b462ce7b1 100644 --- a/src/control/server/engine/config_test.go +++ b/src/control/server/engine/config_test.go @@ -1110,7 +1110,7 @@ func TestConfig_UpdatePMDKEnvarsStackSizeDCPM(t *testing.T) { validConfig := func() *Config { return MockConfig().WithStorage( storage.NewTierConfig(). - WithStorageClass("dcpm")) + WithStorageClass(storage.ClassDcpm.String())) } for name, tc := range map[string]struct { @@ -1223,7 +1223,7 @@ func TestConfig_UpdateABTEnvarsUCX(t *testing.T) { func TestConfig_UpdatePMDKEnvarsPMemobjConfDCPM(t *testing.T) { validConfig := func() *Config { return MockConfig().WithStorage( - storage.NewTierConfig().WithStorageClass("dcpm")) + storage.NewTierConfig().WithStorageClass(storage.ClassDcpm.String())) } for name, tc := range map[string]struct { diff --git a/src/control/server/server.go b/src/control/server/server.go index 4ea72603f3d..de401670bee 100644 --- a/src/control/server/server.go +++ b/src/control/server/server.go @@ -117,6 +117,12 @@ func processConfig(log logging.Logger, cfg *config.Server, fis *hardware.FabricI } } + for _, ec := range cfg.Engines { + if err := ec.UpdateABTEnvarsMdOnSsd(); err != nil { + return err + } + } + return nil } From 71e905946ec578fbde97460301effc6572e6621c Mon Sep 17 00:00:00 2001 From: Tomasz Gromadzki Date: Wed, 11 Mar 2026 06:58:38 +0100 Subject: [PATCH 2/7] Unit tests for UpdateMdOnSsdStackSize() Priority: 2 Signed-off-by: Tomasz Gromadzki --- src/control/server/engine/config_test.go | 57 ++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/src/control/server/engine/config_test.go b/src/control/server/engine/config_test.go index d8b462ce7b1..8a8d8d45705 100644 --- a/src/control/server/engine/config_test.go +++ b/src/control/server/engine/config_test.go @@ -1162,6 +1162,63 @@ func TestConfig_UpdatePMDKEnvarsStackSizeDCPM(t *testing.T) { } } +func TestConfig_UpdateMdOnSsdStackSize(t *testing.T) { + validConfig := func() *Config { + return MockConfig().WithStorage( + storage.NewTierConfig(). + WithStorageClass(storage.ClassDcpm.String())) + } + + for name, tc := range map[string]struct { + cfg *Config + expErr error + expABTthreadStackSize int + }{ + "empty config should not fail": { + cfg: MockConfig(), + expABTthreadStackSize: minABTThreadStackSizeDCPM, + }, + "valid config for md_on_ssd should not fail": { + cfg: validConfig(). + WithEnvVarAbtThreadStackSize(minABTThreadStackSizeMdOnSsd), + expABTthreadStackSize: minABTThreadStackSizeMdOnSsd, + }, + "config for md_on_ssd without thread size should not fail": { + cfg: validConfig(), + expABTthreadStackSize: minABTThreadStackSizeMdOnSsd, + }, + "config for md_on_ssd with stack size big enough should not fail": { + cfg: validConfig(). + WithEnvVarAbtThreadStackSize(minABTThreadStackSizeMdOnSsd + 1), + expABTthreadStackSize: minABTThreadStackSizeMdOnSsd + 1, + }, + "config for md_on_ssd with stack size too small should fail": { + cfg: validConfig(). + WithEnvVarAbtThreadStackSize(minABTThreadStackSizeMdOnSsd - 1), + expErr: errors.New(fmt.Sprintf("env_var ABT_THREAD_STACKSIZE "+ + "should be >= %d for MD on SSD, found %d", + minABTThreadStackSizeMdOnSsd, minABTThreadStackSizeMdOnSsd-1)), + }, + "config for md_on_ssd with invalid ABT_THREAD_STACKSIZE value should fail": { + cfg: validConfig().WithEnvVars("ABT_THREAD_STACKSIZE=foo_bar"), + expErr: errors.New("env_var ABT_THREAD_STACKSIZE has invalid value: foo_bar"), + }, + } { + t.Run(name, func(t *testing.T) { + err := tc.cfg.UpdateMdOnSsdStackSize() + test.CmpErr(t, tc.expErr, err) + if err == nil { + stackSizeStr, err := tc.cfg.GetEnvVar("ABT_THREAD_STACKSIZE") + test.AssertTrue(t, err == nil, "Missing env var ABT_THREAD_STACKSIZE") + stackSizeVal, err := strconv.Atoi(stackSizeStr) + test.AssertTrue(t, err == nil, "Invalid env var ABT_THREAD_STACKSIZE") + test.AssertEqual(t, tc.expABTthreadStackSize, stackSizeVal, + "Invalid ABT_THREAD_STACKSIZE value") + } + }) + } +} + func TestConfig_UpdateABTEnvarsUCX(t *testing.T) { validConfig := func() *Config { return MockConfig(). From 5f117f8cd9f5431e60705f05bbe6003d9d33bb1d Mon Sep 17 00:00:00 2001 From: Tomasz Gromadzki Date: Wed, 11 Mar 2026 09:19:18 +0100 Subject: [PATCH 3/7] Fix test Signed-off-by: Tomasz Gromadzki Priority: 2 --- src/control/server/engine/config.go | 44 ++++++++++++------------ src/control/server/engine/config_test.go | 2 +- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/src/control/server/engine/config.go b/src/control/server/engine/config.go index 759ec47e985..6ca2cbb9d2e 100644 --- a/src/control/server/engine/config.go +++ b/src/control/server/engine/config.go @@ -368,28 +368,6 @@ func (c *Config) UpdatePMDKEnvarsStackSizeDCPM() error { return nil } -// Ensure at least 24KiB ABT stack size for md_on_ssd. -func (c *Config) UpdateMdOnSsdStackSize() error { - stackSizeStr, err := c.GetEnvVar("ABT_THREAD_STACKSIZE") - if err != nil { - c.EnvVars = append(c.EnvVars, fmt.Sprintf("ABT_THREAD_STACKSIZE=%d", - minABTThreadStackSizeMdOnSsd)) - return nil - } - // Ensure at least 24KiB ABT stack size for an engine in md_on_ssd mode. - stackSizeValue, err := strconv.Atoi(stackSizeStr) - if err != nil { - return errors.Errorf("env_var ABT_THREAD_STACKSIZE has invalid value: %s", - stackSizeStr) - } - if stackSizeValue < minABTThreadStackSizeMdOnSsd { - return errors.Errorf("env_var ABT_THREAD_STACKSIZE should be >= %d "+ - "for MD on SSD, found %d", minABTThreadStackSizeMdOnSsd, - stackSizeValue) - } - return nil -} - // Ensure proper configuration of shutdown (SDS) state func (c *Config) UpdatePMDKEnvarsPMemobjConf(isDCPM bool) error { pmemobjConfStr, pmemobjConfErr := c.GetEnvVar("PMEMOBJ_CONF") @@ -442,6 +420,28 @@ func (c *Config) UpdatePMDKEnvars() error { return nil } +// Ensure at least 24KiB ABT stack size for md_on_ssd. +func (c *Config) UpdateMdOnSsdStackSize() error { + stackSizeStr, err := c.GetEnvVar("ABT_THREAD_STACKSIZE") + if err != nil { + c.EnvVars = append(c.EnvVars, fmt.Sprintf("ABT_THREAD_STACKSIZE=%d", + minABTThreadStackSizeMdOnSsd)) + return nil + } + // Ensure at least 24KiB ABT stack size for an engine in md_on_ssd mode. + stackSizeValue, err := strconv.Atoi(stackSizeStr) + if err != nil { + return errors.Errorf("env_var ABT_THREAD_STACKSIZE has invalid value: %s", + stackSizeStr) + } + if stackSizeValue < minABTThreadStackSizeMdOnSsd { + return errors.Errorf("env_var ABT_THREAD_STACKSIZE should be >= %d "+ + "for MD on SSD, found %d", minABTThreadStackSizeMdOnSsd, + stackSizeValue) + } + return nil +} + // Ensure 24k for md_on_ssd configuration func (c *Config) UpdateABTEnvarsMdOnSsd() error { diff --git a/src/control/server/engine/config_test.go b/src/control/server/engine/config_test.go index 8a8d8d45705..604beb59b16 100644 --- a/src/control/server/engine/config_test.go +++ b/src/control/server/engine/config_test.go @@ -1176,7 +1176,7 @@ func TestConfig_UpdateMdOnSsdStackSize(t *testing.T) { }{ "empty config should not fail": { cfg: MockConfig(), - expABTthreadStackSize: minABTThreadStackSizeDCPM, + expABTthreadStackSize: minABTThreadStackSizeMdOnSsd, }, "valid config for md_on_ssd should not fail": { cfg: validConfig(). From b0a85f7fdc12e65b414f1efc0daf36e1bc9a0702 Mon Sep 17 00:00:00 2001 From: Tomasz Gromadzki Date: Wed, 11 Mar 2026 12:36:02 +0100 Subject: [PATCH 4/7] Final implementation Final implementation based on HasBdevRoleMeta(). Tests improvements. Signed-off-by: Tomasz Gromadzki Priority: 2 Allow-unstable-test: true --- src/control/server/engine/config.go | 8 +---- src/control/server/engine/config_test.go | 41 ++++++++++++++++-------- 2 files changed, 28 insertions(+), 21 deletions(-) diff --git a/src/control/server/engine/config.go b/src/control/server/engine/config.go index 6ca2cbb9d2e..b3224bde361 100644 --- a/src/control/server/engine/config.go +++ b/src/control/server/engine/config.go @@ -445,13 +445,7 @@ func (c *Config) UpdateMdOnSsdStackSize() error { // Ensure 24k for md_on_ssd configuration func (c *Config) UpdateABTEnvarsMdOnSsd() error { - if len(c.Storage.Tiers) == 0 { - return errors.New("Invalid config - no tier 0 defined") - } - - isDCPM := c.Storage.Tiers[0].Class == storage.ClassDcpm - - if !isDCPM { + if c.Storage.Tiers.HasBdevRoleMeta() { return c.UpdateMdOnSsdStackSize() } return nil diff --git a/src/control/server/engine/config_test.go b/src/control/server/engine/config_test.go index 604beb59b16..c6fe3f65c6b 100644 --- a/src/control/server/engine/config_test.go +++ b/src/control/server/engine/config_test.go @@ -1166,28 +1166,36 @@ func TestConfig_UpdateMdOnSsdStackSize(t *testing.T) { validConfig := func() *Config { return MockConfig().WithStorage( storage.NewTierConfig(). - WithStorageClass(storage.ClassDcpm.String())) + WithStorageClass(storage.ClassRam.String()), + storage.NewTierConfig(). + WithStorageClass(storage.ClassNvme.String()). + WithBdevDeviceRoles(storage.BdevRoleMeta)) } - for name, tc := range map[string]struct { cfg *Config expErr error expABTthreadStackSize int }{ - "empty config should not fail": { + "empty config should not set ABT_THREAD_STACKSIZE": { cfg: MockConfig(), - expABTthreadStackSize: minABTThreadStackSizeMdOnSsd, + expABTthreadStackSize: 0, }, - "valid config for md_on_ssd should not fail": { + "non-md_on_ssd config should not set ABT_THREAD_STACKSIZE": { + cfg: MockConfig().WithStorage( + storage.NewTierConfig(). + WithStorageClass(storage.ClassRam.String())), + expABTthreadStackSize: 0, + }, + "valid config for md_on_ssd should set ABT_THREAD_STACKSIZE": { cfg: validConfig(). WithEnvVarAbtThreadStackSize(minABTThreadStackSizeMdOnSsd), expABTthreadStackSize: minABTThreadStackSizeMdOnSsd, }, - "config for md_on_ssd without thread size should not fail": { + "config for md_on_ssd without thread size should sed ABT_THREAD_STACKSIZE": { cfg: validConfig(), expABTthreadStackSize: minABTThreadStackSizeMdOnSsd, }, - "config for md_on_ssd with stack size big enough should not fail": { + "config for md_on_ssd with stack size big enough should not change ABT_THREAD_STACKSIZE": { cfg: validConfig(). WithEnvVarAbtThreadStackSize(minABTThreadStackSizeMdOnSsd + 1), expABTthreadStackSize: minABTThreadStackSizeMdOnSsd + 1, @@ -1205,15 +1213,20 @@ func TestConfig_UpdateMdOnSsdStackSize(t *testing.T) { }, } { t.Run(name, func(t *testing.T) { - err := tc.cfg.UpdateMdOnSsdStackSize() + err := tc.cfg.UpdateABTEnvarsMdOnSsd() test.CmpErr(t, tc.expErr, err) if err == nil { - stackSizeStr, err := tc.cfg.GetEnvVar("ABT_THREAD_STACKSIZE") - test.AssertTrue(t, err == nil, "Missing env var ABT_THREAD_STACKSIZE") - stackSizeVal, err := strconv.Atoi(stackSizeStr) - test.AssertTrue(t, err == nil, "Invalid env var ABT_THREAD_STACKSIZE") - test.AssertEqual(t, tc.expABTthreadStackSize, stackSizeVal, - "Invalid ABT_THREAD_STACKSIZE value") + if tc.expABTthreadStackSize == 0 { + _, err := tc.cfg.GetEnvVar("ABT_THREAD_STACKSIZE") + test.AssertTrue(t, err != nil, "Unexpected env var ABT_THREAD_STACKSIZE") + } else { + stackSizeStr, err := tc.cfg.GetEnvVar("ABT_THREAD_STACKSIZE") + test.AssertTrue(t, err == nil, "Missing env var ABT_THREAD_STACKSIZE") + stackSizeVal, err := strconv.Atoi(stackSizeStr) + test.AssertTrue(t, err == nil, "Invalid env var ABT_THREAD_STACKSIZE") + test.AssertEqual(t, tc.expABTthreadStackSize, stackSizeVal, + "Invalid ABT_THREAD_STACKSIZE value") + } } }) } From 94a2f4f1869c53ce22937fcf34d020257c80575f Mon Sep 17 00:00:00 2001 From: Tomasz Gromadzki Date: Wed, 11 Mar 2026 14:33:10 +0100 Subject: [PATCH 5/7] Unit Test tuning Only Unit Tests and NLT to be re-run to confirm that logic has not been changed. Signed-off-by: Tomasz Gromadzki Priority: 2 Cancel-prev-build: false Skip-unit-test-memcheck: true Skip-test: true Skip-func-test: true Skip-func-vm: true Skip-func-hw-test: true --- src/control/server/engine/config_test.go | 26 ++++++++++++------------ 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/control/server/engine/config_test.go b/src/control/server/engine/config_test.go index c6fe3f65c6b..1c771cfe8b5 100644 --- a/src/control/server/engine/config_test.go +++ b/src/control/server/engine/config_test.go @@ -1191,7 +1191,7 @@ func TestConfig_UpdateMdOnSsdStackSize(t *testing.T) { WithEnvVarAbtThreadStackSize(minABTThreadStackSizeMdOnSsd), expABTthreadStackSize: minABTThreadStackSizeMdOnSsd, }, - "config for md_on_ssd without thread size should sed ABT_THREAD_STACKSIZE": { + "config for md_on_ssd without thread size should set ABT_THREAD_STACKSIZE": { cfg: validConfig(), expABTthreadStackSize: minABTThreadStackSizeMdOnSsd, }, @@ -1215,19 +1215,19 @@ func TestConfig_UpdateMdOnSsdStackSize(t *testing.T) { t.Run(name, func(t *testing.T) { err := tc.cfg.UpdateABTEnvarsMdOnSsd() test.CmpErr(t, tc.expErr, err) - if err == nil { - if tc.expABTthreadStackSize == 0 { - _, err := tc.cfg.GetEnvVar("ABT_THREAD_STACKSIZE") - test.AssertTrue(t, err != nil, "Unexpected env var ABT_THREAD_STACKSIZE") - } else { - stackSizeStr, err := tc.cfg.GetEnvVar("ABT_THREAD_STACKSIZE") - test.AssertTrue(t, err == nil, "Missing env var ABT_THREAD_STACKSIZE") - stackSizeVal, err := strconv.Atoi(stackSizeStr) - test.AssertTrue(t, err == nil, "Invalid env var ABT_THREAD_STACKSIZE") - test.AssertEqual(t, tc.expABTthreadStackSize, stackSizeVal, - "Invalid ABT_THREAD_STACKSIZE value") - } + if err != nil { + return + } + stackSizeStr, err := tc.cfg.GetEnvVar("ABT_THREAD_STACKSIZE") + if tc.expABTthreadStackSize == 0 { + test.AssertTrue(t, err != nil, "Unexpected env var ABT_THREAD_STACKSIZE") + return } + test.AssertTrue(t, err == nil, "Missing env var ABT_THREAD_STACKSIZE") + stackSizeVal, err := strconv.Atoi(stackSizeStr) + test.AssertTrue(t, err == nil, "Invalid env var ABT_THREAD_STACKSIZE") + test.AssertEqual(t, tc.expABTthreadStackSize, stackSizeVal, + "Invalid ABT_THREAD_STACKSIZE value") }) } } From 9265e2c76f58bf84197dac4b88ac467f0463b7c7 Mon Sep 17 00:00:00 2001 From: Tomasz Gromadzki Date: Thu, 12 Mar 2026 08:34:34 +0100 Subject: [PATCH 6/7] Trigger full validation Signed-off-by: Tomasz Gromadzki Priority: 2 Allow-unstable-test: true From e58b35b110b40cfdabba3918f6bb857f81192f25 Mon Sep 17 00:00:00 2001 From: Tomasz Gromadzki Date: Sat, 14 Mar 2026 23:16:45 +0100 Subject: [PATCH 7/7] Small code optimization based on review feedback. Signed-off-by: Tomasz Gromadzki Priority: 2 --- src/control/server/server.go | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/control/server/server.go b/src/control/server/server.go index de401670bee..a35b83be964 100644 --- a/src/control/server/server.go +++ b/src/control/server/server.go @@ -109,15 +109,11 @@ func processConfig(log logging.Logger, cfg *config.Server, fis *hardware.FabricI if err := ec.UpdateABTEnvarsUCX(); err != nil { return err } - } - for _, ec := range cfg.Engines { if err := ec.UpdatePMDKEnvars(); err != nil { return err } - } - for _, ec := range cfg.Engines { if err := ec.UpdateABTEnvarsMdOnSsd(); err != nil { return err }