daos-stack · jolivier23 · May 8, 2024 · Jul 25, 2023 · May 8, 2024 · May 8, 2024
diff --git a/docs/admin/deployment.md b/docs/admin/deployment.md
@@ -1377,6 +1377,12 @@ per four target threads, for example `targets: 16` and `nr_xs_helpers: 4`.
 The server should have sufficiently many physical cores to support the
 number of targets plus the additional service threads.
 
+The 'targets:' and 'nr_xs_helpers:' requirements are mandatory. If the number
+of physical cores is insufficient, the DAOS engine will fail to start (note
+that 2 cores are reserved for system services). To force the engine to start
+anyway, set the environment variable "DAOS_TARGET_OVERSUBSCRIBE=1" (this may
+hurt performance because multiple XS compete for the same core).
+
 
 ## Storage Formatting
 

diff --git a/docs/admin/env_variables.md b/docs/admin/env_variables.md
@@ -52,6 +52,7 @@ Environment variables in this section only apply to the server side.
 |DAOS\_DTX\_AGG\_THD\_AGE|DTX aggregation age threshold in seconds. The valid range is [210, 1830]. The default value is 630.|
 |DAOS\_DTX\_RPC\_HELPER\_THD|DTX RPC helper threshold. The valid range is [18, unlimited). The default value is 513.|
 |DAOS\_DTX\_BATCHED\_ULT\_MAX|The max count of DTX batched commit ULTs. The valid range is [0, unlimited). 0 means to commit DTX synchronously. The default value is 32.|
+|DAOS\_FORWARD\_NEIGHBOR|Set to enable I/O forwarding on a neighbor xstream in the absence of helper threads.|
 
 ## Server and Client environment variables
 

diff --git a/src/control/cmd/daos_server/start.go b/src/control/cmd/daos_server/start.go
@@ -1,5 +1,5 @@
 //
-// (C) Copyright 2019-2023 Intel Corporation.
+// (C) Copyright 2019-2024 Intel Corporation.
 //
 // SPDX-License-Identifier: BSD-2-Clause-Patent
 //
@@ -29,7 +29,7 @@ type startCmd struct {
 	Modules             *string `short:"m" long:"modules" description:"List of server modules to load"`
 	Targets             uint16  `short:"t" long:"targets" description:"Number of targets to use (default use all cores)"`
 	NrXsHelpers         *uint16 `short:"x" long:"xshelpernr" description:"Number of helper XS per VOS target"`
-	FirstCore           uint16  `short:"f" long:"firstcore" default:"0" description:"Index of first core for service thread"`
+	FirstCore           *uint16 `short:"f" long:"firstcore" description:"Index of first core for service thread"`
 	Group               string  `short:"g" long:"group" description:"Server group name"`
 	SocketDir           string  `short:"d" long:"socket_dir" description:"Location for all daos_server & daos_engine sockets"`
 	Insecure            bool    `short:"i" long:"insecure" description:"Allow for insecure connections"`
@@ -76,8 +76,8 @@ func (cmd *startCmd) setCLIOverrides() error {
 		if cmd.NrXsHelpers != nil {
 			srv.WithHelperStreamCount(int(*cmd.NrXsHelpers))
 		}
-		if cmd.FirstCore > 0 {
-			srv.WithServiceThreadCore(int(cmd.FirstCore))
+		if cmd.FirstCore != nil {
+			srv.WithServiceThreadCore(int(*cmd.FirstCore))
 		}
 	}
 

diff --git a/src/control/cmd/dmg/auto_test.go b/src/control/cmd/dmg/auto_test.go
@@ -580,7 +580,6 @@ transport_config:
 engines:
 - targets: 12
   nr_xs_helpers: 2
-  first_core: 0
   log_file: /tmp/daos_engine.0.log
   storage:
   - class: dcpm
@@ -599,7 +598,6 @@ engines:
   pinned_numa_node: 0
 - targets: 6
   nr_xs_helpers: 0
-  first_core: 0
   log_file: /tmp/daos_engine.1.log
   storage:
   - class: dcpm

diff --git a/src/control/server/config/server.go b/src/control/server/config/server.go
@@ -863,7 +863,11 @@ func (cfg *Server) SetEngineAffinities(log logging.Logger, affSources ...EngineA
 	// Detect legacy mode by checking if first_core is being used.
 	legacyMode := false
 	for _, engineCfg := range cfg.Engines {
-		if engineCfg.ServiceThreadCore != 0 {
+		if engineCfg.ServiceThreadCore != nil {
+			if *engineCfg.ServiceThreadCore == 0 && engineCfg.PinnedNumaNode != nil {
+				// Both are set but we don't know yet which to use
+				continue
+			}
 			legacyMode = true
 			break
 		}
@@ -872,9 +876,15 @@ func (cfg *Server) SetEngineAffinities(log logging.Logger, affSources ...EngineA
 	// Fail if any engine has an explicit pin and non-zero first_core.
 	for idx, engineCfg := range cfg.Engines {
 		if legacyMode {
+			if engineCfg.PinnedNumaNode != nil {
+				log.Infof("pinned_numa_node setting ignored on engine %d", idx)
+				engineCfg.PinnedNumaNode = nil
+			}
 			log.Debugf("setting legacy core allocation algorithm on engine %d", idx)
-			engineCfg.PinnedNumaNode = nil
 			continue
+		} else if engineCfg.ServiceThreadCore != nil {
+			log.Infof("first_core setting ignored on engine %d", idx)
+			engineCfg.ServiceThreadCore = nil
 		}
 
 		numaAffinity, err := detectEngineAffinity(log, engineCfg, affSources...)

diff --git a/src/control/server/ctl_storage_rpc_test.go b/src/control/server/ctl_storage_rpc_test.go
@@ -1569,7 +1569,10 @@ func TestServer_CtlSvc_StorageScan_PostEngineStart(t *testing.T) {
 			var engineCfgs []*engine.Config
 			for i, sc := range tc.storageCfgs {
 				log.Debugf("storage cfg contains bdevs %v for engine %d", sc.Bdevs(), i)
-				engineCfgs = append(engineCfgs, engine.MockConfig().WithStorage(sc...))
+				engineCfgs = append(engineCfgs,
+					engine.MockConfig().
+						WithStorage(sc...).
+						WithTargetCount(tc.engineTargetCount[i]))
 			}
 			sCfg := config.DefaultServer().WithEngines(engineCfgs...)
 			cs := mockControlService(t, log, sCfg, csbmbc, tc.smbc, tc.smsc)
@@ -1625,7 +1628,6 @@ func TestServer_CtlSvc_StorageScan_PostEngineStart(t *testing.T) {
 				}
 				te.setDrpcClient(newMockDrpcClient(dcc))
 				te._superblock.Rank = ranklist.NewRankPtr(uint32(idx + 1))
-				te.setTargetCount(tc.engineTargetCount[idx])
 				for _, tc := range te.storage.GetBdevConfigs() {
 					tc.Bdev.DeviceRoles.OptionBits = storage.OptionBits(storage.BdevRoleAll)
 				}

diff --git a/src/control/server/engine/config.go b/src/control/server/engine/config.go
@@ -115,7 +115,7 @@ type Config struct {
 	Modules           string         `yaml:"modules,omitempty" cmdLongFlag:"--modules" cmdShortFlag:"-m"`
 	TargetCount       int            `yaml:"targets,omitempty" cmdLongFlag:"--targets,nonzero" cmdShortFlag:"-t,nonzero"`
 	HelperStreamCount int            `yaml:"nr_xs_helpers" cmdLongFlag:"--xshelpernr" cmdShortFlag:"-x"`
-	ServiceThreadCore int            `yaml:"first_core" cmdLongFlag:"--firstcore,nonzero" cmdShortFlag:"-f,nonzero"`
+	ServiceThreadCore *int           `yaml:"first_core,omitempty" cmdLongFlag:"--firstcore" cmdShortFlag:"-f"`
 	SystemName        string         `yaml:"-" cmdLongFlag:"--group" cmdShortFlag:"-g"`
 	SocketDir         string         `yaml:"-" cmdLongFlag:"--socket_dir" cmdShortFlag:"-d"`
 	LogMask           string         `yaml:"log_mask,omitempty" cmdEnv:"D_LOG_MASK"`
@@ -160,10 +160,29 @@ func (c *Config) ReadLogSubsystems() (string, error) {
 
 // Validate ensures that the configuration meets minimum standards.
 func (c *Config) Validate() error {
-	if c.PinnedNumaNode != nil && c.ServiceThreadCore != 0 {
+	if c.PinnedNumaNode != nil && c.ServiceThreadCore != nil && *c.ServiceThreadCore != 0 {
 		return errors.New("cannot specify both pinned_numa_node and first_core")
 	}
 
+	errNegative := func(s string) error {
+		return errors.Errorf("%s must not be negative", s)
+	}
+	if c.TargetCount < 0 {
+		return errNegative("target count")
+	}
+	if c.HelperStreamCount < 0 {
+		return errNegative("helper stream count")
+	}
+	if c.ServiceThreadCore != nil && *c.ServiceThreadCore < 0 {
+		return errNegative("service thread core index")
+	}
+	if c.MemSize < 0 {
+		return errNegative("mem size")
+	}
+	if c.HugepageSz < 0 {
+		return errNegative("hugepage size")
+	}
+
 	if c.TargetCount == 0 {
 		return errors.New("target count must be nonzero")
 	}
@@ -222,7 +241,7 @@ func IsNUMAMismatch(err error) bool {
 // SetNUMAAffinity sets the NUMA affinity for the engine,
 // if not already set in the configuration.
 func (c *Config) SetNUMAAffinity(node uint) error {
-	if c.PinnedNumaNode != nil && c.ServiceThreadCore != 0 {
+	if c.PinnedNumaNode != nil && c.ServiceThreadCore != nil && *c.ServiceThreadCore != 0 {
 		return errors.New("cannot set both NUMA node and service core")
 	}
 
@@ -464,7 +483,7 @@ func (c *Config) WithHelperStreamCount(count int) *Config {
 
 // WithServiceThreadCore sets the core index to be used for running DAOS service threads.
 func (c *Config) WithServiceThreadCore(idx int) *Config {
-	c.ServiceThreadCore = idx
+	c.ServiceThreadCore = &idx
 	return c
 }
 

diff --git a/src/control/server/instance.go b/src/control/server/instance.go
@@ -338,14 +338,6 @@ func (ei *EngineInstance) setHugepageSz(hpSizeMb int) {
 	ei.runner.GetConfig().HugepageSz = hpSizeMb
 }
 
-// setTargetCount updates target count in engine config.
-func (ei *EngineInstance) setTargetCount(numTargets int) {
-	ei.Lock()
-	defer ei.Unlock()
-
-	ei.runner.GetConfig().TargetCount = numTargets
-}
-
 // GetTargetCount returns the target count set for this instance.
 func (ei *EngineInstance) GetTargetCount() int {
 	ei.RLock()

diff --git a/src/control/server/instance_exec.go b/src/control/server/instance_exec.go
@@ -90,12 +90,6 @@ func (ei *EngineInstance) finishStartup(ctx context.Context, ready *srvpb.Notify
 	if err := ei.handleReady(ctx, ready); err != nil {
 		return err
 	}
-	// update engine target count to reflect allocated number of targets, not number requested
-	// when starting
-	// NOTE: Engine mem_size passed on engine invocation is based on the number of targets
-	//       requested in config so if number of targets allocated doesn't match the number of
-	//       targets requested the mem_size value may be inappropriate.
-	ei.setTargetCount(int(ready.GetNtgts()))
 
 	ei.ready.SetTrue()