-
Notifications
You must be signed in to change notification settings - Fork 310
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
DAOS-13380 engine: refine tgt_nr check #12405
Changes from 1 commit
c5ce565
de91aa4
dbc6585
8f5088a
3a2037e
3256230
0af5fc8
5621d09
2b7c10f
3d3ac8c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -36,8 +36,7 @@ | |
static char modules[MAX_MODULE_OPTIONS + 1]; | ||
|
||
/** | ||
* Number of target threads the user would like to start | ||
* 0 means default value, see dss_tgt_nr_get(); | ||
* Number of target threads the user would like to start. | ||
*/ | ||
static unsigned int nr_threads; | ||
|
||
|
@@ -246,56 +245,61 @@ modules_load(void) | |
return rc; | ||
} | ||
|
||
static unsigned int | ||
ncores_needed(unsigned int tgt_nr, unsigned int nr_helpers) | ||
{ | ||
return DAOS_TGT0_OFFSET + tgt_nr + nr_helpers; | ||
} | ||
|
||
/** | ||
* Get the appropriate number of main XS based on the number of cores and | ||
* passed in preferred number of threads. | ||
* Check if the #targets and #nr_xs_helpers is valid to start server, the #nr_xs_helpers possibly | ||
* be reduced. | ||
*/ | ||
static int | ||
dss_tgt_nr_get(unsigned int ncores, unsigned int nr, bool oversubscribe) | ||
dss_tgt_nr_check(unsigned int ncores, unsigned int tgt_nr, bool oversubscribe) | ||
{ | ||
int tgt_nr; | ||
|
||
D_ASSERT(ncores >= 1); | ||
|
||
/* at most 2 helper XS per target */ | ||
if (dss_tgt_offload_xs_nr > 2 * nr) | ||
dss_tgt_offload_xs_nr = 2 * nr; | ||
else if (dss_tgt_offload_xs_nr == 0) | ||
if (dss_tgt_offload_xs_nr > 2 * tgt_nr) { | ||
D_PRINT("#nr_xs_helpers(%d) cannot exceed 2 times #targets (2 x %d = %d).\n", | ||
dss_tgt_offload_xs_nr, tgt_nr, 2 * tgt_nr); | ||
dss_tgt_offload_xs_nr = 2 * tgt_nr; | ||
} else if (dss_tgt_offload_xs_nr == 0) { | ||
D_WARN("Suggest to config at least 1 helper XS per DAOS engine\n"); | ||
} | ||
|
||
/* Each system XS uses one core, and with dss_tgt_offload_xs_nr | ||
* offload XS. Calculate the tgt_nr as the number of main XS based | ||
* on number of cores. | ||
*/ | ||
retry: | ||
tgt_nr = ncores - DAOS_TGT0_OFFSET - dss_tgt_offload_xs_nr; | ||
if (tgt_nr <= 0) | ||
tgt_nr = 1; | ||
|
||
/* If user requires less target threads then set it as dss_tgt_nr, | ||
* if user oversubscribes, then: | ||
* . if oversubscribe is enabled, use the required number | ||
* . if oversubscribe is disabled(default), | ||
* use the number calculated above | ||
* Note: oversubscribing may hurt performance. | ||
*/ | ||
if (nr >= 1 && ((nr < tgt_nr) || oversubscribe)) { | ||
tgt_nr = nr; | ||
if (dss_tgt_offload_xs_nr > 2 * tgt_nr) | ||
dss_tgt_offload_xs_nr = 2 * tgt_nr; | ||
} else if (dss_tgt_offload_xs_nr > 2 * tgt_nr) { | ||
dss_tgt_offload_xs_nr--; | ||
goto retry; | ||
if (oversubscribe) { | ||
if (ncores_needed(tgt_nr, dss_tgt_offload_xs_nr) > ncores) { | ||
if (ncores > DAOS_TGT0_OFFSET + tgt_nr) | ||
dss_tgt_offload_xs_nr = ncores - DAOS_TGT0_OFFSET - tgt_nr; | ||
else | ||
dss_tgt_offload_xs_nr = 0; | ||
|
||
D_PRINT("Force to start engine with %d targets on %d cores, #nr_xs_helpers " | ||
"set as %d.\n", | ||
tgt_nr, ncores, dss_tgt_offload_xs_nr); | ||
} | ||
goto out; | ||
} | ||
|
||
if (tgt_nr != nr) | ||
D_PRINT("%d target XS(xstream) requested (#cores %d); " | ||
"use (%d) target XS\n", nr, ncores, tgt_nr); | ||
if (ncores_needed(tgt_nr, dss_tgt_offload_xs_nr) > ncores) { | ||
if (ncores < DAOS_TGT0_OFFSET + tgt_nr) { | ||
D_ERROR("cannot start engine with %d targets on %d cores, may try with " | ||
"DAOS_TARGET_OVERSUBSCRIBE=1\n", | ||
tgt_nr, ncores); | ||
return -DER_INVAL; | ||
} | ||
dss_tgt_offload_xs_nr = ncores - DAOS_TGT0_OFFSET - tgt_nr; | ||
D_PRINT("Start engine with %d targets on %d cores, #nr_xs_helpers set as %d.\n", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The message should also indicate that the number of helpers have been forced. I was expecting that updating this value could only be done if |
||
tgt_nr, ncores, dss_tgt_offload_xs_nr); | ||
} | ||
|
||
out: | ||
if (dss_tgt_offload_xs_nr % tgt_nr != 0) | ||
dss_helper_pool = true; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If there is some performance impact, it could be useful to indicate that the helper threads will be shared. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. whether or not helper XS is shared by targets is based on the #helpers and #targets. for example if with 8 tgts and 4 helpers, it will be shared. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In fact, I was just wondering that it could be useful for the end user and the support to know if the helper XS is shared or not. |
||
|
||
return tgt_nr; | ||
return 0; | ||
} | ||
|
||
static int | ||
|
@@ -317,22 +321,21 @@ dss_topo_init() | |
depth = hwloc_get_type_depth(dss_topo, HWLOC_OBJ_NUMANODE); | ||
numa_node_nr = hwloc_get_nbobjs_by_depth(dss_topo, depth); | ||
d_getenv_bool("DAOS_TARGET_OVERSUBSCRIBE", &tgt_oversub); | ||
dss_tgt_nr = nr_threads; | ||
|
||
/* if no NUMA node was specified, or NUMA data unavailable */ | ||
/* fall back to the legacy core allocation algorithm */ | ||
if (dss_numa_node == -1 || numa_node_nr <= 0) { | ||
D_PRINT("Using legacy core allocation algorithm\n"); | ||
dss_tgt_nr = dss_tgt_nr_get(dss_core_nr, nr_threads, | ||
tgt_oversub); | ||
|
||
if (dss_core_offset >= dss_core_nr) { | ||
D_ERROR("invalid dss_core_offset %u " | ||
"(set by \"-f\" option)," | ||
" should within range [0, %u]", | ||
dss_core_offset, dss_core_nr - 1); | ||
return -DER_INVAL; | ||
} | ||
return 0; | ||
|
||
return dss_tgt_nr_check(dss_core_nr, dss_tgt_nr, tgt_oversub); | ||
} | ||
|
||
if (dss_numa_node > numa_node_nr) { | ||
|
@@ -377,17 +380,15 @@ dss_topo_init() | |
hwloc_bitmap_asprintf(&cpuset, core_allocation_bitmap); | ||
free(cpuset); | ||
|
||
dss_tgt_nr = dss_tgt_nr_get(dss_num_cores_numa_node, nr_threads, | ||
tgt_oversub); | ||
if (dss_core_offset >= dss_num_cores_numa_node) { | ||
D_ERROR("invalid dss_core_offset %d (set by \"-f\" option), " | ||
"should within range [0, %d]", dss_core_offset, | ||
dss_num_cores_numa_node - 1); | ||
return -DER_INVAL; | ||
} | ||
|
||
D_PRINT("Using NUMA core allocation algorithm\n"); | ||
return 0; | ||
|
||
return dss_tgt_nr_check(dss_num_cores_numa_node, dss_tgt_nr, tgt_oversub); | ||
} | ||
|
||
static ABT_mutex server_init_state_mutex; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,6 +6,7 @@ server_config: | |
engines_per_host: 1 | ||
engines: | ||
0: | ||
targets: 4 | ||
storage: | ||
0: | ||
class: ram | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,6 +5,7 @@ server_config: | |
engines_per_host: 1 | ||
engines: | ||
0: | ||
targets: 4 | ||
storage: | ||
0: | ||
class: ram | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -32,7 +32,7 @@ server_config: | |
engines_per_host: 1 | ||
engines: | ||
0: | ||
targets: 5 | ||
targets: 4 | ||
nr_xs_helpers: 0 | ||
storage: | ||
0: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I am not used to the engine code, but it seems a little bit odd that a check function change the value of the global variables
dss_tgt_offload_xs_nr
anddss_helper_pool
.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, I'm wondering if we should treat nr_offload in the same of how we treat nr_target? I mean should we silently 'correct' user's invalid config or should we explicitly reject the invalid config? Should we apply 'oversubscribe' to nr_offload too?
@Michael-Hennecke , @tanabarr , any thoughts?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ah, just see this comment.
helper threads is not must needed to start daos engine, and start more #helpers than #cores could hurt performance, that is different with #targets, so I select to reduce #helpers if #cores is not enough but #cores are enough to start with #targets.
For example even if DAOS_TARGET_OVERSUBSCRIBE not specified, on a node with 16 cores, user configured with #target=12 and #helpers=12, I think the #target=12 is mandatory but #helpers=12 is not mandatory, so why not just start server with #targets=12 and #helpers=2 in this case? (another 2 cores used for system XS).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I understand the argument above but I would be favour of being explicit and refusing to start with #helpers=12 if there are not enough cores (assuming oversubscribe is not set). Printing explicit error that is easy to understand and forces the user to reduce #helpers and restart the server. This should make behaviour easier to understand and avoid the situation where the user is running an engine unaware that the number of actual helper XSs isn't what they requested in the config.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I tend to think that the "nr_xs_helpers" is not a mandatory requirement, that is different with "targets". Imagine the case that #cores is not enough for helpers but #cores are enough to start with #targets, if fails the engine starting it just add some trouble for user to change yaml file and restart. For a system with different nodes that with different #cores, user have to provide different yml files for different node.
we may need to change document (admin/deployment.md) to explain the "nr_xs_helpers" is not a mandatory requirement and possibly be reduced if the hardware core resource is not enough.
How do you think? @tanabarr @Michael-Hennecke
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@liuxuezhao sorry for delay in response, I'm on leave. I think that explanation makes sense to me so let's leave it as it is for the moment with the loose requirement special case for
nr_xs_helpers
. We should get this PR landed.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fair enough for me.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hi no problem at all, I just refreshed it to add a few other small changes.