Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feature/multiprovider #8427

Draft
wants to merge 154 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
154 commits
Select commit Hold shift + click to select a range
cacd732
DAOS-9623 control: Enable multi-provider in server (#8396)
kjacque Mar 9, 2022
0394f24
Merge branch 'master' into feature/multiprovider
kjacque Mar 9, 2022
06dc8a9
Merge branch 'master' into feature/multiprovider
kjacque Mar 11, 2022
7ddb133
DAOS-9623 agent: Support configurable provider (#8452)
kjacque Mar 15, 2022
1d421ab
Merge branch 'master' into feature/multiprovider
kjacque Mar 15, 2022
7d8861c
Merge branch 'master' into feature/multiprovider
kjacque Mar 18, 2022
ffaa379
DAOS-9928 srv: xstreams for secondary cart contexts (#8413)
NiuYawei Mar 22, 2022
839fbff
Merge branch 'master' into feature/multiprovider
kjacque Mar 22, 2022
1f1d0d3
Merge branch 'master' into feature/multiprovider
kjacque Mar 23, 2022
802e223
Merge branch 'master' into feature/multiprovider
kjacque Mar 30, 2022
c647c56
DAOS-9623 control: Use comma separator for providers (#8578)
kjacque Apr 1, 2022
2c4e7eb
Merge branch 'master' into feature/multiprovider
kjacque Apr 1, 2022
2cbaa40
DAOS-9623 control: Add secondary context count to config (#8516)
kjacque Apr 4, 2022
54593fa
Merge branch 'master' into feature/multiprovider
kjacque Apr 4, 2022
89b906e
DAOS-9623 control: Check multiprovider interfaces (#8580)
kjacque Apr 4, 2022
4e77479
DAOS-9623 agent: Let client specify desired fabric iface (#8619)
kjacque Apr 6, 2022
997ad70
Merge branch 'master' into feature/multiprovider
kjacque Apr 7, 2022
cf1f081
Merge branch 'master' into feature/multiprovider
kjacque Apr 19, 2022
42f9819
Merge branch 'master' into feature/multiprovider
kjacque Apr 26, 2022
e0bb96e
Merge branch 'master' into feature/multiprovider
kjacque Apr 26, 2022
26b6d06
Merge branch 'master' into feature/multiprovider
kjacque May 3, 2022
edc2b76
Merge branch 'master' into feature/multiprovider
kjacque May 11, 2022
fd04661
Merge branch 'master' into feature/multiprovider
kjacque May 17, 2022
514121a
Merge branch 'master' into feature/multiprovider
kjacque May 18, 2022
569c3aa
Merge branch 'master' into feature/multiprovider
kjacque May 19, 2022
984d20b
Merge branch 'master' into feature/multiprovider
kjacque May 24, 2022
5e6311f
Merge branch 'master' into feature/multiprovider
kjacque Jun 3, 2022
b98906a
CART-89 cart: Cart multiprov changes (#8952)
frostedcmos Jun 6, 2022
02bd010
Merge branch 'master' into feature/multiprovider
kjacque Jun 6, 2022
fb77cc6
Merge branch 'master' into feature/multiprovider
kjacque Jun 9, 2022
610dc4e
Merge branch 'master' into feature/multiprovider
kjacque Jun 10, 2022
fb30733
Merge branch 'master' into feature/multiprovider
kjacque Jun 13, 2022
e4a7ce0
Merge branch 'master' into feature/multiprovider
kjacque Jun 14, 2022
b9ed282
DAOS-9928 object: RPC & bulk handler for secondary provider (#8974)
NiuYawei Jun 14, 2022
b44dcd1
Merge branch 'master' into feature/multiprovider
kjacque Jun 17, 2022
e9e998f
DAOS-10876: Fix merges from master (#9411)
kjacque Jun 21, 2022
9483a33
CART-89 multiprovider: Add new api to query original src provider (#9…
frostedcmos Jun 23, 2022
dcf5e99
Merge branch 'master' into feature/multiprovider
kjacque Jun 23, 2022
ceb116a
CART-89 multiprovider: Fix issues, add new api (#9485)
frostedcmos Jun 23, 2022
b068f36
DAOS-9928 engine: incorporate new Cart API (#9484)
NiuYawei Jun 27, 2022
954cee8
Merge branch 'master' into feature/multiprovider
kjacque Jun 27, 2022
3fe5b69
CART-89 multi_prov: new api, fixes (#9514)
frostedcmos Jun 29, 2022
5aacc13
Merge branch 'master' into feature/multiprovider
kjacque Jul 6, 2022
10833df
DAOS-10897 control: Add plumbing for secondary URIs (#9483)
kjacque Jul 7, 2022
7fcd5be
CART-89 bug: Fix provider settings (#9644)
frostedcmos Jul 8, 2022
5cda9a7
DAOS-11027 control: Set client secondary provider env (#9559)
kjacque Jul 11, 2022
352712b
Merge branch 'master' into feature/multiprovider
kjacque Jul 11, 2022
635c0ac
DAOS-11038 control: Switch to using provider_idx (#9648)
kjacque Jul 13, 2022
51dbe49
CART-89 multiprov: Auto-tag replacement for secondary providers (#9519)
frostedcmos Jul 15, 2022
12623b7
Merge branch 'master' into feature/multiprovider
kjacque Jul 18, 2022
0404051
Merge branch 'master' into feature/multiprovider
kjacque Jul 22, 2022
f9830cd
Merge branch 'master' into feature/multiprovider
kjacque Aug 1, 2022
388d8ae
Merge branch 'master' into feature/multiprovider
kjacque Aug 8, 2022
b5ea81f
Merge branch 'master' into feature/multiprovider
kjacque Aug 15, 2022
b5bf0df
Merge branch 'master' into feature/multiprovider
kjacque Aug 19, 2022
a1ae454
Merge branch 'master' into feature/multiprovider
kjacque Aug 22, 2022
8d4289a
Merge branch 'master' into feature/multiprovider
kjacque Aug 29, 2022
896a6c2
Merge branch 'master' into feature/multiprovider
kjacque Sep 2, 2022
3dddc64
Merge branch 'master' into feature/multiprovider
kjacque Sep 6, 2022
85f385c
Merge branch 'master' into feature/multiprovider
kjacque Sep 12, 2022
cbd4cf1
Merge branch 'master' into feature/multiprovider
kjacque Sep 19, 2022
d18cb5b
Merge branch 'master' into feature/multiprovider
kjacque Sep 26, 2022
a463f5c
Merge branch 'master' into feature/multiprovider
kjacque Oct 4, 2022
2ae3f0f
Merge branch 'master' into feature/multiprovider
kjacque Oct 11, 2022
20e9fa7
DAOS-11884 cart: Add new API to set a number of remote endpoints (#1…
frostedcmos Oct 13, 2022
780d65b
DAOS-11893 control: Include num ctxs in GetAttachInfo (#10544)
kjacque Oct 14, 2022
854773e
Merge branch 'master' into feature/multiprovider
kjacque Oct 14, 2022
8155934
Merge branch 'master' into feature/multiprovider
kjacque Oct 18, 2022
843a825
Merge branch 'master' into feature/multiprovider
kjacque Oct 24, 2022
493565d
Merge branch 'master' into feature/multiprovider
kjacque Oct 28, 2022
791d795
Merge branch 'master' into feature/multiprovider
kjacque Nov 15, 2022
afaa547
DAOS-12108 cart: Fix segfault over secondary provider (#10860)
frostedcmos Nov 18, 2022
b6979b8
Merge branch 'master' into feature/multiprovider
kjacque Nov 29, 2022
8207c13
DAOS-12111 object: fix race in obj_bulk_inflights() (#10949)
NiuYawei Dec 3, 2022
5a3d955
Merge branch 'master' into feature/multiprovider
kjacque Dec 13, 2022
4f48c63
Merge branch 'master' into feature/multiprovider
kjacque Jan 3, 2023
b5862f4
Merge branch 'master' into feature/multiprovider
kjacque Jan 25, 2023
3080abe
Merge branch 'master' into feature/multiprovider
kjacque Feb 6, 2023
caaf087
Merge branch 'master' into feature/multiprovider
kjacque Feb 10, 2023
b059ab5
Merge branch 'master' into feature/multiprovider
kjacque Feb 23, 2023
8745e29
Merge branch 'master' into feature/multiprovider
kjacque Mar 7, 2023
f209fc8
Merge branch 'master' into feature/multiprovider
kjacque Mar 13, 2023
7361e0d
Merge branch 'master' into feature/multiprovider
kjacque Mar 21, 2023
1184fd6
Merge branch 'master' into feature/multiprovider
kjacque Mar 24, 2023
f06fe3d
Merge branch 'master' into feature/multiprovider
kjacque Mar 28, 2023
31abc38
Merge branch 'master' into feature/multiprovider
kjacque Apr 3, 2023
b9d0596
Merge branch 'master' into feature/multiprovider
kjacque Apr 4, 2023
0465847
Merge branch 'master' into feature/multiprovider
kjacque Apr 4, 2023
9a0a9c2
Merge branch 'master' into feature/multiprovider
kjacque Apr 10, 2023
3222b0a
Merge branch 'master' into feature/multiprovider
kjacque Apr 13, 2023
07e10f6
DAOS-13088 control: Fix provider query with multiprovider (#11936)
kjacque Apr 14, 2023
4ea2394
Merge branch 'master' into feature/multiprovider
kjacque Apr 17, 2023
0f2e046
DAOS-13134 pool: use primary context to do bcast (#11923)
NiuYawei Apr 18, 2023
fd2c538
Merge branch 'master' into feature/multiprovider
kjacque Apr 25, 2023
5f47460
Merge branch 'master' into feature/multiprovider
kjacque May 9, 2023
e2cc532
Merge branch 'master' into feature/multiprovider
kjacque May 16, 2023
153e5e7
Merge branch 'master' into feature/multiprovider
kjacque May 23, 2023
83f24dd
Merge branch 'master' into feature/multiprovider
kjacque May 30, 2023
9839c64
DAOS-13539 control: Fix network related commands for multiprovider (#…
kjacque Jun 2, 2023
adeecf7
Merge branch 'master' into feature/multiprovider
kjacque Jun 5, 2023
578aa37
DAOS-7029 control: Add refresh methods for agent cache (#12370)
kjacque Jun 14, 2023
b36367b
Merge branch 'master' into feature/multiprovider
kjacque Jun 14, 2023
4f3ccef
Merge branch 'master' into feature/multiprovider
kjacque Jun 21, 2023
607f682
Merge branch 'master' into feature/multiprovider
kjacque Jun 26, 2023
3c33a37
Merge branch 'master' into feature/multiprovider
kjacque Jul 6, 2023
cf20d32
Merge branch 'master' into feature/multiprovider
kjacque Jul 10, 2023
a0e4c9b
Merge branch 'master' into feature/multiprovider
kjacque Jul 18, 2023
4374c0c
Merge branch 'master' into feature/multiprovider
kjacque Aug 3, 2023
e8a1981
Merge branch 'master' into feature/multiprovider
kjacque Aug 11, 2023
300afc8
Merge branch 'master' into feature/multiprovider
kjacque Aug 23, 2023
04a8a6d
Merge branch 'master' into feature/multiprovider
kjacque Aug 28, 2023
9e1304f
Merge branch 'master' into feature/multiprovider
kjacque Sep 5, 2023
4483c73
Merge branch 'master' into feature/multiprovider
kjacque Sep 11, 2023
ab04444
Merge branch 'master' into feature/multiprovider
kjacque Sep 21, 2023
566d27a
Merge branch 'master' into feature/multiprovider
kjacque Sep 25, 2023
f41d396
Merge branch 'master' into feature/multiprovider
kjacque Oct 2, 2023
99cc48b
Merge branch 'master' into feature/multiprovider
kjacque Oct 16, 2023
705f624
Merge branch 'master' into feature/multiprovider
kjacque Oct 23, 2023
1388374
Merge branch 'master' into feature/multiprovider
kjacque Oct 31, 2023
6db03d2
Merge branch 'master' into niu/multiprovider-merge
NiuYawei Nov 15, 2023
513815c
Merge branch 'master' into feature/multiprovider
kjacque Dec 8, 2023
1095fe1
Merge branch 'master' into feature/multiprovider
kjacque Dec 12, 2023
06c70a3
Merge branch 'master' into feature/multiprovider
kjacque Dec 20, 2023
474adb0
Merge branch 'master' into feature/multiprovider
kjacque Jan 29, 2024
d271f17
Merge branch 'master' into feature/multiprovider
kjacque Feb 14, 2024
cea3585
Merge branch 'master' into feature/multiprovider
kjacque Mar 19, 2024
a90999e
Merge branch 'master' into feature/multiprovider
kjacque Mar 19, 2024
0a0807a
Fix src/rdb/raft reference
kjacque Mar 19, 2024
98dc057
Merge remote-tracking branch 'origin/master' into niu/multiprovider-m…
NiuYawei Mar 27, 2024
65e76b1
Merge branch 'master' into feature/multiprovider
kjacque Apr 8, 2024
6fe90ac
Merge branch 'master' into feature/multiprovider
kjacque Apr 16, 2024
3e294bc
Merge branch 'master' into feature/multiprovider
kjacque Apr 22, 2024
5cc67c4
Merge remote-tracking branch 'origin/master' into niu/multiprovider-m…
NiuYawei May 15, 2024
4d1db74
Merge pull request #14373 from daos-stack/niu/multiprovider-merge
kjacque May 16, 2024
80136ee
Merge branch 'master' into feature/multiprovider
kjacque May 20, 2024
a4da1e2
Merge branch 'master' into feature/multiprovider
kjacque May 30, 2024
9617c8f
Merge branch 'master' into feature/multiprovider
kjacque Jun 10, 2024
3b1d834
Merge branch 'master' into feature/multiprovider
kjacque Jun 17, 2024
cc9036d
Merge branch 'master' into feature/multiprovider
kjacque Jul 10, 2024
d778b9a
Merge branch 'master' into feature/multiprovider
kjacque Jul 10, 2024
c1bca41
Merge branch 'master' into feature/multiprovider
kjacque Jul 17, 2024
8417661
Merge branch 'master' into feature/multiprovider
kjacque Jul 24, 2024
2c50c30
Merge remote-tracking branch 'origin/master' into niu/multiprovider-m…
NiuYawei Aug 22, 2024
61392e6
Merge pull request #14981 from daos-stack/niu/multiprovider-merge
kjacque Aug 22, 2024
42b9717
Merge remote-tracking branch 'origin/master' into niu/multiprovider/m…
NiuYawei Sep 20, 2024
578d9d0
Merge pull request #15162 from daos-stack/niu/multiprovider/merge-0920
kjacque Sep 20, 2024
f9e4f82
Merge branch 'master' into feature/multiprovider
kjacque Sep 23, 2024
fc3bba5
Merge branch 'master' into feature/multiprovider
kjacque Oct 4, 2024
21c914d
Merge branch 'master' into feature/multiprovider
kjacque Oct 14, 2024
f3c05b4
Merge branch 'master' into feature/multiprovider
kjacque Oct 30, 2024
6f5e9bb
Merge branch 'master' into feature/multiprovider
kjacque Jan 10, 2025
b88b2e3
Merge branch 'master' into feature/multiprovider
kjacque Feb 19, 2025
d10025a
Merge branch 'master' into feature/multiprovider
kjacque Mar 4, 2025
540bb21
Merge branch 'master' into feature/multiprovider
kjacque Mar 10, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/control/cmd/daos_agent/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ type Config struct {
LogLevel common.ControlLogLevel `yaml:"control_log_mask,omitempty"`
TransportConfig *security.TransportConfig `yaml:"transport_config"`
FabricInterfaces []*NUMAFabricConfig `yaml:"fabric_ifaces,omitempty"`
Provider string `yaml:"provider,omitempty"`
}

// NUMAFabricConfig defines a list of fabric interfaces that belong to a NUMA
Expand Down
2 changes: 2 additions & 0 deletions src/control/cmd/daos_agent/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ fabric_ifaces:
-
iface: ib3
domain: mlx5_3
provider: ofi+tcp
`)

badLogMaskCfg := common.CreateTestFile(t, dir, `
Expand Down Expand Up @@ -155,6 +156,7 @@ transport_config:
},
},
},
Provider: "ofi+tcp",
},
},
} {
Expand Down
42 changes: 40 additions & 2 deletions src/control/cmd/daos_agent/mgmt_rpc.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ type mgmtModule struct {
monitor *procMon
useDefaultNUMA bool
numaGetter hardware.ProcessNUMAProvider
provider string
}

func (mod *mgmtModule) HandleCall(ctx context.Context, session *drpc.Session, method drpc.Method, req []byte) ([]byte, error) {
Expand Down Expand Up @@ -143,13 +144,19 @@ func (mod *mgmtModule) getNUMANode(ctx context.Context, pid int32) (uint, error)
}

func (mod *mgmtModule) getAttachInfo(ctx context.Context, numaNode int, sys string) (*mgmtpb.GetAttachInfoResp, error) {
resp, err := mod.getAttachInfoResp(ctx, numaNode, sys)
rawResp, err := mod.getAttachInfoResp(ctx, numaNode, sys)
if err != nil {
mod.log.Errorf("failed to fetch remote AttachInfo: %s", err.Error())
return nil, err
}

fabricIF, err := mod.getFabricInterface(ctx, numaNode, hardware.NetDevClass(resp.ClientNetHint.NetDevClass), resp.ClientNetHint.Provider)
resp, err := mod.getProviderAttachInfo(rawResp)
if err != nil {
return nil, err
}

fabricIF, err := mod.getFabricInterface(ctx, numaNode, hardware.NetDevClass(resp.ClientNetHint.NetDevClass),
resp.ClientNetHint.Provider)
if err != nil {
mod.log.Errorf("failed to fetch fabric interface of type %s: %s",
hardware.NetDevClass(resp.ClientNetHint.NetDevClass), err.Error())
Expand All @@ -171,6 +178,37 @@ func (mod *mgmtModule) getAttachInfoResp(ctx context.Context, numaNode int, sys
return mod.attachInfo.Get(ctx, numaNode, sys, mod.getAttachInfoRemote)
}

// getProviderAttachInfo narrows the server's GetAttachInfo response down to
// the provider configured on the agent (mod.provider).
//
// When no provider is configured, or the configured provider is the one named
// in the response's primary ClientNetHint, srvResp is returned unchanged.
// Otherwise a new response is built from the entries of SecondaryRankUris and
// the first entry of SecondaryClientNetHints whose Provider matches
// mod.provider; Status and MsRanks are carried over from srvResp.
//
// An error is returned if a provider is configured and srvResp is nil, or if
// the configured provider has no matching secondary rank URIs or no matching
// secondary ClientNetHint.
func (mod *mgmtModule) getProviderAttachInfo(srvResp *mgmtpb.GetAttachInfoResp) (*mgmtpb.GetAttachInfoResp, error) {
	// No provider override requested: hand back whatever the server sent.
	if mod.provider == "" {
		return srvResp, nil
	}

	if srvResp == nil {
		return nil, errors.New("nil GetAttachInfoResp")
	}

	// A response without a primary hint cannot match the configured provider,
	// so fall through to the secondary lookup instead of dereferencing nil.
	if srvResp.ClientNetHint != nil && srvResp.ClientNetHint.Provider == mod.provider {
		return srvResp, nil
	}

	var uris []*mgmtpb.GetAttachInfoResp_RankUri
	for _, uri := range srvResp.SecondaryRankUris {
		if uri != nil && uri.Provider == mod.provider {
			uris = append(uris, uri)
		}
	}
	if len(uris) == 0 {
		return nil, errors.Errorf("no rank URIs for provider %q", mod.provider)
	}

	for _, hint := range srvResp.SecondaryClientNetHints {
		if hint == nil || hint.Provider != mod.provider {
			continue
		}

		// Present the secondary provider's data in the primary fields.
		return &mgmtpb.GetAttachInfoResp{
			Status:        srvResp.Status,
			RankUris:      uris,
			MsRanks:       srvResp.MsRanks,
			ClientNetHint: hint,
		}, nil
	}

	return nil, errors.Errorf("no ClientNetHint for provider %q", mod.provider)
}

func (mod *mgmtModule) getAttachInfoRemote(ctx context.Context, numaNode int, sys string) (*mgmtpb.GetAttachInfoResp, error) {
// Ask the MS for _all_ info, regardless of pbReq.AllRanks, so that the
// cache can serve future "pbReq.AllRanks == true" requests.
Expand Down
Loading