Skip to content

Commit 9dc9e85

Browse files
committed
UCT/IB: Hold rcache refcnt for UMR contig memh
1 parent d0de5ef commit 9dc9e85

File tree

4 files changed: 15 additions and 15 deletions.

src/ucp/core/ucp_request.c

+2-1
Original file line numberDiff line numberDiff line change
@@ -277,7 +277,8 @@ UCS_PROFILE_FUNC(ucs_status_t, ucp_request_memory_reg,
277277
status = ucp_mem_rereg_mds(context, UCS_BIT(md_idx),
278278
buffer + s->lb_displ,
279279
s->extent,
280-
UCT_MD_MEM_ACCESS_ALL, NULL,
280+
UCT_MD_MEM_ACCESS_ALL |
281+
UCT_MD_MEM_FLAG_NC_BASE, NULL,
281282
UCS_MEMORY_TYPE_HOST, NULL,
282283
state->dt.struct_dt.contig.memh,
283284
&state->dt.struct_dt.contig.md_map);

src/uct/api/uct.h

+1
Original file line numberDiff line numberDiff line change
@@ -664,6 +664,7 @@ enum uct_md_mem_flags {
664664
UCT_MD_MEM_ACCESS_REMOTE_GET = UCS_BIT(6), /**< enable remote get access */
665665
UCT_MD_MEM_ACCESS_REMOTE_ATOMIC = UCS_BIT(7), /**< enable remote atomic access */
666666
UCT_MD_MEM_FLAG_EMPTY = UCS_BIT(8), /**< Create empty handle (for UMR) */
667+
UCT_MD_MEM_FLAG_NC_BASE = UCS_BIT(9), /**< Used by UMR */
667668

668669
/** enable local and remote access for all operations */
669670
UCT_MD_MEM_ACCESS_ALL = (UCT_MD_MEM_ACCESS_REMOTE_PUT|

src/uct/ib/base/ib_md.c

+6-14
Original file line numberDiff line numberDiff line change
@@ -896,20 +896,6 @@ static ucs_status_t uct_ib_mem_rcache_reg(uct_md_h uct_md, void *address,
896896
ucs_status_t status;
897897
uct_ib_mem_t *memh;
898898

899-
char *ptr = getenv("PMIX_RANK");
900-
if(!strcmp(ptr, "0")){
901-
static int count = 0;
902-
903-
printf("ALLOC: addr=%p, size=%zu, count=%d\n", address, length, count++);
904-
fflush(stdout);
905-
#if 1
906-
static int delay = 1;
907-
while( ((count - 1) == 16) && delay ) {
908-
sleep(1);
909-
}
910-
#endif
911-
}
912-
913899
status = ucs_rcache_get(md->rcache, address, length, PROT_READ|PROT_WRITE,
914900
&flags, &rregion);
915901
if (status != UCS_OK) {
@@ -925,6 +911,12 @@ if(!strcmp(ptr, "0")){
925911
if (flags & UCT_MD_MEM_ACCESS_REMOTE_ATOMIC) {
926912
memh->flags |= UCT_IB_MEM_ACCESS_REMOTE_ATOMIC;
927913
}
914+
915+
if (flags & UCT_MD_MEM_FLAG_NC_BASE) {
916+
/* This region is used by UMR */
917+
ucs_rcache_region_hold(md->rcache, rregion);
918+
}
919+
928920
*memh_p = memh;
929921
return UCS_OK;
930922
}

src/uct/ib/mlx5/exp/ib_exp_md.c

+6
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,11 @@ typedef struct uct_ib_mlx5_mem {
3131
} uct_ib_mlx5_mem_t;
3232

3333
struct uct_ib_umr {
34+
uct_ib_mlx5_md_t *md;
3435
unsigned depth;
3536
int is_inline;
3637
uct_ib_mlx5_mem_t memh; /* memh for indirect mr*/
38+
uct_ib_mlx5_mem_t *contig_memh;
3739
struct ibv_exp_send_wr wr;
3840
size_t repeat_count; /* 0 is not allowed; if 1 it is UMR
3941
list, otherwise repeated block */
@@ -381,11 +383,13 @@ uct_ib_mlx5_exp_umr_alloc(uct_ib_mlx5_md_t *md, const uct_iov_t *iov,
381383
}
382384
memset(&umr->wr, 0, sizeof(umr->wr));
383385

386+
umr->md = md;
384387
umr->repeat_count = repeat_count;
385388
umr->depth = umr_depth;
386389
umr->iov_count = iov_count;
387390
umr->comp.count = 1; /* for async reg */
388391
umr->memh.umr = umr;
392+
umr->contig_memh = ucs_derived_of(iov->memh, uct_ib_mlx5_mem_t); /* assume all iovs use the same memh for now */
389393

390394
if (repeat_count == 1) { /* MRs list */
391395
status = uct_ib_mlx5_exp_umr_fill_region(umr, iov, iov_count);
@@ -552,6 +556,8 @@ uct_ib_mlx5_exp_umr_deregister(uct_ib_mem_t *memh, struct ibv_qp *qp,
552556
}
553557
}
554558

559+
umr->md->super.super.ops->mem_dereg(&umr->md->super.super, &umr->contig_memh->super);
560+
555561
ucs_free(umr);
556562

557563
return UCS_OK;

0 commit comments

Comments
 (0)