Skip to content

Commit

Permalink
DAOS-14845 object: retry migration for retriable failure (#13590)
Browse files Browse the repository at this point in the history
To avoid retrying the whole rebuild and the reclaim, keep retrying
within the current rebuild until the pool map changes further. In that
case, fail the current rebuild; the subsequent rebuild will resolve
the failure.

Various fixes for rebuild when the PS leader keeps changing
during rebuild.

Move migrate max ULT control to migrate_obj_iter_cb() to make
sure max ULT count will not exceed the setting.

Change the yield freq from 128 to 16 to make sure the object

Optimize migrate memory usage
- Add max ULT control for all targets on the xstream, so the number
  of objects being migrated cannot exceed MIGRATE_MAX_ULT.

- Add per-target max ULT control, so the migrate ULT count of each
  target cannot exceed MIGRATE_MAX_ULT/dss_tgt_nr.

- Add migrate_cont_open to avoid calling dsc_cont_open and
  dsc_pool_open for each object and dkey migration.

Features: rebuild
Required-githooks: true

Signed-off-by: Di Wang <[email protected]>
  • Loading branch information
wangdi1 authored and liuxuezhao committed Mar 22, 2024
1 parent cb85a2c commit 3e1393d
Show file tree
Hide file tree
Showing 7 changed files with 392 additions and 184 deletions.
2 changes: 1 addition & 1 deletion src/include/daos/common.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* (C) Copyright 2015-2023 Intel Corporation.
* (C) Copyright 2015-2024 Intel Corporation.
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down
31 changes: 19 additions & 12 deletions src/object/srv_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,12 @@ struct migrate_pool_tls {
* should provide the pool/handle uuid
*/
uuid_t mpt_poh_uuid;
uuid_t mpt_coh_uuid;
daos_handle_t mpt_pool_hdl;

/* container handle list for the migrate pool */
uuid_t mpt_coh_uuid;
d_list_t mpt_cont_hdl_list;

/* Container/objects to be migrated will be attached to the tree */
daos_handle_t mpt_root_hdl;
struct btr_root mpt_root;
Expand All @@ -66,17 +69,15 @@ struct migrate_pool_tls {
/* Max epoch for the migration, used for migrate fetch RPC */
uint64_t mpt_max_eph;

/* The ULT number generated on the xstream */
uint64_t mpt_generated_ult;

/* The ULT number executed on the xstream */
uint64_t mpt_executed_ult;

/* The ULT number generated for object on the xstream */
uint64_t mpt_obj_generated_ult;
/* The ULT number on each target xstream, which actually refer
* back to the item within mpt_obj/dkey_ult_cnts array.
*/
ATOMIC uint32_t *mpt_tgt_obj_ult_cnt;
ATOMIC uint32_t *mpt_tgt_dkey_ult_cnt;

/* The ULT number executed on the xstream */
uint64_t mpt_obj_executed_ult;
/* ULT count array from all targets, obj: enumeration, dkey:fetch/update */
ATOMIC uint32_t *mpt_obj_ult_cnts;
ATOMIC uint32_t *mpt_dkey_ult_cnts;

/* reference count for the structure */
uint64_t mpt_refcount;
Expand All @@ -88,7 +89,7 @@ struct migrate_pool_tls {
uint64_t mpt_inflight_max_size;
ABT_cond mpt_inflight_cond;
ABT_mutex mpt_inflight_mutex;
int mpt_inflight_max_ult;
uint32_t mpt_inflight_max_ult;
uint32_t mpt_opc;

ABT_cond mpt_init_cond;
Expand All @@ -103,6 +104,12 @@ struct migrate_pool_tls {
mpt_fini:1;
};

/*
 * A container-handle record that can be linked into a list via mch_list.
 * NOTE(review): presumably these records are chained on
 * migrate_pool_tls::mpt_cont_hdl_list ("container handle list for the
 * migrate pool") so an opened handle can be reused — confirm against users.
 */
struct migrate_cont_hdl {
/* UUID identifying this record — presumably the container UUID; TODO confirm */
uuid_t mch_uuid;
/* DAOS handle kept for this record — presumably the open container handle; TODO confirm */
daos_handle_t mch_hdl;
/* List linkage for this record */
d_list_t mch_list;
};

/*
 * Tear down the given per-pool migrate TLS.
 * NOTE(review): the definition is not visible here; confirm whether this
 * frees unconditionally or only once mpt_refcount drops to zero.
 */
void
migrate_pool_tls_destroy(struct migrate_pool_tls *tls);

Expand Down
Loading

0 comments on commit 3e1393d

Please sign in to comment.