From 08604707eadba8286f6d6c9dc0fed3ce65f0f168 Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Fri, 6 Mar 2026 16:26:11 +0800 Subject: [PATCH 1/2] DAOS-18633 rebuild: abort orphaned reclaim rpt after PS leader switch After a PS leader switch, ds_rebuild_regenerate_task() only regenerates rebuild tasks for DOWN/DRAIN/UP targets. RECLAIM tasks are not regenerated because reintegrated targets are already UPIN. This leaves an orphaned rpt with a stale leader term on every target; its IV updates are silently dropped by the new leader because there is no matching rgt. As a result, sp_rebuilding stays > 0 permanently, blocking EC aggregation and causing system-wide performance degradation. Fix: detect the stale leader term in rebuild_tgt_status_check_ult() and abort the orphaned rpt. Signed-off-by: Wang Shilong --- src/rebuild/rebuild_iv.c | 9 +++++++-- src/rebuild/srv.c | 36 +++++++++++++++++++++++++++++++++--- 2 files changed, 40 insertions(+), 5 deletions(-) diff --git a/src/rebuild/rebuild_iv.c b/src/rebuild/rebuild_iv.c index afb85154615..5754cab8fe8 100644 --- a/src/rebuild/rebuild_iv.c +++ b/src/rebuild/rebuild_iv.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2017-2024 Intel Corporation. 
- * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -129,8 +129,13 @@ rebuild_iv_ent_update(struct ds_iv_entry *entry, struct ds_iv_key *key, /* Gathering the rebuild status here */ rgt = rebuild_global_pool_tracker_lookup(src_iv->riv_pool_uuid, src_iv->riv_ver, src_iv->riv_rebuild_gen); - if (rgt == NULL) + if (rgt == NULL) { + D_WARN(DF_UUID " rgt not found ver %d gen %u from rank %d term " DF_U64 + " on rank %d, possibly stale IV after PS leader switch\n", + DP_UUID(src_iv->riv_pool_uuid), src_iv->riv_ver, src_iv->riv_rebuild_gen, + src_iv->riv_rank, src_iv->riv_leader_term, rank); D_GOTO(out, rc); + } if (rgt->rgt_leader_term == src_iv->riv_leader_term) { /* update the rebuild global status */ diff --git a/src/rebuild/srv.c b/src/rebuild/srv.c index 91185bb5cc3..d5262884eca 100644 --- a/src/rebuild/srv.c +++ b/src/rebuild/srv.c @@ -87,6 +87,38 @@ rpt_stale(struct rebuild_tgt_pool_tracker *rpt) return !found; } +enum { + RPT_ABORT_NONE = 0, + RPT_ABORT_ORPHANED_RECLAIM, + RPT_ABORT_GENERAL_STALE, +}; + +static int +rpt_should_abort(struct rebuild_tgt_pool_tracker *rpt, struct ds_iv_ns *ns, struct rebuild_iv *iv) +{ + /* Abort an orphaned reclaim rpt: rt_scan_done is set but rt_leader_term is + * older than iv_master_term. After a PS leader switch, reclaim tasks are not + * regenerated (UPIN is not in DOWN/UP/DRAIN), so this rpt has no matching rgt + * on the new leader and its IV updates are silently dropped. 
+ */ + if (rpt->rt_leader_term < ns->iv_master_term && rpt->rt_scan_done && + (rpt->rt_rebuild_op == RB_OP_FAIL_RECLAIM || rpt->rt_rebuild_op == RB_OP_RECLAIM)) { + D_ERROR(DF_UUID " ver %d gen %u op %s: stale term " DF_U64 " < " DF_U64 + ", abort orphaned rpt\n", + DP_UUID(rpt->rt_pool_uuid), rpt->rt_rebuild_ver, rpt->rt_rebuild_gen, + RB_OP_STR(rpt->rt_rebuild_op), rpt->rt_leader_term, ns->iv_master_term); + + return RPT_ABORT_ORPHANED_RECLAIM; + } + + if (iv->riv_pull_done && rpt_stale(rpt)) { + D_ERROR(DF_RB " is stale, exit the ULT.\n", DP_RB_RPT(rpt)); + return RPT_ABORT_GENERAL_STALE; + } + + return RPT_ABORT_NONE; +} + struct rebuild_pool_tls * rebuild_pool_tls_lookup(uuid_t pool_uuid, unsigned int ver, uint32_t gen) { @@ -3030,10 +3062,8 @@ rebuild_tgt_status_check_ult(void *arg) break; sched_req_sleep(rpt->rt_ult, RBLD_CHECK_INTV); - if (iv.riv_pull_done && rpt_stale(rpt)) { - D_ERROR(DF_RB " is stale, exit the ULT.\n", DP_RB_RPT(rpt)); + if (rpt_should_abort(rpt, rpt->rt_pool->sp_iv_ns, &iv) != RPT_ABORT_NONE) break; - } } sched_req_put(rpt->rt_ult); From b099a4a6f8a4f610f43fccf28bcb57a9ed2785c6 Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Fri, 6 Mar 2026 14:54:04 +0000 Subject: [PATCH 2/2] abort leader ult Allow-unstable-test: true Features: rebuild Signed-off-by: Wang Shilong --- src/rebuild/srv.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/rebuild/srv.c b/src/rebuild/srv.c index d5262884eca..14f27ac5a5e 100644 --- a/src/rebuild/srv.c +++ b/src/rebuild/srv.c @@ -1114,6 +1114,18 @@ rebuild_leader_status_check(struct ds_pool *pool, uint32_t op, ABT_rwlock_unlock(pool->sp_lock); map_ranks_fini(&rank_list); + /* Abort an orphaned reclaim rgt whose rgt_leader_term is older than iv_master_term. + * After a PS leader switch, this rgt belongs to the old term and should be aborted. 
+ */ + if (rgt->rgt_leader_term < pool->sp_iv_ns->iv_master_term && + (rgt->rgt_opc == RB_OP_FAIL_RECLAIM || rgt->rgt_opc == RB_OP_RECLAIM)) { + D_INFO(DF_RB " op %s: stale term " DF_U64 " < " DF_U64 + ", abort orphaned rgt\n", + DP_RB_RGT(rgt), RB_OP_STR(rgt->rgt_opc), rgt->rgt_leader_term, + pool->sp_iv_ns->iv_master_term); + rebuild_abort = true; + } + if (rebuild_abort) { rgt->rgt_abort = 1; rgt->rgt_status.rs_errno = -DER_STALE;