Skip to content

Commit 4d87862

Browse files
authored
chore: add metrics to track per-shard in-flight sliver recovery during shard sync (#3064)
Add a metric that tracks, per shard, the number of in-flight sliver recovery tasks during shard sync.
1 parent 3837623 commit 4d87862

2 files changed

Lines changed: 19 additions & 0 deletions

File tree

crates/walrus-service/src/node/metrics.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,9 @@ walrus_utils::metrics::define_metric_set! {
7171
#[help = "Total number of slivers pending recovery during shard sync"]
7272
sync_shard_recover_sliver_pending_total: IntGaugeVec["shard"],
7373

74+
#[help = "Number of inflight sliver recovery tasks during shard sync"]
75+
sync_shard_recover_sliver_inflight: IntGaugeVec["shard"],
76+
7477
#[help = "Total number of slivers started recovery during shard sync"]
7578
sync_shard_recover_sliver_total: IntCounterVec["shard", "sliver_type"],
7679

crates/walrus-service/src/node/storage/shard.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1405,6 +1405,10 @@ impl ShardStorage {
14051405
epoch: Epoch,
14061406
) -> Result<(), SyncShardClientError> {
14071407
let mut futures = FuturesUnordered::new();
1408+
let inflight_gauge = walrus_utils::with_label!(
1409+
node.metrics.sync_shard_recover_sliver_inflight,
1410+
&self.id.to_string()
1411+
);
14081412

14091413
// Update the metric for the total number of blobs pending recovery, so that we know how
14101414
// many blobs are pending recovery.
@@ -1444,6 +1448,10 @@ impl ShardStorage {
14441448
self.skip_recover_blob(blob_id, sliver_type, &node, "not_certified")?;
14451449
} else {
14461450
futures.push(self.recover_blob(blob_id, sliver_type, node.clone(), epoch));
1451+
inflight_gauge.set(
1452+
i64::try_from(futures.len())
1453+
.expect("number of inflight recoveries should fit into an i64"),
1454+
);
14471455
}
14481456

14491457
total_blobs_pending_recovery -= 1;
@@ -1453,11 +1461,19 @@ impl ShardStorage {
14531461
if futures.len() >= config.max_concurrent_blob_recovery_during_shard_recovery
14541462
&& let Some(result) = futures.next().await
14551463
{
1464+
inflight_gauge.set(
1465+
i64::try_from(futures.len())
1466+
.expect("number of inflight recoveries should fit into an i64"),
1467+
);
14561468
maybe_log_error(result);
14571469
}
14581470
}
14591471

14601472
while let Some(result) = futures.next().await {
1473+
inflight_gauge.set(
1474+
i64::try_from(futures.len())
1475+
.expect("number of inflight recoveries should fit into an i64"),
1476+
);
14611477
maybe_log_error(result);
14621478
}
14631479

0 commit comments

Comments
 (0)