-
Notifications
You must be signed in to change notification settings - Fork 186
fix: optimize tipset_by_height performance with known_blocks.yaml
#6753
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,93 @@ | ||
| // Copyright 2019-2026 ChainSafe Systems | ||
| // SPDX-License-Identifier: Apache-2.0, MIT | ||
|
|
||
| //! Provides utilities for efficiently locating the genesis block and known checkpoints | ||
| //! in the Filecoin blockchain by leveraging a list of precomputed, hash-chained block CIDs. | ||
| //! This avoids scanning millions of epochs, significantly speeding up chain traversal. | ||
|
|
||
| use crate::{ | ||
| blocks::{CachingBlockHeader, Tipset}, | ||
| networks::NetworkChain, | ||
| shim::clock::ChainEpoch, | ||
| }; | ||
| use ahash::HashMap; | ||
| use anyhow::Context as _; | ||
| use cid::Cid; | ||
| use fvm_ipld_blockstore::Blockstore; | ||
| use itertools::Itertools; | ||
| use serde::{Deserialize, Serialize}; | ||
| use serde_with::{DisplayFromStr, serde_as}; | ||
| use std::sync::{LazyLock, OnceLock}; | ||
|
|
||
| /// Holds mappings from chain epochs to block CIDs for each network. | ||
| #[serde_as] | ||
| #[derive(Serialize, Deserialize)] | ||
| pub struct KnownBlocks { | ||
| #[serde_as(as = "HashMap<_, DisplayFromStr>")] | ||
| pub calibnet: HashMap<ChainEpoch, Cid>, | ||
| #[serde_as(as = "HashMap<_, DisplayFromStr>")] | ||
| pub mainnet: HashMap<ChainEpoch, Cid>, | ||
| } | ||
|
|
||
| /// Lazily loaded static instance of `KnownBlocks` from YAML. | ||
| /// Caches (`OnceLock`) are used to avoid recomputing known tipsets. | ||
| pub static KNOWN_BLOCKS: LazyLock<KnownBlocks> = LazyLock::new(|| { | ||
| serde_yaml::from_str(include_str!("../../build/known_blocks.yaml")).expect("infallible") | ||
| }); | ||
|
|
||
| /// Returns a cached, ascending-epoch list of known [`Tipset`]s for the given network. | ||
| pub fn known_tipsets( | ||
| bs: &impl Blockstore, | ||
| network: &NetworkChain, | ||
| ) -> anyhow::Result<&'static Vec<Tipset>> { | ||
| static CACHE_CALIBNET: OnceLock<Vec<Tipset>> = OnceLock::new(); | ||
| static CACHE_MAINNET: OnceLock<Vec<Tipset>> = OnceLock::new(); | ||
| let (cache, known_blocks) = match network { | ||
| NetworkChain::Calibnet => (&CACHE_CALIBNET, &KNOWN_BLOCKS.calibnet), | ||
| NetworkChain::Mainnet => (&CACHE_MAINNET, &KNOWN_BLOCKS.mainnet), | ||
| _ => anyhow::bail!("unsupported network {network}"), | ||
| }; | ||
| if let Some(v) = cache.get() { | ||
| Ok(v) | ||
| } else { | ||
| let tipsets = known_blocks_to_known_tipsets(bs, known_blocks)?; | ||
| _ = cache.set(tipsets); | ||
| cache.get().context("infallible") | ||
| } | ||
| } | ||
|
|
||
| fn known_blocks_to_known_tipsets( | ||
| bs: &impl Blockstore, | ||
| blocks: &HashMap<ChainEpoch, Cid>, | ||
| ) -> anyhow::Result<Vec<Tipset>> { | ||
| let mut tipsets: Vec<Tipset> = blocks | ||
| .values() | ||
| .map(|&b| block_cid_to_required_parent_tipset(bs, b)) | ||
| .try_collect()?; | ||
| tipsets.sort_by_key(|ts| ts.epoch()); | ||
| Ok(tipsets) | ||
| } | ||
|
|
||
| fn block_cid_to_parent_tipset(bs: &impl Blockstore, block: Cid) -> anyhow::Result<Option<Tipset>> { | ||
| if let Some(block) = CachingBlockHeader::load(bs, block)? { | ||
| Tipset::load(bs, &block.parents) | ||
| } else { | ||
| Ok(None) | ||
| } | ||
| } | ||
|
|
||
| fn block_cid_to_required_parent_tipset(bs: &impl Blockstore, block: Cid) -> anyhow::Result<Tipset> { | ||
| block_cid_to_parent_tipset(bs, block)? | ||
| .with_context(|| format!("failed to load parent tipset of block {block}")) | ||
| } | ||
|
|
||
#[cfg(test)]
mod tests {
    use super::*;

    /// The embedded YAML must parse and provide at least one checkpoint per network.
    #[test]
    fn test_known_blocks() {
        let KnownBlocks { calibnet, mainnet } = &*KNOWN_BLOCKS;
        assert!(!calibnet.is_empty());
        assert!(!mainnet.is_empty());
    }
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -6,7 +6,6 @@ use super::{ | |
| index::{ChainIndex, ResolveNullTipset}, | ||
| tipset_tracker::TipsetTracker, | ||
| }; | ||
| use crate::libp2p_bitswap::{BitswapStoreRead, BitswapStoreReadWrite}; | ||
| use crate::message::{ChainMessage, Message as MessageTrait, SignedMessage}; | ||
| use crate::networks::{ChainConfig, Height}; | ||
| use crate::rpc::eth::{eth_tx_from_signed_eth_message, types::EthHash}; | ||
|
|
@@ -17,6 +16,10 @@ use crate::shim::{ | |
| }; | ||
| use crate::state_manager::StateOutput; | ||
| use crate::utils::db::{BlockstoreExt, CborStoreExt}; | ||
| use crate::{ | ||
| blocks::checkpoints::known_tipsets, | ||
| libp2p_bitswap::{BitswapStoreRead, BitswapStoreReadWrite}, | ||
| }; | ||
| use crate::{ | ||
| blocks::{CachingBlockHeader, Tipset, TipsetKey, TxMeta}, | ||
| db::HeaviestTipsetKeyProvider, | ||
|
|
@@ -242,6 +245,41 @@ where | |
| Tipset::from(self.genesis_block_header()) | ||
| } | ||
|
|
||
| /// Find tipset at epoch `to` in the chain | ||
| pub fn tipset_by_height( | ||
| &self, | ||
| to: ChainEpoch, | ||
| from: Option<Tipset>, | ||
| resolve: ResolveNullTipset, | ||
| ) -> Result<Tipset, Error> { | ||
| let head = self.heaviest_tipset(); | ||
| // Fail fast when `to` is too large | ||
| if to > head.epoch() { | ||
| return Err(Error::Other(format!( | ||
| "looking for tipset with height greater than the current chain head, req: {to}, head: {}", | ||
| head.epoch() | ||
| ))); | ||
| } | ||
|
|
||
| let best_known_from = if let Ok(known_tipsets) = | ||
| known_tipsets(self.blockstore(), &self.chain_config().network) | ||
| && let Some(ts) = known_tipsets.iter().find(|ts| ts.epoch() > to).cloned() | ||
| { | ||
| Some(ts) | ||
| } else { | ||
| None | ||
| }; | ||
|
Comment on lines
+264
to
+271
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🏁 Script executed: #!/bin/bash
set -euo pipefail
rg -n -C4 'known_blocks|known_blocks\.yaml|checkpoint|CHAIN_FINALITY|finality' build.rs srcRepository: ChainSafe/forest Length of output: 50372 🏁 Script executed: #!/bin/bash
# Look at the update_chain_checkpoints function and surrounding finality checks
sed -n '110,160p' src/dev/subcommands/update_checkpoints_cmd.rs
# Check CHECKPOINT_INTERVAL and CHAIN_FINALITY relationship
rg -n 'CHAIN_FINALITY|chain_finality' src/dev/subcommands/update_checkpoints_cmd.rs
# Check constants.rs or similar for CHAIN_FINALITY value
rg -n 'const.*CHAIN_FINALITY|const.*CHECKPOINT_INTERVAL' --type rsRepository: ChainSafe/forest Length of output: 1999 🏁 Script executed: #!/bin/bash
# Find the fetch_checkpoint_at_height function and check for finality guards
rg -A 20 'async fn fetch_checkpoint_at_height' src/dev/subcommands/update_checkpoints_cmd.rs
# Search for CHAIN_FINALITY definitions in networks or config
rg 'chain_finality\s*[:=]' src/networks/ src/shim/ | head -20
# Look at the end of update_chain_checkpoints to see if there's finality validation
sed -n '110,200p' src/dev/subcommands/update_checkpoints_cmd.rs | tail -50Repository: ChainSafe/forest Length of output: 3335 🏁 Script executed: #!/bin/bash
# Check CHAIN_FINALITY values for mainnet/calibnet
rg -B 2 -A 2 'chain_finality.*:.*900|chain_finality.*=.*900' src/networks/
# Look at full tipset_by_height implementation to check the `from` filtering issue
sed -n '246,275p' src/chain/store/chain_store.rsRepository: ChainSafe/forest Length of output: 1168 🏁 Script executed: #!/bin/bash
# Find CHAIN_FINALITY values for mainnet and calibnet
rg 'chain_finality.*900' src/networks/ -A 1 -B 1
# Also check the policy structure
rg -A 5 'struct.*Policy' src/shim/ | head -30
# Check if there's any finality-distance validation when adding checkpoints
rg -B 5 -A 5 'latest_checkpoint_epoch.*=' src/dev/subcommands/update_checkpoints_cmd.rsRepository: ChainSafe/forest Length of output: 990 🏁 Script executed: #!/bin/bash
# Find the actual CHAIN_FINALITY constants for networks
rg 'chain_finality\s*[:=]\s*900' src/networks/
# Check what CHECKPOINT_INTERVAL is
rg 'CHECKPOINT_INTERVAL' src/dev/subcommands/update_checkpoints_cmd.rs
# Check mainnet policy to see if CHECKPOINT_INTERVAL >= CHAIN_FINALITY
python3 << 'EOF'
checkpoint_interval = 86400 # 30 days from code
chain_finality = 900 # EC consensus finality for Filecoin
print(f"CHECKPOINT_INTERVAL: {checkpoint_interval} epochs")
print(f"CHAIN_FINALITY: {chain_finality} epochs")
print(f"CHECKPOINT_INTERVAL >= CHAIN_FINALITY: {checkpoint_interval >= chain_finality}")
print(f"\nDistance from head when rounding down:")
print(f" Max distance: {checkpoint_interval - 1} epochs")
print(f" Safe if >= CHAIN_FINALITY: {checkpoint_interval - 1 >= chain_finality}")
EOF
# Check if there's any safety margin or validation applied AFTER fetching
sed -n '110,180p' src/dev/subcommands/update_checkpoints_cmd.rs | grep -A 10 'Fetching checkpoints'Repository: ChainSafe/forest Length of output: 1055 🏁 Script executed: #!/bin/bash
# Check if there's validation for from.epoch() >= to in chain_index.tipset_by_height
rg -A 30 'fn tipset_by_height' src/chain/index.rs | head -40
# Also check if the caller-provided `from` parameter is documented
rg -B 5 'pub fn tipset_by_height' src/chain/store/chain_store.rsRepository: ChainSafe/forest Length of output: 313 🏁 Script executed: #!/bin/bash
# Find where ChainIndex is defined
fd ChainIndex src/ --type f | head -5
# Find the implementation of chain_index().tipset_by_height()
rg -l 'impl.*ChainIndex' src/ | head -5
# Search for the actual tipset_by_height in ChainIndex
rg -A 30 'impl.*ChainIndex' src/chain/ | grep -A 30 'tipset_by_height'Repository: ChainSafe/forest Length of output: 113 🏁 Script executed: #!/bin/bash
# Find the tipset_by_height method in ChainIndex
rg -A 40 'fn tipset_by_height' src/chain/store/index.rs
# Also verify the behavior when from.epoch() < to
sed -n '1,100p' src/chain/store/index.rs | tail -50 Repository: ChainSafe/forest Length of output: 3717 Add validation to reject a caller-provided `from` whose epoch is below `to`. The checkpoint finality constraint is sound. Add a validation after the match (or inline in the match arms) to ensure `from.epoch() >= to`: let from = match (from, best_known_from) {
(Some(a), Some(b)) if b.epoch() < a.epoch() => b,
(Some(a), _) => a,
(None, Some(b)) => b,
(None, None) => self.heaviest_tipset(),
};
if from.epoch() < to {
return Err(Error::Other(format!("start tipset epoch {} is before target epoch {}", from.epoch(), to)));
}This prevents downstream errors and provides a clear, actionable error message to callers. 🤖 Prompt for AI Agents |
||
| let from = match (from, best_known_from) { | ||
| // prefer `b` when `b` is closer to `to` | ||
| (Some(a), Some(b)) if b.epoch() < a.epoch() => b, | ||
| // prefer `a` when present | ||
| (Some(a), _) => a, | ||
| (None, Some(b)) => b, | ||
| // fallback to chain head | ||
| (None, None) => head, | ||
| }; | ||
|
Comment on lines
+272
to
+280
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ignore unusable `from` hints. If a caller passes a `from` tipset whose epoch is below `to`, drop it before choosing the start tipset. ♻️ Suggested fix: - let from = match (from, best_known_from) {
+ let from = match (from.filter(|ts| ts.epoch() >= to), best_known_from) {
// prefer `b` when `b` is closer to `to`
(Some(a), Some(b)) if b.epoch() < a.epoch() => b,
// prefer `a` when presents
(Some(a), _) => a,
(None, Some(b)) => b,
// fallback to chain head
(None, None) => self.heaviest_tipset(),
};🤖 Prompt for AI Agents
Comment on lines
+264
to
+280
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Only seed from known checkpoints once they're finalized against the active head.
♻️ Possible fix+ let head_epoch = from
+ .as_ref()
+ .map(|ts| ts.epoch())
+ .unwrap_or_else(|| self.heaviest_tipset().epoch());
let best_known_from = if let Ok(known_tipsets) =
known_tipsets(self.blockstore(), &self.chain_config().network)
- && let Some(ts) = known_tipsets.iter().find(|ts| ts.epoch() > to).cloned()
+ && let Some(ts) = known_tipsets
+ .iter()
+ .find(|ts| {
+ ts.epoch() > to
+ && ts.epoch() + self.chain_config().policy.chain_finality <= head_epoch
+ })
+ .cloned()
{
Some(ts)
} else {
None
};🤖 Prompt for AI Agents |
||
| self.chain_index().tipset_by_height(to, from, resolve) | ||
| } | ||
| /// Subscribes head changes. | ||
| pub fn subscribe_head_changes(&self) -> broadcast::Receiver<HeadChanges> { | ||
| self.head_changes_tx.subscribe() | ||
|
|
@@ -278,22 +316,10 @@ where | |
| } | ||
|
|
||
| pub fn load_child_tipset(&self, ts: &Tipset) -> Result<Tipset, Error> { | ||
| let head = self.heaviest_tipset(); | ||
| if head.parents() == ts.key() { | ||
| Ok(head) | ||
| } else if head.epoch() > ts.epoch() { | ||
| let maybe_child = self.chain_index().tipset_by_height( | ||
| ts.epoch() + 1, | ||
| head, | ||
| ResolveNullTipset::TakeNewer, | ||
| )?; | ||
| if maybe_child.parents() == ts.key() { | ||
| Ok(maybe_child) | ||
| } else { | ||
| Err(Error::NotFound( | ||
| format!("child of tipset@{}", ts.epoch()).into(), | ||
| )) | ||
| } | ||
| let maybe_child = | ||
| self.tipset_by_height(ts.epoch() + 1, None, ResolveNullTipset::TakeNewer)?; | ||
| if maybe_child.parents() == ts.key() { | ||
| Ok(maybe_child) | ||
| } else { | ||
| Err(Error::NotFound( | ||
| format!("child of tipset@{}", ts.epoch()).into(), | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Memoize checkpoint lookup failures per blockstore.
This only caches the success path. If one checkpoint header or parent tipset is missing,
`known_tipsets()` keeps retrying the same initialization work on every `ChainStore::tipset_by_height()` call before falling back. On stores without the full checkpoint set, that turns the optimization into permanent overhead. Consider moving this cache onto `ChainStore` so success/failure is memoized per blockstore instance. 🤖 Prompt for AI Agents