Skip to content

Commit 5d45ddd

Browse files
committed
wip
1 parent 1599a2e commit 5d45ddd

File tree

1 file changed

+150
-158
lines changed

1 file changed

+150
-158
lines changed

bin/ev-reth/src/main.rs

Lines changed: 150 additions & 158 deletions
Original file line numberDiff line numberDiff line change
@@ -54,123 +54,172 @@ use crate::{
5454
#[global_allocator]
5555
static ALLOC: reth_cli_util::allocator::Allocator = reth_cli_util::allocator::new_allocator();
5656

57-
/// Configuration constants for EV-RETH node operation
58-
struct Config;
57+
/// Centralized configuration for EV-RETH node operation
58+
#[derive(Debug, Clone)]
59+
struct NodeConfig {
60+
shutdown_timeout: Duration,
61+
status_check_interval: u64,
62+
enable_fallback_status_checks: bool,
63+
max_fallback_checks: u64,
64+
}
5965

60-
impl Config {
66+
impl NodeConfig {
6167
/// Default shutdown timeout in seconds (optimized for containers)
6268
const DEFAULT_SHUTDOWN_TIMEOUT_SECS: u64 = 15;
6369

6470
/// Maximum allowed shutdown timeout in seconds (5 minutes)
6571
const MAX_SHUTDOWN_TIMEOUT_SECS: u64 = 300;
6672

67-
/// Default status check interval in seconds (1 hour - more reasonable for debugging)
73+
/// Default status check interval in seconds (1 hour)
6874
const DEFAULT_STATUS_CHECK_INTERVAL_SECS: u64 = 3600;
6975

7076
/// Maximum allowed status check interval in seconds (6 hours)
7177
const MAX_STATUS_CHECK_INTERVAL_SECS: u64 = 21600;
7278

73-
/// Default maximum number of fallback status checks (24 checks at 1-hour intervals = 24 hours)
79+
/// Default maximum number of fallback status checks
7480
const DEFAULT_MAX_FALLBACK_CHECKS: u64 = 24;
75-
}
7681

77-
/// Parse and validate shutdown timeout from environment variable
78-
fn parse_shutdown_timeout() -> Duration {
79-
match std::env::var("EV_RETH_SHUTDOWN_TIMEOUT") {
80-
Ok(val) => match val.parse::<u64>() {
81-
Ok(secs) if secs > 0 && secs <= Config::MAX_SHUTDOWN_TIMEOUT_SECS => {
82-
tracing::info!("Using custom shutdown timeout of {}s from environment", secs);
83-
Duration::from_secs(secs)
84-
}
85-
Ok(secs) => {
86-
tracing::warn!(
87-
"Shutdown timeout {}s is out of bounds (1-{}), using default {}s",
88-
secs, Config::MAX_SHUTDOWN_TIMEOUT_SECS, Config::DEFAULT_SHUTDOWN_TIMEOUT_SECS
89-
);
90-
Duration::from_secs(Config::DEFAULT_SHUTDOWN_TIMEOUT_SECS)
91-
}
92-
Err(_) => {
93-
tracing::warn!(
94-
"Invalid EV_RETH_SHUTDOWN_TIMEOUT value '{}', using default {}s",
95-
val, Config::DEFAULT_SHUTDOWN_TIMEOUT_SECS
96-
);
97-
Duration::from_secs(Config::DEFAULT_SHUTDOWN_TIMEOUT_SECS)
98-
}
99-
},
100-
Err(_) => {
101-
tracing::info!("Using default shutdown timeout of {}s", Config::DEFAULT_SHUTDOWN_TIMEOUT_SECS);
102-
Duration::from_secs(Config::DEFAULT_SHUTDOWN_TIMEOUT_SECS)
82+
/// Load configuration from environment variables with validation
83+
fn from_env() -> Self {
84+
let shutdown_timeout = Self::parse_shutdown_timeout();
85+
let status_check_interval = Self::parse_status_check_interval();
86+
let enable_fallback_status_checks = std::env::var("EV_RETH_ENABLE_FALLBACK_STATUS_CHECKS")
87+
.map(|v| v.to_lowercase() == "true")
88+
.unwrap_or(false);
89+
let max_fallback_checks = std::env::var("EV_RETH_MAX_FALLBACK_CHECKS")
90+
.ok()
91+
.and_then(|s| s.parse().ok())
92+
.unwrap_or(Self::DEFAULT_MAX_FALLBACK_CHECKS);
93+
94+
Self {
95+
shutdown_timeout,
96+
status_check_interval,
97+
enable_fallback_status_checks,
98+
max_fallback_checks,
10399
}
104100
}
105-
}
106101

107-
/// Parse and validate status check interval from environment variable
108-
fn parse_status_check_interval() -> u64 {
109-
match std::env::var("EV_RETH_STATUS_CHECK_INTERVAL") {
110-
Ok(val) => match val.parse::<u64>() {
111-
Ok(secs) if secs > 0 && secs <= Config::MAX_STATUS_CHECK_INTERVAL_SECS => {
112-
tracing::info!("Using custom status check interval of {}s from environment", secs);
113-
secs
114-
}
115-
Ok(secs) => {
116-
tracing::warn!(
117-
"Status check interval {}s is out of bounds (1-{}), using default {}s",
118-
secs, Config::MAX_STATUS_CHECK_INTERVAL_SECS, Config::DEFAULT_STATUS_CHECK_INTERVAL_SECS
119-
);
120-
Config::DEFAULT_STATUS_CHECK_INTERVAL_SECS
121-
}
102+
fn parse_shutdown_timeout() -> Duration {
103+
match std::env::var("EV_RETH_SHUTDOWN_TIMEOUT") {
104+
Ok(val) => match val.parse::<u64>() {
105+
Ok(secs) if secs > 0 && secs <= Self::MAX_SHUTDOWN_TIMEOUT_SECS => {
106+
tracing::info!("Using custom shutdown timeout of {}s from environment", secs);
107+
Duration::from_secs(secs)
108+
}
109+
Ok(secs) => {
110+
tracing::warn!(
111+
"Shutdown timeout {}s is out of bounds (1-{}), using default {}s",
112+
secs, Self::MAX_SHUTDOWN_TIMEOUT_SECS, Self::DEFAULT_SHUTDOWN_TIMEOUT_SECS
113+
);
114+
Duration::from_secs(Self::DEFAULT_SHUTDOWN_TIMEOUT_SECS)
115+
}
116+
Err(_) => {
117+
tracing::warn!(
118+
"Invalid EV_RETH_SHUTDOWN_TIMEOUT value '{}', using default {}s",
119+
val, Self::DEFAULT_SHUTDOWN_TIMEOUT_SECS
120+
);
121+
Duration::from_secs(Self::DEFAULT_SHUTDOWN_TIMEOUT_SECS)
122+
}
123+
},
122124
Err(_) => {
123-
tracing::warn!(
124-
"Invalid EV_RETH_STATUS_CHECK_INTERVAL value '{}', using default {}s",
125-
val, Config::DEFAULT_STATUS_CHECK_INTERVAL_SECS
126-
);
127-
Config::DEFAULT_STATUS_CHECK_INTERVAL_SECS
125+
tracing::info!("Using default shutdown timeout of {}s", Self::DEFAULT_SHUTDOWN_TIMEOUT_SECS);
126+
Duration::from_secs(Self::DEFAULT_SHUTDOWN_TIMEOUT_SECS)
128127
}
129-
},
130-
Err(_) => Config::DEFAULT_STATUS_CHECK_INTERVAL_SECS,
128+
}
129+
}
130+
131+
fn parse_status_check_interval() -> u64 {
132+
match std::env::var("EV_RETH_STATUS_CHECK_INTERVAL") {
133+
Ok(val) => match val.parse::<u64>() {
134+
Ok(secs) if secs > 0 && secs <= Self::MAX_STATUS_CHECK_INTERVAL_SECS => {
135+
tracing::info!("Using custom status check interval of {}s from environment", secs);
136+
secs
137+
}
138+
Ok(secs) => {
139+
tracing::warn!(
140+
"Status check interval {}s is out of bounds (1-{}), using default {}s",
141+
secs, Self::MAX_STATUS_CHECK_INTERVAL_SECS, Self::DEFAULT_STATUS_CHECK_INTERVAL_SECS
142+
);
143+
Self::DEFAULT_STATUS_CHECK_INTERVAL_SECS
144+
}
145+
Err(_) => {
146+
tracing::warn!(
147+
"Invalid EV_RETH_STATUS_CHECK_INTERVAL value '{}', using default {}s",
148+
val, Self::DEFAULT_STATUS_CHECK_INTERVAL_SECS
149+
);
150+
Self::DEFAULT_STATUS_CHECK_INTERVAL_SECS
151+
}
152+
},
153+
Err(_) => Self::DEFAULT_STATUS_CHECK_INTERVAL_SECS,
154+
}
131155
}
132156
}
133157

134158
/// Fallback mechanism for when signal handling fails completely
135-
///
136-
/// In most production deployments with container orchestrators, this fallback should rarely
137-
/// be needed as orchestrators provide their own health checks and signal handling.
138-
/// This provides a minimal fallback that doesn't consume unnecessary resources.
139-
async fn signal_fallback_mechanism() {
140-
// Check if we should use periodic status checks or just wait indefinitely
141-
let use_status_checks = std::env::var("EV_RETH_ENABLE_FALLBACK_STATUS_CHECKS")
142-
.map(|v| v.to_lowercase() == "true")
143-
.unwrap_or(false);
144-
145-
if use_status_checks {
146-
tracing::info!("=== EV-RETH: Fallback status checks enabled ===");
147-
let status_check_interval = parse_status_check_interval();
148-
149-
// Limit the number of status checks to prevent infinite resource consumption
150-
let max_checks = std::env::var("EV_RETH_MAX_FALLBACK_CHECKS")
151-
.ok()
152-
.and_then(|s| s.parse().ok())
153-
.unwrap_or(Config::DEFAULT_MAX_FALLBACK_CHECKS);
159+
async fn signal_fallback_mechanism(config: &NodeConfig) {
160+
if config.enable_fallback_status_checks {
161+
tracing::info!("Fallback status checks enabled");
154162

155163
let mut check_count = 0;
156-
while check_count < max_checks {
157-
match tokio::time::sleep(Duration::from_secs(status_check_interval)).await {
158-
() => {
159-
check_count += 1;
160-
tracing::info!("=== EV-RETH: Periodic status check #{} - node still running ===", check_count);
164+
while check_count < config.max_fallback_checks {
165+
tokio::time::sleep(Duration::from_secs(config.status_check_interval)).await;
166+
check_count += 1;
167+
tracing::info!("Periodic status check #{} - node still running", check_count);
168+
}
169+
170+
tracing::info!("Maximum fallback status checks ({}) reached, switching to efficient wait", config.max_fallback_checks);
171+
} else {
172+
tracing::info!("Using efficient fallback - waiting indefinitely for natural node exit");
173+
tracing::info!("Set EV_RETH_ENABLE_FALLBACK_STATUS_CHECKS=true to enable periodic status logging");
174+
}
175+
176+
// Use std::future::pending() for the most efficient "wait forever" approach
177+
std::future::pending::<()>().await;
178+
}
179+
180+
/// Handle shutdown signals with optimized signal handling
181+
async fn handle_shutdown_signals() {
182+
#[cfg(unix)]
183+
{
184+
// On Unix systems, use a single select to handle both SIGTERM and SIGINT efficiently
185+
// This avoids redundant signal handling and potential race conditions
186+
match signal::unix::signal(signal::unix::SignalKind::terminate()) {
187+
Ok(mut sigterm) => {
188+
tokio::select! {
189+
_ = sigterm.recv() => {
190+
tracing::info!("Received SIGTERM, initiating graceful shutdown");
191+
}
192+
_ = signal::ctrl_c() => {
193+
tracing::info!("Received SIGINT/Ctrl+C, initiating graceful shutdown");
194+
}
195+
}
196+
}
197+
Err(err) => {
198+
tracing::warn!("Failed to install SIGTERM handler: {}, using SIGINT only", err);
199+
// Fallback to SIGINT only - this is the most common scenario
200+
if let Err(ctrl_c_err) = signal::ctrl_c().await {
201+
tracing::error!("Failed to wait for SIGINT: {}", ctrl_c_err);
202+
tracing::warn!("No signal handling available, shutdown will only occur on natural node exit");
203+
let config = NodeConfig::from_env();
204+
signal_fallback_mechanism(&config).await;
205+
} else {
206+
tracing::info!("Received SIGINT/Ctrl+C, initiating graceful shutdown");
161207
}
162208
}
163209
}
210+
}
164211

165-
tracing::info!("=== EV-RETH: Maximum fallback status checks ({}) reached, switching to efficient wait ===", max_checks);
166-
// After reaching max checks, switch to efficient pending wait
167-
std::future::pending::<()>().await;
168-
} else {
169-
tracing::info!("=== EV-RETH: Using efficient fallback - waiting indefinitely for natural node exit ===");
170-
tracing::info!("=== EV-RETH: Set EV_RETH_ENABLE_FALLBACK_STATUS_CHECKS=true to enable periodic status logging ===");
171-
// Use std::future::pending() for the most efficient "wait forever" approach
172-
// This consumes minimal resources and is appropriate for container environments
173-
std::future::pending::<()>().await;
212+
#[cfg(not(unix))]
213+
{
214+
// On non-Unix systems, only handle Ctrl+C (SIGINT)
215+
if let Err(err) = signal::ctrl_c().await {
216+
tracing::error!("Failed to wait for SIGINT: {}", err);
217+
tracing::warn!("No signal handling available, shutdown will only occur on natural node exit");
218+
let config = NodeConfig::from_env();
219+
signal_fallback_mechanism(&config).await;
220+
} else {
221+
tracing::info!("Received SIGINT/Ctrl+C, initiating graceful shutdown");
222+
}
174223
}
175224
}
176225

@@ -310,96 +359,39 @@ fn main() {
310359

311360
tracing::info!("=== EV-RETH: Node launched successfully with ev-reth payload builder ===");
312361

313-
// Set up graceful shutdown handling
314-
let shutdown_signal = async {
315-
#[cfg(unix)]
316-
{
317-
// On Unix systems, handle both SIGTERM and SIGINT (Ctrl+C)
318-
// SIGTERM is typically sent by process managers for graceful shutdown
319-
// SIGINT is sent by Ctrl+C from terminal
320-
match signal::unix::signal(signal::unix::SignalKind::terminate()) {
321-
Ok(mut sigterm) => {
322-
// Successfully set up SIGTERM handler, now wait for either signal
323-
tokio::select! {
324-
_ = sigterm.recv() => {
325-
tracing::info!("=== EV-RETH: Received SIGTERM, initiating graceful shutdown ===");
326-
}
327-
_ = signal::ctrl_c() => {
328-
tracing::info!("=== EV-RETH: Received SIGINT/Ctrl+C, initiating graceful shutdown ===");
329-
}
330-
}
331-
}
332-
Err(err) => {
333-
tracing::warn!("Failed to install SIGTERM handler: {}, falling back to SIGINT only", err);
334-
// Fall back to just handling SIGINT/Ctrl+C
335-
match signal::ctrl_c().await {
336-
Ok(_) => {
337-
tracing::info!("=== EV-RETH: Received SIGINT/Ctrl+C, initiating graceful shutdown ===");
338-
}
339-
Err(ctrl_c_err) => {
340-
tracing::error!("Failed to wait for SIGINT: {}", ctrl_c_err);
341-
tracing::warn!("No signal handling available, shutdown will only occur on natural node exit");
342-
// Use efficient fallback mechanism
343-
signal_fallback_mechanism().await;
344-
}
345-
}
346-
}
347-
}
348-
}
349-
350-
#[cfg(not(unix))]
351-
{
352-
// On non-Unix systems, only handle Ctrl+C (SIGINT)
353-
match signal::ctrl_c().await {
354-
Ok(_) => {
355-
tracing::info!("=== EV-RETH: Received SIGINT/Ctrl+C, initiating graceful shutdown ===");
356-
}
357-
Err(err) => {
358-
tracing::error!("Failed to wait for SIGINT: {}", err);
359-
tracing::warn!("No signal handling available, shutdown will only occur on natural node exit");
360-
// Use efficient fallback mechanism
361-
signal_fallback_mechanism().await;
362-
}
363-
}
364-
}
365-
};
362+
// Load configuration once at startup
363+
let config = NodeConfig::from_env();
366364

367365
// Wait for either the node to exit naturally or a shutdown signal
368366
tokio::select! {
369367
result = &mut handle.node_exit_future => {
370-
tracing::info!("=== EV-RETH: Node exited naturally ===");
368+
tracing::info!("Node exited naturally");
371369
result
372370
}
373-
_ = shutdown_signal => {
374-
tracing::info!("=== EV-RETH: Shutdown signal received, initiating graceful shutdown ===");
371+
_ = handle_shutdown_signals() => {
372+
tracing::info!("Shutdown signal received, initiating graceful shutdown");
375373

376374
// Structured shutdown phases for better observability
377-
tracing::info!("=== EV-RETH: Phase 1 - Stopping new connections ===");
378-
379-
// Initiate graceful shutdown with configurable timeout
380-
let shutdown_timeout = parse_shutdown_timeout();
381-
382-
tracing::info!("=== EV-RETH: Phase 2 - Draining active requests ===");
375+
tracing::info!("Phase 1 - Stopping new connections");
376+
tracing::info!("Phase 2 - Draining active requests");
383377

384378
// Wait for the node to actually exit with a timeout
385-
// Use a reference to avoid partial move issues
386-
let shutdown_result = timeout(shutdown_timeout, &mut handle.node_exit_future).await;
379+
let shutdown_result = timeout(config.shutdown_timeout, &mut handle.node_exit_future).await;
387380

388-
tracing::info!("=== EV-RETH: Phase 3 - Shutdown completed ===");
381+
tracing::info!("Phase 3 - Shutdown completed");
389382

390383
match shutdown_result {
391384
Ok(result) => {
392-
tracing::info!("=== EV-RETH: Node shutdown completed gracefully ===");
385+
tracing::info!("Node shutdown completed gracefully");
393386
result
394387
}
395388
Err(_) => {
396-
tracing::error!("=== EV-RETH: Node shutdown timed out after {:?} ===", shutdown_timeout);
397-
tracing::error!("=== EV-RETH: Forcing application exit - this may indicate a shutdown issue ===");
389+
tracing::error!("Node shutdown timed out after {:?}", config.shutdown_timeout);
390+
tracing::error!("Forcing application exit - this may indicate a shutdown issue");
398391
// Return an error to indicate that shutdown didn't complete gracefully
399-
// This provides better error reporting for monitoring systems
400392
Err(Box::new(std::io::Error::new(
401393
std::io::ErrorKind::TimedOut,
402-
format!("Graceful shutdown timed out after {}s", shutdown_timeout.as_secs())
394+
format!("Graceful shutdown timed out after {}s", config.shutdown_timeout.as_secs())
403395
)) as Box<dyn std::error::Error + Send + Sync>)
404396
}
405397
}

0 commit comments

Comments
 (0)