@@ -63,26 +63,53 @@ struct NodeConfig {
6363}
6464
6565impl NodeConfig {
66- /// Minimum allowed shutdown timeout in seconds
67- const MIN_SHUTDOWN_TIMEOUT_SECS : u64 = 1 ;
68-
69- /// Default shutdown timeout in seconds (optimized for containers)
70- const DEFAULT_SHUTDOWN_TIMEOUT_SECS : u64 = 15 ;
71-
72- /// Maximum allowed shutdown timeout in seconds (5 minutes)
73- const MAX_SHUTDOWN_TIMEOUT_SECS : u64 = 300 ;
74-
75- /// Minimum allowed status check interval in seconds
76- const MIN_STATUS_CHECK_INTERVAL_SECS : u64 = 1 ;
77-
78- /// Default status check interval in seconds (1 hour)
79- const DEFAULT_STATUS_CHECK_INTERVAL_SECS : u64 = 3600 ;
80-
81- /// Maximum allowed status check interval in seconds (6 hours)
82- const MAX_STATUS_CHECK_INTERVAL_SECS : u64 = 21600 ;
83-
84- /// Default maximum number of fallback status checks
85- const DEFAULT_MAX_FALLBACK_CHECKS : u64 = 24 ;
66+ /// Minimum shutdown timeout prevents immediate termination that could cause data corruption
67+ ///
68+ /// Set to 1 second so that basic cleanup operations always get at least some
69+ /// time to complete; smaller configured values are rejected.
70+ pub const MIN_SHUTDOWN_TIMEOUT_SECS : u64 = 1 ;
71+
72+ /// Default timeout chosen to fit within the Kubernetes pod termination grace period (30s).
73+ /// Set to 15s to allow cleanup before SIGKILL.
74+ ///
75+ /// This provides sufficient time for graceful shutdown in containerized environments
76+ /// while leaving buffer time before the container orchestrator sends SIGKILL.
77+ /// The 15-second timeout allows for connection draining, state persistence, and
78+ /// cleanup operations to complete normally.
79+ pub const DEFAULT_SHUTDOWN_TIMEOUT_SECS : u64 = 15 ;
80+
81+ /// Maximum shutdown timeout prevents indefinite hangs during node termination
82+ ///
83+ /// Set to 5 minutes (300s) to handle complex shutdown scenarios while ensuring
84+ /// the node doesn't hang indefinitely. This upper bound protects against
85+ /// deadlocks or resource contention that could prevent clean shutdown.
86+ pub const MAX_SHUTDOWN_TIMEOUT_SECS : u64 = 300 ;
87+
88+ /// Minimum status check interval ensures reasonable monitoring frequency
89+ ///
90+ /// Set to 1 second to prevent excessive CPU usage from overly frequent status checks
91+ /// while still allowing responsive monitoring when needed.
92+ pub const MIN_STATUS_CHECK_INTERVAL_SECS : u64 = 1 ;
93+
94+ /// Default status check interval balances monitoring with resource efficiency
95+ ///
96+ /// Set to 1 hour (3600s) to provide periodic health status logging without
97+ /// overwhelming logs or consuming excessive resources. This interval is suitable
98+ /// for long-running production deployments where occasional status updates are sufficient.
99+ pub const DEFAULT_STATUS_CHECK_INTERVAL_SECS : u64 = 3600 ;
100+
101+ /// Maximum status check interval prevents excessively sparse monitoring
102+ ///
103+ /// Set to 6 hours (21600s) to ensure status checks occur at least 4 times per day,
104+ /// providing minimum visibility into node health for operational monitoring.
105+ pub const MAX_STATUS_CHECK_INTERVAL_SECS : u64 = 21600 ;
106+
107+ /// Default maximum fallback status checks limits resource usage during fallback mode
108+ ///
109+ /// Set to 24 checks to provide up to 24 hours of status logging (at default 1-hour intervals)
110+ /// before switching to efficient indefinite waiting. This prevents log spam while
111+ /// maintaining visibility during extended fallback periods.
112+ pub const DEFAULT_MAX_FALLBACK_CHECKS : u64 = 24 ;
86113
87114 /// Load configuration from environment variables with validation
88115 fn from_env ( ) -> Self {
@@ -215,7 +242,7 @@ async fn signal_fallback_mechanism(config: &NodeConfig) {
215242}
216243
217244/// Handle shutdown signals with optimized, non-redundant signal handling
218- async fn handle_shutdown_signals ( ) {
245+ async fn handle_shutdown_signals ( config : & NodeConfig ) {
219246 #[ cfg( unix) ]
220247 {
221248 // On Unix systems, handle SIGTERM and SIGINT separately to avoid redundancy
@@ -261,8 +288,7 @@ async fn handle_shutdown_signals() {
261288 tracing:: warn!(
262289 "No signal handling available, shutdown will only occur on natural node exit"
263290 ) ;
264- let config = NodeConfig :: from_env ( ) ;
265- signal_fallback_mechanism ( & config) . await ;
291+ signal_fallback_mechanism ( config) . await ;
266292 }
267293 }
268294 }
@@ -275,8 +301,7 @@ async fn handle_shutdown_signals() {
275301 tracing:: warn!(
276302 "No signal handling available, shutdown will only occur on natural node exit"
277303 ) ;
278- let config = NodeConfig :: from_env ( ) ;
279- signal_fallback_mechanism ( & config) . await ;
304+ signal_fallback_mechanism ( config) . await ;
280305 } else {
281306 tracing:: info!( "Received SIGINT/Ctrl+C, initiating graceful shutdown" ) ;
282307 }
@@ -382,9 +407,91 @@ where
382407 }
383408}
384409
410+ /// Validate critical environment variables at startup to prevent runtime issues
411+ fn validate_env_vars ( ) -> Result < ( ) , String > {
412+ // Validate shutdown timeout
413+ if let Ok ( val) = std:: env:: var ( "EV_RETH_SHUTDOWN_TIMEOUT" ) {
414+ let timeout = val. parse :: < u64 > ( ) . map_err ( |_| {
415+ format ! ( "Invalid EV_RETH_SHUTDOWN_TIMEOUT: '{}' - must be a valid number" , val)
416+ } ) ?;
417+
418+ if timeout < NodeConfig :: MIN_SHUTDOWN_TIMEOUT_SECS {
419+ return Err ( format ! (
420+ "EV_RETH_SHUTDOWN_TIMEOUT: {} is below minimum of {}s" ,
421+ timeout, NodeConfig :: MIN_SHUTDOWN_TIMEOUT_SECS
422+ ) ) ;
423+ }
424+
425+ if timeout > NodeConfig :: MAX_SHUTDOWN_TIMEOUT_SECS {
426+ return Err ( format ! (
427+ "EV_RETH_SHUTDOWN_TIMEOUT: {} exceeds maximum of {}s" ,
428+ timeout, NodeConfig :: MAX_SHUTDOWN_TIMEOUT_SECS
429+ ) ) ;
430+ }
431+ }
432+
433+ // Validate status check interval
434+ if let Ok ( val) = std:: env:: var ( "EV_RETH_STATUS_CHECK_INTERVAL" ) {
435+ let interval = val. parse :: < u64 > ( ) . map_err ( |_| {
436+ format ! ( "Invalid EV_RETH_STATUS_CHECK_INTERVAL: '{}' - must be a valid number" , val)
437+ } ) ?;
438+
439+ if interval < NodeConfig :: MIN_STATUS_CHECK_INTERVAL_SECS {
440+ return Err ( format ! (
441+ "EV_RETH_STATUS_CHECK_INTERVAL: {} is below minimum of {}s" ,
442+ interval, NodeConfig :: MIN_STATUS_CHECK_INTERVAL_SECS
443+ ) ) ;
444+ }
445+
446+ if interval > NodeConfig :: MAX_STATUS_CHECK_INTERVAL_SECS {
447+ return Err ( format ! (
448+ "EV_RETH_STATUS_CHECK_INTERVAL: {} exceeds maximum of {}s" ,
449+ interval, NodeConfig :: MAX_STATUS_CHECK_INTERVAL_SECS
450+ ) ) ;
451+ }
452+ }
453+
454+ // Validate fallback status checks flag
455+ if let Ok ( val) = std:: env:: var ( "EV_RETH_ENABLE_FALLBACK_STATUS_CHECKS" ) {
456+ let normalized = val. to_lowercase ( ) ;
457+ if normalized != "true" && normalized != "false" {
458+ return Err ( format ! (
459+ "Invalid EV_RETH_ENABLE_FALLBACK_STATUS_CHECKS: '{}' - must be 'true' or 'false'" ,
460+ val
461+ ) ) ;
462+ }
463+ }
464+
465+ // Validate max fallback checks
466+ if let Ok ( val) = std:: env:: var ( "EV_RETH_MAX_FALLBACK_CHECKS" ) {
467+ val. parse :: < u64 > ( ) . map_err ( |_| {
468+ format ! ( "Invalid EV_RETH_MAX_FALLBACK_CHECKS: '{}' - must be a valid number" , val)
469+ } ) ?;
470+ }
471+
472+ // Validate RUST_BACKTRACE if set by user (we set it ourselves if not present)
473+ if let Ok ( val) = std:: env:: var ( "RUST_BACKTRACE" ) {
474+ let normalized = val. to_lowercase ( ) ;
475+ if normalized != "0" && normalized != "1" && normalized != "full" {
476+ return Err ( format ! (
477+ "Invalid RUST_BACKTRACE: '{}' - must be '0', '1', or 'full'" ,
478+ val
479+ ) ) ;
480+ }
481+ }
482+
483+ Ok ( ( ) )
484+ }
485+
385486fn main ( ) {
386487 tracing:: info!( "=== EV-RETH NODE STARTING ===" ) ;
387488
489+ // Validate environment variables early to catch configuration issues
490+ if let Err ( err) = validate_env_vars ( ) {
491+ eprintln ! ( "Environment variable validation failed: {}" , err) ;
492+ std:: process:: exit ( 1 ) ;
493+ }
494+
388495 reth_cli_util:: sigsegv_handler:: install ( ) ;
389496
390497 // Enable backtraces unless a RUST_BACKTRACE value has already been explicitly provided.
@@ -428,7 +535,7 @@ fn main() {
428535 tracing:: info!( "Node exited naturally" ) ;
429536 result
430537 }
431- _ = handle_shutdown_signals( ) => {
538+ _ = handle_shutdown_signals( & config ) => {
432539 tracing:: info!( "Shutdown signal received, initiating graceful shutdown" ) ;
433540
434541 // Structured shutdown phases for better observability (informational only)
0 commit comments