jkleinne
diff --git a/‎config.example.toml‎
Lines changed: 7 additions & 0 deletions b/‎config.example.toml‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎internal/config/config.go‎
Lines changed: 42 additions & 1 deletion b/‎internal/config/config.go‎
Lines changed: 42 additions & 1 deletion
diff --git a/‎internal/config/config_test.go‎
Lines changed: 153 additions & 0 deletions b/‎internal/config/config_test.go‎
Lines changed: 153 additions & 0 deletions
diff --git a/‎internal/engine/rclone.go‎
Lines changed: 13 additions & 5 deletions b/‎internal/engine/rclone.go‎
Lines changed: 13 additions & 5 deletions
diff --git a/‎internal/engine/rclone_test.go‎
Lines changed: 26 additions & 0 deletions b/‎internal/engine/rclone_test.go‎
Lines changed: 26 additions & 0 deletions
diff --git a/‎internal/engine/render.go‎
Lines changed: 4 additions & 1 deletion b/‎internal/engine/render.go‎
Lines changed: 4 additions & 1 deletion
@@ -79,3 +79,10 @@ mode = "copy"
 bwlimit = "2M"
 # optional = true          # skip without failure if the local source is absent
                             # (useful for detachable devices like external drives)
+# max_runtime = "2h"       # cap per-invocation wall-clock time; Shuttle kills
+                            # the tool and reports "timed out" (counts as a
+                            # failure, exit 1) if exceeded. Omit to disable.
+                            # Format: Go duration string (e.g. "30s", "5m",
+                            # "2h", "1h30m"). Applies per invocation: rsync
+                            # jobs with N sources and rclone jobs with N
+                            # remotes each get N independent timeouts.
@@ -11,6 +11,7 @@ import (
 	"os"
 	"path/filepath"
 	"strings"
+	"time"
 
 	toml "github.com/pelletier/go-toml/v2"
 )
@@ -85,6 +86,12 @@ type Job struct {
 	// sources (paths containing ":") since those are not stat'd before
 	// invocation.
 	Optional bool `toml:"optional"`
+	// MaxRuntime caps the wall-clock duration of each rsync/rclone
+	// invocation. Parsed via time.ParseDuration (e.g. "30m", "2h"). Empty
+	// means no timeout. Zero and negative durations are rejected at
+	// validation time. Applies per invocation: a job with N rsync sources
+	// or N rclone remotes gets N independent timeouts.
+	MaxRuntime string `toml:"max_runtime"`
 
 	// Rsync fields
 	Sources     []string `toml:"sources"`
@@ -167,6 +174,19 @@ func (c *Config) ResolvedLogRetentionDays() int {
 	return *c.Defaults.LogRetentionDays
 }
 
+// MaxRuntimeDuration converts the job's max_runtime string into a
+// time.Duration. An empty field yields (0, nil), i.e. "no timeout".
+// Configs loaded through LoadBytes/LoadFile have already had
+// max_runtime validated, so the error is only reachable for Job
+// values built by hand (e.g. in tests); those callers must check it
+// so a typo is reported instead of silently meaning "no timeout".
+func (j Job) MaxRuntimeDuration() (time.Duration, error) {
+	if raw := j.MaxRuntime; raw != "" {
+		return time.ParseDuration(raw)
+	}
+	return 0, nil
+}
+
 // JobNames returns the names of all configured jobs in config order.
 func (c *Config) JobNames() []string {
 	names := make([]string, len(c.Jobs))
@@ -302,7 +322,7 @@ func validateRsyncJob(job Job) error {
 	if job.Destination == "" {
 		return fmt.Errorf("job %q: empty destination", job.Name)
 	}
-	return nil
+	return validateMaxRuntime(job)
 }
 
 func validateRcloneJob(job Job) error {
@@ -326,5 +346,26 @@ func validateRcloneJob(job Job) error {
 		}
 		remoteSeen[r] = true
 	}
+	return validateMaxRuntime(job)
+}
+
+// validateMaxRuntime checks a job's max_runtime setting. The empty
+// string is accepted and means "no timeout". Anything else must parse
+// with time.ParseDuration to a strictly positive duration; "0s" is
+// refused so nobody writes it expecting it to disable the timeout.
+func validateMaxRuntime(job Job) error {
+	raw := job.MaxRuntime
+	if raw == "" {
+		return nil
+	}
+	limit, err := time.ParseDuration(raw)
+	switch {
+	case err != nil:
+		return fmt.Errorf("job %q: invalid max_runtime %q: %w", job.Name, raw, err)
+	case limit == 0:
+		return fmt.Errorf("job %q: max_runtime is %q; omit the field to disable the timeout", job.Name, raw)
+	case limit < 0:
+		return fmt.Errorf("job %q: max_runtime must be positive (got %q)", job.Name, raw)
+	}
 	return nil
 }
@@ -5,6 +5,7 @@ import (
 	"path/filepath"
 	"strings"
 	"testing"
+	"time"
 
 	"github.com/jkleinne/shuttle/internal/config"
 )
@@ -546,3 +547,155 @@ destination = "/tmp/backup"
 		t.Error("Jobs[0].Optional = true, want false (zero value)")
 	}
 }
+
+// TestLoad_MaxRuntime_ParsedAndExposed loads max_runtime = "2h" and expects the raw string and a 2h duration.
+func TestLoad_MaxRuntime_ParsedAndExposed(t *testing.T) {
+	const input = `
+[[job]]
+name = "photos"
+engine = "rsync"
+sources = ["/tmp/photos"]
+destination = "/tmp/backup"
+max_runtime = "2h"
+`
+	loaded, err := config.LoadBytes([]byte(input))
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	job := loaded.Jobs[0]
+	if job.MaxRuntime != "2h" {
+		t.Errorf("MaxRuntime = %q, want \"2h\"", job.MaxRuntime)
+	}
+	if d, err := job.MaxRuntimeDuration(); err != nil {
+		t.Fatalf("MaxRuntimeDuration() error = %v", err)
+	} else if d != 2*time.Hour {
+		t.Errorf("MaxRuntimeDuration() = %v, want 2h", d)
+	}
+}
+
+// TestLoad_MaxRuntime_Absent_DurationIsZero omits max_runtime and expects an empty string and a zero duration.
+func TestLoad_MaxRuntime_Absent_DurationIsZero(t *testing.T) {
+	const input = `
+[[job]]
+name = "photos"
+engine = "rsync"
+sources = ["/tmp/photos"]
+destination = "/tmp/backup"
+`
+	loaded, err := config.LoadBytes([]byte(input))
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	job := loaded.Jobs[0]
+	if job.MaxRuntime != "" {
+		t.Errorf("MaxRuntime = %q, want empty", job.MaxRuntime)
+	}
+	if d, err := job.MaxRuntimeDuration(); err != nil {
+		t.Fatalf("MaxRuntimeDuration() error = %v", err)
+	} else if d != 0 {
+		t.Errorf("MaxRuntimeDuration() = %v, want 0", d)
+	}
+}
+
+// TestLoad_MaxRuntime_Zero_Rejected expects max_runtime = "0s" to fail loading with an error naming the job and the field.
+func TestLoad_MaxRuntime_Zero_Rejected(t *testing.T) {
+	const input = `
+[[job]]
+name = "photos"
+engine = "rsync"
+sources = ["/tmp/photos"]
+destination = "/tmp/backup"
+max_runtime = "0s"
+`
+	_, err := config.LoadBytes([]byte(input))
+	if err == nil {
+		t.Fatal("expected error for max_runtime = \"0s\", got nil")
+	}
+	if msg := err.Error(); !(strings.Contains(msg, "photos") && strings.Contains(msg, "max_runtime")) {
+		t.Errorf("error %q should mention job name and field", msg)
+	}
+}
+
+// TestLoad_MaxRuntime_Negative_Rejected expects max_runtime = "-5m" to fail with an error naming the job and saying "positive".
+func TestLoad_MaxRuntime_Negative_Rejected(t *testing.T) {
+	_, err := config.LoadBytes([]byte(`
+[[job]]
+name = "photos"
+engine = "rsync"
+sources = ["/tmp/photos"]
+destination = "/tmp/backup"
+max_runtime = "-5m"
+`))
+	if err == nil {
+		t.Fatal("expected error for negative max_runtime, got nil")
+	}
+	if got := err.Error(); !strings.Contains(got, "photos") {
+		t.Errorf("error %q should mention job name", got)
+	}
+	if got := err.Error(); !strings.Contains(got, "positive") {
+		t.Errorf("error %q should mention \"positive\"", got)
+	}
+}
+
+// TestLoad_MaxRuntime_Malformed_Rejected expects an unparseable max_runtime ("banana") to fail with an error naming the job.
+func TestLoad_MaxRuntime_Malformed_Rejected(t *testing.T) {
+	_, err := config.LoadBytes([]byte(`
+[[job]]
+name = "photos"
+engine = "rsync"
+sources = ["/tmp/photos"]
+destination = "/tmp/backup"
+max_runtime = "banana"
+`))
+	if err == nil {
+		t.Fatal("expected error for malformed max_runtime, got nil")
+	}
+	if got := err.Error(); !strings.Contains(got, "photos") {
+		t.Errorf("error %q should mention job name", got)
+	}
+}
+
+// TestLoad_MaxRuntime_RcloneJob_ValidatedAndExposed covers the rclone
+// side of max_runtime handling. validateMaxRuntime is invoked from both
+// validateRsyncJob and validateRcloneJob; the sibling tests exercise
+// rsync jobs, so this one pins the rclone path with a valid "30m".
+func TestLoad_MaxRuntime_RcloneJob_ValidatedAndExposed(t *testing.T) {
+	const input = `
+[[job]]
+name = "docs-to-cloud"
+engine = "rclone"
+source = "/tmp/docs"
+remotes = ["my_gdrive"]
+mode = "copy"
+max_runtime = "30m"
+`
+	loaded, err := config.LoadBytes([]byte(input))
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	job := loaded.Jobs[0]
+	if d, err := job.MaxRuntimeDuration(); err != nil {
+		t.Fatalf("MaxRuntimeDuration() error = %v", err)
+	} else if d != 30*time.Minute {
+		t.Errorf("MaxRuntimeDuration() = %v, want 30m", d)
+	}
+}
+
+// TestLoad_MaxRuntime_RcloneJob_NegativeRejected expects a negative max_runtime on an rclone job to fail with an error naming the job.
+func TestLoad_MaxRuntime_RcloneJob_NegativeRejected(t *testing.T) {
+	_, err := config.LoadBytes([]byte(`
+[[job]]
+name = "docs-to-cloud"
+engine = "rclone"
+source = "/tmp/docs"
+remotes = ["my_gdrive"]
+mode = "copy"
+max_runtime = "-1h"
+`))
+	if err == nil {
+		t.Fatal("expected error for negative max_runtime on rclone job, got nil")
+	}
+	if got := err.Error(); !strings.Contains(got, "docs-to-cloud") {
+		t.Errorf("error %q should mention job name", got)
+	}
+}
@@ -136,8 +136,13 @@ func (e *RcloneExecutor) Exec(ctx context.Context, args []string, onProgress fun
 	}
 
 	if err := cmd.Start(); err != nil {
-		e.logger.FileError(fmt.Sprintf("rclone start failed for %s: %v", displayName, err))
-		return ItemResult{Name: displayName, Status: StatusFailed}
+		status := classifyExitStatus(ctx, err)
+		if status == StatusTimedOut {
+			e.logger.FileError(fmt.Sprintf("rclone timed out for %s after per-job max_runtime: %v", displayName, err))
+		} else {
+			e.logger.FileError(fmt.Sprintf("rclone start failed for %s: %v", displayName, err))
+		}
+		return ItemResult{Name: displayName, Status: status}
 	}
 
 	var pipeWg sync.WaitGroup
@@ -168,14 +173,17 @@ func (e *RcloneExecutor) Exec(ctx context.Context, args []string, onProgress fun
 	}
 	stats.Elapsed = elapsed
 
-	status := StatusOK
+	status := classifyExitStatus(ctx, runErr)
 	if runErr != nil {
-		status = StatusFailed
 		subcommand := "rclone"
 		if len(args) > 0 {
 			subcommand = "rclone " + args[0]
 		}
-		e.logger.FileError(fmt.Sprintf("%s failed for %s: %v", subcommand, displayName, runErr))
+		if status == StatusTimedOut {
+			e.logger.FileError(fmt.Sprintf("%s timed out for %s after per-job max_runtime: %v", subcommand, displayName, runErr))
+		} else {
+			e.logger.FileError(fmt.Sprintf("%s failed for %s: %v", subcommand, displayName, runErr))
+		}
 	}
 
 	return ItemResult{Name: displayName, Status: status, Stats: stats}
 
@@ -483,3 +483,29 @@ func TestCleanupArchives_KeepsRecent(t *testing.T) {
 		t.Error("recent archive dir should still exist")
 	}
 }
+
+func TestRcloneExec_ExpiredContext_ReturnsTimedOut(t *testing.T) {
+	skipIfNoRclone(t)
+
+	src := t.TempDir()
+	dst := t.TempDir()
+	if err := os.WriteFile(filepath.Join(src, "hello.txt"), []byte("world"), 0o644); err != nil {
+		t.Fatalf("writing test file: %v", err)
+	}
+
+	// The deadline is already in the past, so the context is expired before rclone
+	// is launched; Exec must classify the resulting failure as StatusTimedOut.
+	ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(-1*time.Second))
+	defer cancel()
+
+	executor, logPath := newRcloneTestExecutor(t)
+	// --config /dev/null prevents rclone from loading the developer's
+	// real rclone config during the test.
+	job := config.Job{ExtraFlags: []string{"--config", "/dev/null"}}
+	args := BuildRcloneArgs("copy", nil, job, src+"/", ":local:"+dst, false, logPath, "")
+	result := executor.Exec(ctx, args, nil)
+
+	if result.Status != StatusTimedOut {
+		t.Errorf("Status = %q, want %q", result.Status, StatusTimedOut)
+	}
+}
@@ -36,6 +36,7 @@ const (
 	symbolOptionalMissing  = "○"
 	labelFailed            = "failed"
 	labelNotFound          = "not found"
+	labelTimedOut          = "timed out"
 	labelSkipped           = "skipped"
 	labelOptionalMissing   = "source missing (optional)"
 	tallyLabelPassed       = "passed"
@@ -56,7 +57,7 @@ func colorize(useColor bool, code, text string) string {
 // statusSymbol returns a colored status indicator character.
 func statusSymbol(status Status, useColor bool) string {
 	switch status {
-	case StatusFailed, StatusNotFound:
+	case StatusFailed, StatusNotFound, StatusTimedOut:
 		return colorize(useColor, ansiRed, symbolFailed)
 	case StatusSkipped:
 		return colorize(useColor, ansiYellow, symbolSkipped)
@@ -75,6 +76,8 @@ func itemStatsText(item ItemResult, useColor bool) string {
 		return colorize(useColor, ansiRed, labelFailed)
 	case StatusNotFound:
 		return colorize(useColor, ansiRed, labelNotFound)
+	case StatusTimedOut:
+		return colorize(useColor, ansiRed, labelTimedOut)
 	case StatusSkipped:
 		return colorize(useColor, ansiYellow, labelSkipped)
 	case StatusOptionalMissing: