add benchmark

breardon2011 · breardon2011 · commit 053dba499f66 · 2025-12-02T19:35:08.000-08:00
diff --git a/sandbox-sidecar/src/routes/runRoutes.ts b/sandbox-sidecar/src/routes/runRoutes.ts
@@ -17,6 +17,14 @@ export function createRunRouter(
   router.post("/api/v1/sandboxes/runs", (req, res, next) => {
     try {
       const parsed = runRequestSchema.parse(req.body);
+      
+      // Debug: log received metadata including AWS region
+      console.log("Received run request metadata:", {
+        hasMetadata: !!parsed.metadata,
+        awsRegion: parsed.metadata?.AWS_REGION || "(not set)",
+        awsKeyLength: parsed.metadata?.AWS_ACCESS_KEY_ID?.length || 0,
+      });
+      
       const payload: SandboxRunPayload = {
         operation: parsed.operation,
         runId: parsed.run_id,
diff --git a/sandbox-sidecar/src/runners/e2bRunner.ts b/sandbox-sidecar/src/runners/e2bRunner.ts
@@ -136,25 +136,33 @@ export class E2BSandboxRunner implements SandboxRunner {
         stderr: applyResult.stderr.slice(-500),
       }, "terraform apply output (last 500 chars)");
 
-      // Use terraform show to get state - works regardless of workspace configuration
-      // This handles both: terraform.tfstate and terraform.tfstate.d/<workspace>/terraform.tfstate
+      // Read the actual terraform.tfstate file (not `terraform show -json`, which uses a different format)
+      // Check both standard location and workspace location
       let stateBase64 = "";
       
       try {
-        const showResult = await this.runTerraformCommand(
-          sandbox,
-          workDir,
-          ["show", "-json"],
-          undefined,
-          undefined,
-          metadata,
-        );
+        // Try standard location first
+        let statePath = `${workDir}/terraform.tfstate`;
+        let stateContent: string | null = null;
         
-        if (showResult.stdout && showResult.stdout.trim() !== "{}") {
-          stateBase64 = Buffer.from(showResult.stdout, "utf8").toString("base64");
-          logger.info({ stateSize: showResult.stdout.length }, "captured state via terraform show");
+        try {
+          stateContent = await sandbox.files.read(statePath);
+          logger.info({ path: statePath }, "found state file at standard location");
+        } catch {
+          // Try workspace location - find the workspace state directory
+          const lsResult = await sandbox.commands.run(`find ${workDir} -name "terraform.tfstate" -type f 2>/dev/null | head -1`);
+          const foundPath = lsResult.stdout.trim();
+          if (foundPath) {
+            stateContent = await sandbox.files.read(foundPath);
+            logger.info({ path: foundPath }, "found state file at workspace location");
+          }
+        }
+        
+        if (stateContent && stateContent.trim()) {
+          stateBase64 = Buffer.from(stateContent, "utf8").toString("base64");
+          logger.info({ stateSize: stateContent.length }, "captured terraform.tfstate file");
         } else {
-          logger.info("terraform show returned empty state");
+          logger.info("no terraform.tfstate file found");
         }
       } catch (err) {
         // State doesn't exist - this is OK for empty applies or destroys
@@ -226,6 +234,12 @@ export class E2BSandboxRunner implements SandboxRunner {
       envs.AWS_REGION = metadata.AWS_REGION || "us-east-1";
       // Also set default region for AWS SDK
       envs.AWS_DEFAULT_REGION = envs.AWS_REGION;
+      logger.info({ 
+        region: envs.AWS_REGION,
+        keyLength: envs.AWS_ACCESS_KEY_ID.length,
+      }, "AWS credentials injected into terraform environment");
+    } else {
+      logger.warn("No AWS credentials in metadata - AWS resources will fail");
     }
 
     return envs;
diff --git a/taco/internal/github/commands.go b/taco/internal/github/commands.go
@@ -85,11 +85,17 @@ func (e *CommandExecutor) Execute(ctx context.Context, req *ExecuteRequest) *Com
 
 	// 3. Load existing state if any
 	var state []byte
+	logger.Info("Looking for existing state", slog.String("unit_id", unitID))
 	if meta, err := e.unitRepo.Get(ctx, unitID); err == nil && meta != nil {
+		logger.Info("Unit found, downloading state...")
 		if stateData, err := e.store.Download(ctx, unitID); err == nil {
 			state = stateData
 			logger.Info("Loaded existing state", slog.Int("size", len(state)))
+		} else {
+			logger.Warn("Failed to download state", slog.String("error", err.Error()))
 		}
+	} else {
+		logger.Info("No existing unit/state found", slog.String("error", fmt.Sprintf("%v", err)))
 	}
 
 	// 4. Get terraform version from options or use default
@@ -140,7 +146,12 @@ func (e *CommandExecutor) Execute(ctx context.Context, req *ExecuteRequest) *Com
 		if metadata["AWS_REGION"] == "" {
 			metadata["AWS_REGION"] = "us-east-1"
 		}
-		// Don't log that credentials are present - security best practice
+		// Log that credentials are configured (not the values)
+		logger.Info("AWS credentials configured for sandbox",
+			slog.String("region", metadata["AWS_REGION"]),
+			slog.Int("key_length", len(awsKey)))
+	} else {
+		logger.Warn("AWS_ACCESS_KEY_ID not set - AWS resources will fail")
 	}
 
 	// 8. Execute based on action
@@ -151,6 +162,8 @@ func (e *CommandExecutor) Execute(ctx context.Context, req *ExecuteRequest) *Com
 		result = e.executeApply(ctx, logger, req, runID, unitID, archive, state, tfVersion, engine, workingDir, metadata, totalStart, false)
 	case "destroy":
 		result = e.executeApply(ctx, logger, req, runID, unitID, archive, state, tfVersion, engine, workingDir, metadata, totalStart, true)
+	case "benchmark":
+		result = e.executeBenchmark(ctx, logger, req, runID, unitID, archive, tfVersion, engine, workingDir, metadata, totalStart)
 	default:
 		result.Error = fmt.Sprintf("Unknown action: %s", req.Command.Action)
 	}
@@ -303,12 +316,21 @@ func (e *CommandExecutor) executeApply(
 	}
 
 	// Save the new state
+	logger.Info("Apply result received",
+		slog.Int("state_size", len(applyResult.State)),
+		slog.Int("logs_size", len(applyResult.Logs)),
+		slog.Bool("is_destroy", isDestroy))
+
 	if len(applyResult.State) > 0 && !isDestroy {
 		if err := e.saveState(ctx, unitID, applyResult.State); err != nil {
 			logger.Warn("Failed to save state", slog.String("error", err.Error()))
 		} else {
-			logger.Info("State saved", slog.Int("size", len(applyResult.State)))
+			logger.Info("State saved successfully",
+				slog.String("unit_id", unitID),
+				slog.Int("size", len(applyResult.State)))
 		}
+	} else if !isDestroy {
+		logger.Warn("No state returned from apply - state will not persist!")
 	}
 
 	// For destroy, clean up the state
@@ -339,6 +361,125 @@ func (e *CommandExecutor) executeApply(
 	return result
 }
 
+// executeBenchmark runs a fresh apply (no prior state) followed by a destroy in a single flow.
+// The state returned by apply is passed straight to destroy; if destroy fails, resources may be orphaned.
+func (e *CommandExecutor) executeBenchmark(
+	ctx context.Context,
+	logger *slog.Logger,
+	req *ExecuteRequest,
+	runID, unitID string,
+	archive []byte,
+	tfVersion, engine, workingDir string,
+	metadata map[string]string,
+	totalStart time.Time,
+) *CommandResult {
+	result := &CommandResult{
+		Command: req.Command,
+		Success: false,
+	}
+
+	if e.sandbox == nil {
+		result.Error = "Sandbox provider not configured"
+		result.Timing.Total = time.Since(totalStart)
+		return result
+	}
+
+	// Generate a config version ID for the sandbox
+	configVersionID := fmt.Sprintf("cv-%s", uuid.New().String()[:8])
+
+	logger.Info("Starting benchmark: apply + destroy cycle",
+		slog.String("run_id", runID),
+		slog.String("engine", engine),
+		slog.String("version", tfVersion))
+
+	var allLogs strings.Builder
+
+	// Phase 1: Apply
+	applyStart := time.Now()
+	applyReq := &sandbox.ApplyRequest{
+		RunID:                  runID,
+		PlanID:                 uuid.New().String(),
+		OrgID:                  "github-benchmark",
+		UnitID:                 unitID,
+		ConfigurationVersionID: configVersionID,
+		IsDestroy:              false,
+		TerraformVersion:       tfVersion,
+		Engine:                 engine,
+		WorkingDirectory:       workingDir,
+		ConfigArchive:          archive,
+		State:                  nil, // Fresh apply
+		Metadata:               metadata,
+	}
+
+	applyResult, err := e.sandbox.ExecuteApply(ctx, applyReq)
+	result.Timing.Apply = time.Since(applyStart)
+
+	if err != nil {
+		result.Error = fmt.Sprintf("Apply phase failed: %v", err)
+		result.Timing.Total = time.Since(totalStart)
+		logger.Error("Benchmark apply failed", slog.String("error", err.Error()))
+		return result
+	}
+
+	allLogs.WriteString("=== APPLY PHASE ===\n")
+	allLogs.WriteString(applyResult.Logs)
+	allLogs.WriteString("\n\n")
+
+	logger.Info("Benchmark apply completed",
+		slog.Duration("duration", result.Timing.Apply),
+		slog.Int("state_size", len(applyResult.State)))
+
+	// Phase 2: Destroy (using state from apply)
+	destroyStart := time.Now()
+	destroyReq := &sandbox.ApplyRequest{
+		RunID:                  runID + "-destroy",
+		PlanID:                 uuid.New().String(),
+		OrgID:                  "github-benchmark",
+		UnitID:                 unitID,
+		ConfigurationVersionID: configVersionID,
+		IsDestroy:              true,
+		TerraformVersion:       tfVersion,
+		Engine:                 engine,
+		WorkingDirectory:       workingDir,
+		ConfigArchive:          archive,
+		State:                  applyResult.State, // Use state from apply
+		Metadata:               metadata,
+	}
+
+	destroyResult, err := e.sandbox.ExecuteApply(ctx, destroyReq)
+	result.Timing.Destroy = time.Since(destroyStart)
+
+	if err != nil {
+		result.Error = fmt.Sprintf("Destroy phase failed (resources may be orphaned!): %v", err)
+		result.Timing.Total = time.Since(totalStart)
+		logger.Error("Benchmark destroy failed", slog.String("error", err.Error()))
+		return result
+	}
+
+	allLogs.WriteString("=== DESTROY PHASE ===\n")
+	allLogs.WriteString(destroyResult.Logs)
+
+	logger.Info("Benchmark destroy completed",
+		slog.Duration("duration", result.Timing.Destroy))
+
+	// Success!
+	result.Success = true
+	result.Output = allLogs.String()
+	result.Summary = fmt.Sprintf("Apply: %.2fs | Destroy: %.2fs | Total: %.2fs",
+		result.Timing.Apply.Seconds(),
+		result.Timing.Destroy.Seconds(),
+		time.Since(totalStart).Seconds())
+
+	result.Timing.Total = time.Since(totalStart)
+
+	logger.Info("Benchmark completed successfully",
+		slog.Duration("apply", result.Timing.Apply),
+		slog.Duration("destroy", result.Timing.Destroy),
+		slog.Duration("total", result.Timing.Total))
+
+	return result
+}
+
 func (e *CommandExecutor) saveState(ctx context.Context, unitID string, state []byte) error {
 	// Check if unit exists, create if not
 	if _, err := e.unitRepo.Get(ctx, unitID); err != nil {
diff --git a/taco/internal/github/types.go b/taco/internal/github/types.go
@@ -128,6 +128,8 @@ type TimingBreakdown struct {
 	Clone   time.Duration `json:"clone"`
 	Init    time.Duration `json:"init"`
 	Execute time.Duration `json:"execute"` // plan, apply, or destroy time
+	Apply   time.Duration `json:"apply,omitempty"`   // for benchmark: apply phase
+	Destroy time.Duration `json:"destroy,omitempty"` // for benchmark: destroy phase
 	Total   time.Duration `json:"total"`
 }
 
diff --git a/taco/internal/github/webhook.go b/taco/internal/github/webhook.go
@@ -218,7 +218,7 @@ func ParseCommand(text string) *Command {
 
 		// Validate action
 		switch action {
-		case "plan", "apply", "destroy":
+		case "plan", "apply", "destroy", "benchmark":
 			cmd := &Command{
 				Action:  action,
 				Options: make(map[string]string),
@@ -273,6 +273,12 @@ func FormatResult(result *CommandResult) string {
 		} else {
 			sb.WriteString("## ❌ OpenTaco Destroy Failed\n\n")
 		}
+	case "benchmark":
+		if result.Success {
+			sb.WriteString("## ✅ OpenTaco Benchmark Complete\n\n")
+		} else {
+			sb.WriteString("## ❌ OpenTaco Benchmark Failed\n\n")
+		}
 	}
 
 	// Timing breakdown
@@ -282,7 +288,9 @@ func FormatResult(result *CommandResult) string {
 	if result.Timing.Clone > 0 {
 		sb.WriteString(fmt.Sprintf("| Clone | %.2fs |\n", result.Timing.Clone.Seconds()))
 	}
-	sb.WriteString(fmt.Sprintf("| Init | %.2fs |\n", result.Timing.Init.Seconds()))
+	if result.Timing.Init > 0 {
+		sb.WriteString(fmt.Sprintf("| Init | %.2fs |\n", result.Timing.Init.Seconds()))
+	}
 
 	switch result.Command.Action {
 	case "plan":
@@ -291,6 +299,9 @@ func FormatResult(result *CommandResult) string {
 		sb.WriteString(fmt.Sprintf("| Apply | %.2fs |\n", result.Timing.Execute.Seconds()))
 	case "destroy":
 		sb.WriteString(fmt.Sprintf("| Destroy | %.2fs |\n", result.Timing.Execute.Seconds()))
+	case "benchmark":
+		sb.WriteString(fmt.Sprintf("| Apply | %.2fs |\n", result.Timing.Apply.Seconds()))
+		sb.WriteString(fmt.Sprintf("| Destroy | %.2fs |\n", result.Timing.Destroy.Seconds()))
 	}
 
 	sb.WriteString("\n")

Original file line number	Diff line number	Diff line change
`@@ -128,6 +128,8 @@ type TimingBreakdown struct {`
`128`	`128`	Clone time.Duration `json:"clone"`
`129`	`129`	Init time.Duration `json:"init"`
`130`	`130`	Execute time.Duration `json:"execute"` // plan, apply, or destroy time
	`131`	+ Apply time.Duration `json:"apply,omitempty"` // for benchmark: apply phase
	`132`	+ Destroy time.Duration `json:"destroy,omitempty"` // for benchmark: destroy phase
`131`	`133`	Total time.Duration `json:"total"`
`132`	`134`	`}`
`133`	`135`