Skip to content

Commit 98aa36c

Browse files
author
Test
committed
feat(feedback): track failure reasons for model learning
- Extended recordFeedback to include a failureReason parameter.
- Failure events now include LoopDetector stats in their metadata.
- This allows the model selector to learn from specific failure patterns.
- Removed the misleading cost estimate from the observer (Groq is free/cheap).

Format of failure_reason:
  max_iterations_reached (Iterations: 25, Files Modified: 0, Files Read: 5, Tool Calls: 12)

This data can be used to:
- Identify models that struggle with certain task types
- Route complex tasks to more capable models
- Track iteration patterns that lead to failure
1 parent 0a9f989 commit 98aa36c

File tree

2 files changed

+18
-13
lines changed

2 files changed

+18
-13
lines changed

internal/maestro/conductor.go

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,7 @@ func (c *Conductor) ExecuteTask(ctx context.Context, task string, complexity str
221221

222222
// Check if this is a query-only task (no validation needed)
223223
if c.isQueryTask(plan, modifiedFiles) {
224-
c.recordFeedback(editBackend, editModel, "editor", task, true)
224+
c.recordFeedback(editBackend, editModel, "editor", task, true, "")
225225

226226
fmt.Printf("\n[OK] Task complete!\n")
227227
if result != "" {
@@ -351,8 +351,8 @@ func (c *Conductor) ExecuteTask(ctx context.Context, task string, complexity str
351351
}
352352

353353
// Success! Record positive feedback
354-
c.recordFeedback(editBackend, editModel, "editor", task, true)
355-
c.recordFeedback(reviewBackend, reviewModel, "reviewer", task, true)
354+
c.recordFeedback(editBackend, editModel, "editor", task, true, "")
355+
c.recordFeedback(reviewBackend, reviewModel, "reviewer", task, true, "")
356356

357357
fmt.Printf("\n[OK] Task complete!\n")
358358
if result != "" {
@@ -374,8 +374,13 @@ func (c *Conductor) ExecuteTask(ctx context.Context, task string, complexity str
374374
// Task failed after all attempts - record negative feedback
375375
editBackend, editModel, _ := c.selector.SelectModel(config.ActionEdit, c.language, complexity)
376376
reviewBackend, reviewModel, _ := c.selector.SelectModel(config.ActionReview, c.language, complexity)
377-
c.recordFeedback(editBackend, editModel, "editor", task, false)
378-
c.recordFeedback(reviewBackend, reviewModel, "reviewer", task, false)
377+
// Capture failure reason from loop detector for learning
378+
failureReason := "max_iterations_reached"
379+
if c.loopDetector != nil {
380+
failureReason = fmt.Sprintf("max_iterations_reached (%s)", c.loopDetector.GetStats())
381+
}
382+
c.recordFeedback(editBackend, editModel, "editor", task, false, failureReason)
383+
c.recordFeedback(reviewBackend, reviewModel, "reviewer", task, false, failureReason)
379384

380385
// Record final failure metrics
381386
if c.Tracer != nil {
@@ -392,7 +397,7 @@ func errorMsg(err error) string {
392397
return err.Error()
393398
}
394399

395-
func (c *Conductor) recordFeedback(backend, model, agent, task string, success bool) {
400+
func (c *Conductor) recordFeedback(backend, model, agent, task string, success bool, failureReason string) {
396401
sentiment := feedback.SentimentBad
397402
if success {
398403
sentiment = feedback.SentimentGood
@@ -407,6 +412,13 @@ func (c *Conductor) recordFeedback(backend, model, agent, task string, success b
407412
Context: fmt.Sprintf("language=%s", c.language),
408413
}
409414

415+
// Add failure reason to metadata so we can learn from specific failure types
416+
if !success && failureReason != "" {
417+
event.Metadata = map[string]string{
418+
"failure_reason": failureReason,
419+
}
420+
}
421+
410422
if err := feedback.Record(event); err != nil {
411423
if os.Getenv("GPTCODE_DEBUG") == "1" {
412424
fmt.Fprintf(os.Stderr, "[WARN] Failed to record feedback: %v\n", err)

internal/observability/observer.go

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -452,13 +452,6 @@ func (o *AgentObserver) PrintSummary() {
452452
fmt.Printf(" Tokens In: %s\n", formatNumber(summary.TokensIn))
453453
fmt.Printf(" Tokens Out: %s\n", formatNumber(summary.TokensOut))
454454
fmt.Printf(" Total Tokens: %s\n", formatNumber(summary.TokensIn+summary.TokensOut))
455-
456-
// Estimate cost (rough approximation)
457-
if summary.TokensIn+summary.TokensOut > 0 {
458-
// Assuming average cost of $0.001 per 1K tokens (varies by model)
459-
estimatedCost := float64(summary.TokensIn+summary.TokensOut) / 1000.0 * 0.001
460-
fmt.Printf(" Est. Cost: $%.4f\n", estimatedCost)
461-
}
462455
} else {
463456
fmt.Println(" No LLM calls recorded")
464457
}

0 commit comments

Comments
 (0)