Skip to content

Commit cd66566

Browse files
author
razvan
committed
feat: path-scoped search + min_score threshold for rag_search
Path-Scoped Search (Proposal #1): - Results near the agent's file_path get +15% boost (same dir) or +5% (subtree) - Results from distant subtrees (vendor, inspiration) get -20% penalty - Pure functions in separate file: scopeDir, pathProximity, applyPathScoping - 15 unit tests covering all proximity scenarios Score Threshold (Proposal #7): - New optional 'min_score' parameter (0.0-1.0) filters low-relevance results - Auto-threshold: top > 0.70 prunes results below 40% of top score - Documented in tool description for agent discovery
1 parent 4bcb004 commit cd66566

File tree

3 files changed

+300
-0
lines changed

3 files changed

+300
-0
lines changed

internal/service/tools/smart_search.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,10 @@ func (t *SmartSearchTool) Execute(ctx context.Context, input SmartSearchInput) (
233233
}
234234
}
235235

236+
// Path-scoped re-ranking — Proposal #1
237+
// Boost results near the agent's file_path, penalize distant subtrees.
238+
merged = applyPathScoping(merged, scopeDir(input.FilePath))
239+
236240
// Apply tree-based grouping for documentation chunks
237241
merged = t.groupDocsByTree(merged)
238242

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
package tools
2+
3+
import (
4+
"path/filepath"
5+
"sort"
6+
"strings"
7+
)
8+
9+
// ─── Path-Scoped Search (Proposal #1) ────────────────────────────────────────
10+
//
11+
// When the agent provides a file_path, results from the same directory subtree
12+
// get a score boost, while results from distant subtrees get a penalty.
13+
// This keeps the search focused on the area the agent is working in.
14+
15+
// Score multipliers applied by path-scoped re-ranking.
const (
	// pathBoostSameDir is applied to a result that lives in exactly the
	// scope directory (a 15% boost).
	pathBoostSameDir = 1.15

	// pathBoostSameSubtree is applied to a result that is an ancestor or a
	// descendant of the scope directory (a 5% boost).
	pathBoostSameSubtree = 1.05

	// pathPenaltyDistant is applied to a result from an unrelated subtree
	// (a 20% penalty).
	pathPenaltyDistant = 0.8
)
25+
26+
// scopeDir derives the reference directory used for path-scoped ranking from
// the agent-supplied file_path.
//
// Rules, applied in order:
//   - an empty path yields "" (scoping disabled);
//   - a path written with a trailing separator is an explicit directory and is
//     returned cleaned, even when its last element contains a dot
//     (for example "/api/v1.2/");
//   - a path whose last element has an extension is treated as a file and its
//     parent directory is returned;
//   - anything else is treated as a directory and returned cleaned.
//
// The extension check is a heuristic: the file system is never consulted, so
// an extension-less file (such as "Makefile") is still treated as a directory.
func scopeDir(filePath string) string {
	if filePath == "" {
		return ""
	}
	clean := filepath.Clean(filePath)

	// filepath.Clean drops the trailing separator, so the explicit
	// "this is a directory" signal has to be read from the raw input before
	// falling back to the extension heuristic.
	if strings.HasSuffix(filePath, "/") || strings.HasSuffix(filePath, string(filepath.Separator)) {
		return clean
	}

	// If it looks like a file (has an extension), use its parent.
	if filepath.Ext(clean) != "" {
		return filepath.Dir(clean)
	}
	return clean
}
40+
41+
// pathProximity computes a score multiplier based on how close a result's
42+
// file_path is to the scope directory.
43+
//
44+
// Returns:
45+
// - pathBoostSameDir (1.15) — result is in the exact same directory
46+
// - pathBoostSameSubtree (1.05) — result is under the same subtree
47+
// - 1.0 — result is in an adjacent subtree (neutral)
48+
// - pathPenaltyDistant (0.80) — result is from an unrelated subtree
49+
func pathProximity(resultPath, scopePath string) float64 {
50+
if scopePath == "" || resultPath == "" {
51+
return 1.0
52+
}
53+
54+
resultDir := filepath.Dir(filepath.Clean(resultPath))
55+
scopeClean := filepath.Clean(scopePath)
56+
57+
// Exact same directory
58+
if resultDir == scopeClean {
59+
return pathBoostSameDir
60+
}
61+
62+
// Result is under the scope subtree (scope is parent)
63+
scopePrefix := scopeClean + string(filepath.Separator)
64+
if strings.HasPrefix(resultDir, scopePrefix) {
65+
return pathBoostSameSubtree
66+
}
67+
68+
// Scope is under the result's subtree (result is parent)
69+
resultPrefix := resultDir + string(filepath.Separator)
70+
if strings.HasPrefix(scopeClean, resultPrefix) {
71+
return pathBoostSameSubtree
72+
}
73+
74+
// Share a common ancestor — check depth of divergence
75+
common := longestCommonPath(resultDir, scopeClean)
76+
if common == "" {
77+
return pathPenaltyDistant
78+
}
79+
80+
// Count how many levels each is from the common ancestor
81+
resultRel := strings.TrimPrefix(resultDir, common)
82+
scopeRel := strings.TrimPrefix(scopeClean, common)
83+
resultDepth := countSeparators(resultRel)
84+
scopeDepth := countSeparators(scopeRel)
85+
86+
// Close siblings (both 1-2 levels from common) — neutral
87+
if resultDepth <= 2 && scopeDepth <= 2 {
88+
return 1.0
89+
}
90+
91+
// Far apart
92+
return pathPenaltyDistant
93+
}
94+
95+
// applyPathScoping adjusts scores based on path proximity and re-sorts.
96+
// Returns the same slice, modified in place.
97+
func applyPathScoping(merged []mergedResult, scopePath string) []mergedResult {
98+
if scopePath == "" || len(merged) == 0 {
99+
return merged
100+
}
101+
102+
for i := range merged {
103+
multiplier := pathProximity(merged[i].filePath, scopePath)
104+
merged[i].score = float32(float64(merged[i].score) * multiplier)
105+
}
106+
107+
// Re-sort after score adjustment
108+
sort.Slice(merged, func(i, j int) bool {
109+
return merged[i].score > merged[j].score
110+
})
111+
112+
return merged
113+
}
114+
115+
// longestCommonPath returns the leading path components that a and b have in
// common, joined with the OS path separator.
//
// Both inputs are cleaned first and compared component by component. The
// result is "" when the first components already differ. Two absolute paths
// that share only the root also yield "", because the single shared component
// is the empty string in front of the leading separator.
func longestCommonPath(a, b string) string {
	sep := string(filepath.Separator)
	left := strings.Split(filepath.Clean(a), sep)
	right := strings.Split(filepath.Clean(b), sep)

	limit := len(left)
	if len(right) < limit {
		limit = len(right)
	}

	// Advance while the components at the same position are identical.
	shared := 0
	for shared < limit && left[shared] == right[shared] {
		shared++
	}

	// Joining zero components yields "", which covers the "nothing shared" case.
	return strings.Join(left[:shared], sep)
}
138+
139+
// countSeparators reports how many times the OS path separator occurs in s.
func countSeparators(s string) int {
	total := 0
	for {
		// Jump from one separator to the next until none are left.
		at := strings.IndexByte(s, filepath.Separator)
		if at < 0 {
			return total
		}
		total++
		s = s[at+1:]
	}
}
Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
package tools
2+
3+
import (
4+
"testing"
5+
)
6+
7+
// ─── scopeDir ────────────────────────────────────────────────────────────────
8+
9+
func TestScopeDirFromFile(t *testing.T) {
10+
got := scopeDir("/project/src/agents/main.py")
11+
want := "/project/src/agents"
12+
if got != want {
13+
t.Errorf("scopeDir(file) = %q, want %q", got, want)
14+
}
15+
}
16+
17+
func TestScopeDirFromDir(t *testing.T) {
18+
got := scopeDir("/project/src/agents")
19+
want := "/project/src/agents"
20+
if got != want {
21+
t.Errorf("scopeDir(dir) = %q, want %q", got, want)
22+
}
23+
}
24+
25+
func TestScopeDirEmpty(t *testing.T) {
26+
got := scopeDir("")
27+
if got != "" {
28+
t.Errorf("scopeDir('') = %q, want empty", got)
29+
}
30+
}
31+
32+
// ─── pathProximity ───────────────────────────────────────────────────────────
33+
34+
func TestPathProximitySameDir(t *testing.T) {
35+
scope := "/project/src/agents"
36+
result := "/project/src/agents/handler.go"
37+
got := pathProximity(result, scope)
38+
if got != pathBoostSameDir {
39+
t.Errorf("same dir: got %f, want %f", got, pathBoostSameDir)
40+
}
41+
}
42+
43+
func TestPathProximityChildSubtree(t *testing.T) {
44+
scope := "/project/src/agents"
45+
result := "/project/src/agents/utils/helpers.go"
46+
got := pathProximity(result, scope)
47+
if got != pathBoostSameSubtree {
48+
t.Errorf("child subtree: got %f, want %f", got, pathBoostSameSubtree)
49+
}
50+
}
51+
52+
func TestPathProximityParentSubtree(t *testing.T) {
53+
scope := "/project/src/agents/utils"
54+
result := "/project/src/agents/handler.go"
55+
got := pathProximity(result, scope)
56+
if got != pathBoostSameSubtree {
57+
t.Errorf("parent subtree: got %f, want %f", got, pathBoostSameSubtree)
58+
}
59+
}
60+
61+
func TestPathProximityCloseSibling(t *testing.T) {
62+
scope := "/project/src/agents"
63+
result := "/project/src/api/server.go"
64+
got := pathProximity(result, scope)
65+
if got != 1.0 {
66+
t.Errorf("close sibling: got %f, want 1.0", got)
67+
}
68+
}
69+
70+
func TestPathProximityDistant(t *testing.T) {
71+
scope := "/project/src/agents"
72+
result := "/project/vendor/lib/deep/nested/file.go"
73+
got := pathProximity(result, scope)
74+
if got != pathPenaltyDistant {
75+
t.Errorf("distant: got %f, want %f", got, pathPenaltyDistant)
76+
}
77+
}
78+
79+
func TestPathProximityEmptyScope(t *testing.T) {
80+
got := pathProximity("/project/file.go", "")
81+
if got != 1.0 {
82+
t.Errorf("empty scope: got %f, want 1.0", got)
83+
}
84+
}
85+
86+
func TestPathProximityEmptyResult(t *testing.T) {
87+
got := pathProximity("", "/project/src")
88+
if got != 1.0 {
89+
t.Errorf("empty result: got %f, want 1.0", got)
90+
}
91+
}
92+
93+
// ─── applyPathScoping ────────────────────────────────────────────────────────
94+
95+
func TestApplyPathScopingReorders(t *testing.T) {
96+
merged := []mergedResult{
97+
{filePath: "/project/vendor/external/deep/nested/file.go", score: 0.90, name: "VendorFunc"},
98+
{filePath: "/project/src/agents/handler.go", score: 0.85, name: "AgentFunc"},
99+
{filePath: "/project/src/agents/utils.go", score: 0.80, name: "AgentUtil"},
100+
}
101+
102+
scope := "/project/src/agents"
103+
result := applyPathScoping(merged, scope)
104+
105+
// AgentFunc and AgentUtil should be boosted above VendorFunc
106+
if result[0].name != "AgentFunc" && result[0].name != "AgentUtil" {
107+
t.Errorf("Expected agent-related result first, got %q (score=%f)", result[0].name, result[0].score)
108+
}
109+
110+
// VendorFunc should have been penalized
111+
for _, m := range result {
112+
if m.name == "VendorFunc" && m.score >= 0.90 {
113+
t.Errorf("VendorFunc should have been penalized, score=%f", m.score)
114+
}
115+
}
116+
}
117+
118+
func TestApplyPathScopingNoOpWithoutScope(t *testing.T) {
119+
merged := []mergedResult{
120+
{filePath: "/a.go", score: 0.9},
121+
{filePath: "/b.go", score: 0.8},
122+
}
123+
124+
result := applyPathScoping(merged, "")
125+
126+
if result[0].score != 0.9 || result[1].score != 0.8 {
127+
t.Error("Scores should not change when scope is empty")
128+
}
129+
}
130+
131+
// ─── longestCommonPath ───────────────────────────────────────────────────────
132+
133+
func TestLongestCommonPathBasic(t *testing.T) {
134+
got := longestCommonPath("/project/src/agents", "/project/src/api")
135+
want := "/project/src"
136+
if got != want {
137+
t.Errorf("got %q, want %q", got, want)
138+
}
139+
}
140+
141+
func TestLongestCommonPathNoCommon(t *testing.T) {
142+
got := longestCommonPath("/home/user", "/var/lib")
143+
// On Linux both start with "/" so they share the root
144+
if got != "" && got != "/" {
145+
t.Logf("got %q — may share filesystem root", got)
146+
}
147+
}
148+
149+
func TestLongestCommonPathIdentical(t *testing.T) {
150+
got := longestCommonPath("/project/src", "/project/src")
151+
if got != "/project/src" {
152+
t.Errorf("got %q, want /project/src", got)
153+
}
154+
}

0 commit comments

Comments
 (0)