Skip to content

Commit a563f02

Browse files
author
razvan
committed
fix: prevent nested git repos from being treated as separate workspaces
When a file_path points inside a subdirectory that has its own .git (submodules, vendored projects), the detector would treat it as a separate workspace instead of using the already-registered parent. Added FindParentWorkspace() to registry that checks if a detected root is a subdirectory of any registered workspace. The resolver now uses this in handleFilePath() to override the candidate with the parent workspace when applicable. Includes 9 unit tests covering edge cases: prefix collisions, deepest parent selection, same-path rejection, and empty path handling.
1 parent 4d8fd95 commit a563f02

File tree

5 files changed

+193
-0
lines changed

5 files changed

+193
-0
lines changed
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
package registry
2+
3+
import (
4+
"testing"
5+
"time"
6+
)
7+
8+
// newTestRegistry creates a Registry with entries populated in-memory only (no disk I/O).
9+
func newTestRegistry(entries ...*Entry) *Registry {
10+
r := &Registry{
11+
entries: make(map[string]*Entry),
12+
candidates: make(map[string]*CandidateEntry),
13+
indexRoot: make(map[string]string),
14+
indexName: make(map[string][]string),
15+
clock: time.Now,
16+
audit: noopAuditSink{},
17+
}
18+
for _, e := range entries {
19+
r.addEntry(e)
20+
}
21+
return r
22+
}
23+
24+
func TestFindParentWorkspace(t *testing.T) {
25+
r := newTestRegistry(&Entry{
26+
ID: hashRoot("/home/user/projects/big-project"),
27+
Root: "/home/user/projects/big-project",
28+
Name: "big-project",
29+
})
30+
31+
tests := []struct {
32+
name string
33+
path string
34+
wantRoot string
35+
wantFound bool
36+
}{
37+
{
38+
name: "child directory matches parent",
39+
path: "/home/user/projects/big-project/submodule/graph",
40+
wantRoot: "/home/user/projects/big-project",
41+
wantFound: true,
42+
},
43+
{
44+
name: "deep nested child matches",
45+
path: "/home/user/projects/big-project/a/b/c/d",
46+
wantRoot: "/home/user/projects/big-project",
47+
wantFound: true,
48+
},
49+
{
50+
name: "same path does not match (not strictly inside)",
51+
path: "/home/user/projects/big-project",
52+
wantRoot: "",
53+
wantFound: false,
54+
},
55+
{
56+
name: "sibling directory does not match",
57+
path: "/home/user/projects/other-project",
58+
wantRoot: "",
59+
wantFound: false,
60+
},
61+
{
62+
name: "parent directory does not match",
63+
path: "/home/user/projects",
64+
wantRoot: "",
65+
wantFound: false,
66+
},
67+
{
68+
name: "similar prefix but different dir does not match",
69+
path: "/home/user/projects/big-project-v2/src",
70+
wantRoot: "",
71+
wantFound: false,
72+
},
73+
{
74+
name: "empty path returns not found",
75+
path: "",
76+
wantRoot: "",
77+
wantFound: false,
78+
},
79+
}
80+
81+
for _, tt := range tests {
82+
t.Run(tt.name, func(t *testing.T) {
83+
root, found := r.FindParentWorkspace(tt.path)
84+
if found != tt.wantFound {
85+
t.Errorf("FindParentWorkspace(%q) found=%v, want %v", tt.path, found, tt.wantFound)
86+
}
87+
if root != tt.wantRoot {
88+
t.Errorf("FindParentWorkspace(%q) root=%q, want %q", tt.path, root, tt.wantRoot)
89+
}
90+
})
91+
}
92+
}
93+
94+
func TestFindParentWorkspacePicksDeepest(t *testing.T) {
95+
r := newTestRegistry(
96+
&Entry{
97+
ID: hashRoot("/home/user/projects"),
98+
Root: "/home/user/projects",
99+
Name: "projects",
100+
},
101+
&Entry{
102+
ID: hashRoot("/home/user/projects/monorepo"),
103+
Root: "/home/user/projects/monorepo",
104+
Name: "monorepo",
105+
},
106+
)
107+
108+
// A path inside monorepo should match monorepo (the deepest parent), not projects
109+
root, found := r.FindParentWorkspace("/home/user/projects/monorepo/packages/core")
110+
if !found {
111+
t.Fatal("expected to find parent workspace")
112+
}
113+
if root != "/home/user/projects/monorepo" {
114+
t.Fatalf("expected deepest parent /home/user/projects/monorepo, got %s", root)
115+
}
116+
117+
// A path inside projects but outside monorepo should match projects
118+
root, found = r.FindParentWorkspace("/home/user/projects/other-app/src")
119+
if !found {
120+
t.Fatal("expected to find parent workspace")
121+
}
122+
if root != "/home/user/projects" {
123+
t.Fatalf("expected parent /home/user/projects, got %s", root)
124+
}
125+
}

pkg/workspace/registry/registry.go

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,43 @@ func (r *Registry) LookupByRoot(root string) (*Entry, bool) {
243243
return r.entries[id], true
244244
}
245245

246+
// FindParentWorkspace checks if the given path is a subdirectory of any
247+
// registered workspace. If multiple parents exist (nested registrations),
248+
// returns the most specific (deepest) parent root. Returns ("", false) if
249+
// no parent found.
250+
// This prevents nested git repos (submodules, vendored projects) from being
251+
// treated as independent workspaces when they live inside an already-indexed project.
252+
func (r *Registry) FindParentWorkspace(path string) (string, bool) {
253+
r.mu.Lock()
254+
defer r.mu.Unlock()
255+
256+
cleanPath := strings.ToLower(filepath.Clean(path))
257+
if cleanPath == "" {
258+
return "", false
259+
}
260+
261+
var bestRoot string
262+
var bestLen int
263+
264+
for _, entry := range r.entries {
265+
entryRoot := strings.ToLower(filepath.Clean(entry.Root))
266+
// The path must be strictly inside the entry root (not equal to it)
267+
prefix := entryRoot + string(filepath.Separator)
268+
if strings.HasPrefix(cleanPath, prefix) {
269+
// Pick the deepest (most specific) parent
270+
if len(entryRoot) > bestLen {
271+
bestLen = len(entryRoot)
272+
bestRoot = entry.Root // preserve original casing
273+
}
274+
}
275+
}
276+
277+
if bestRoot == "" {
278+
return "", false
279+
}
280+
return bestRoot, true
281+
}
282+
246283
// LookupByName returns entries matching the provided name.
247284
func (r *Registry) LookupByName(name string) []*Entry {
248285
r.mu.Lock()

pkg/workspace/resolver/resolver.go

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@ type Registry interface {
2121
PromoteCandidate(ctx context.Context, root, client string, executionSucceeded bool) error
2222
RegisterWorkspace(root, name, client string) error
2323
GetActiveWorkspace() (string, error)
24+
// FindParentWorkspace checks if the given path is a subdirectory
25+
// of any already-registered workspace. Returns the parent root
26+
// and true if found, or ("", false) if no parent exists.
27+
FindParentWorkspace(path string) (string, bool)
2428
}
2529

2630
// RootValidator ensures resolved roots are within allowed boundaries.
@@ -162,6 +166,25 @@ func (r *Resolver) handleFilePath(ctx context.Context, path string) (*contract.R
162166
Reason: contract.ReasonInvalidPath,
163167
}
164168
}
169+
170+
// Guard: if the detected root is a subdirectory of an already-registered
171+
// workspace, prefer the parent. This prevents nested git repos (submodules,
172+
// vendored projects, monorepo sub-packages) from being treated as separate
173+
// workspaces when they live inside a project that is already indexed.
174+
if r.deps.Registry != nil {
175+
if parentRoot, found := r.deps.Registry.FindParentWorkspace(result.Root); found {
176+
r.log(ctx, "nested_workspace_override", map[string]any{
177+
"detected_root": result.Root,
178+
"parent_root": parentRoot,
179+
"reason": "detected root is subdirectory of registered workspace",
180+
})
181+
result.Root = parentRoot
182+
result.Reason = contract.ReasonFilePath
183+
result.Source = "nested_workspace_override"
184+
result.Confidence = 0.90 // slightly lower than direct detection
185+
}
186+
}
187+
165188
r.log(ctx, "file_path", map[string]any{"root": result.Root, "source": "file_path"})
166189
if result.Source == "" {
167190
result.Source = "file_path"

pkg/workspace/resolver/resolver_test.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,10 @@ func (f *fakeRegistry) GetActiveWorkspace() (string, error) {
267267
return "", nil
268268
}
269269

270+
func (f *fakeRegistry) FindParentWorkspace(path string) (string, bool) {
271+
return "", false
272+
}
273+
270274
type fakeAnnotator struct {
271275
branch string
272276
headSHA string

pkg/workspace/tests/scenario_test.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,10 @@ func (f *fakeRegistry) GetActiveWorkspace() (string, error) {
184184
return "", nil
185185
}
186186

187+
func (f *fakeRegistry) FindParentWorkspace(path string) (string, bool) {
188+
return "", false
189+
}
190+
187191
type branchstateAnnotator struct {
188192
mgr *branchstate.Manager
189193
}

0 commit comments

Comments
 (0)