Skip to content

Commit 6e41a41

Browse files
author
razvan
committed
Merge branch 'v2-detection-coreExactSearch' into feat/refine-installer-and-models
2 parents 2c06cda + 6af7bff commit 6e41a41

File tree

12 files changed

+877
-220
lines changed

12 files changed

+877
-220
lines changed

internal/service/engine/engine.go

Lines changed: 149 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,20 @@ type Engine struct {
3535
// indexingJobs tracks active background indexing jobs.
3636
// Key: workspace ID, Value: start time
3737
indexingJobs sync.Map
38+
39+
// detectionCache stores resolved WorkspaceContext with TTL to avoid
40+
// repeated full resolver cascades for the same path.
41+
detectionCache sync.Map // map[string]*detectionCacheEntry
42+
}
43+
44+
// detectionCacheEntry is the value stored in Engine.detectionCache: a resolved
// WorkspaceContext paired with the instant after which it must be re-resolved.
45+
type detectionCacheEntry struct {
46+
// wctx is the cached resolver result. DetectContext returns this same
// pointer to every caller that hits the cache, so it is shared, not copied.
wctx *WorkspaceContext
47+
// expiry is the cut-off instant; DetectContext serves the entry only while
// time.Now() is before it and deletes the entry otherwise.
expiry time.Time
3848
}
3949

50+
// detectionCacheTTL is how long a result stored by DetectContext stays valid
// before the next call for the same path re-runs workspace resolution.
const detectionCacheTTL = 5 * time.Second
51+
4052
// GetSearchService returns the search service held by the engine (e.search).
func (e *Engine) GetSearchService() *search.Service {
4153
return e.search
4254
}
@@ -104,9 +116,41 @@ type WorkspaceContext struct {
104116
HeadSHA string
105117
}
106118

119+
// CollectionNameFor returns the Qdrant collection name for a workspace ID and
// language, in the form "ragcode-<wsID>-<lang>".
//
// Neither argument is validated or escaped: empty strings yield "ragcode--".
// Plain concatenation is used instead of fmt.Sprintf because this helper runs
// once per language on every search and indexing call, and concatenation
// avoids boxing the arguments while producing the identical string.
func CollectionNameFor(wsID, lang string) string {
	return "ragcode-" + wsID + "-" + lang
}
123+
124+
// CollectionName returns the Qdrant collection name for this workspace and the
// given language by delegating to CollectionNameFor with the workspace's ID.
// The receiver must be non-nil because w.ID is read unconditionally.
125+
func (w *WorkspaceContext) CollectionName(lang string) string {
126+
return CollectionNameFor(w.ID, lang)
127+
}
128+
129+
// DetectFromParams resolves workspace context from a tool args map.
130+
// Reads file_path, workspace_root, or workspace keys (in that priority order).
131+
func (e *Engine) DetectFromParams(ctx context.Context, params map[string]interface{}) (*WorkspaceContext, error) {
132+
for _, key := range []string{"file_path", "workspace_root", "workspace"} {
133+
if v, ok := params[key].(string); ok && strings.TrimSpace(v) != "" {
134+
return e.DetectContext(ctx, v)
135+
}
136+
}
137+
return e.DetectContext(ctx, "")
138+
}
139+
107140
// DetectContext resolves the workspace context for a given path using the full resolver cascade.
108141
// If path is empty, it falls back to the last active workspace from the registry.
142+
// Results are cached with a 5s TTL to avoid redundant resolver invocations.
109143
func (e *Engine) DetectContext(ctx context.Context, path string) (*WorkspaceContext, error) {
144+
// Normalize cache key
145+
cacheKey := strings.TrimSpace(path)
146+
if entry, ok := e.detectionCache.Load(cacheKey); ok {
147+
ce := entry.(*detectionCacheEntry)
148+
if time.Now().Before(ce.expiry) {
149+
return ce.wctx, nil
150+
}
151+
e.detectionCache.Delete(cacheKey)
152+
}
153+
110154
req := contract.ResolveWorkspaceRequest{}
111155
source := "explicit_file_path"
112156

@@ -130,7 +174,7 @@ func (e *Engine) DetectContext(ctx context.Context, path string) (*WorkspaceCont
130174
return nil, fmt.Errorf("workspace detection failed: %s", wsErr.Message)
131175
}
132176

133-
return &WorkspaceContext{
177+
wctx := &WorkspaceContext{
134178
Root: resp.ResolvedRoot,
135179
ID: resp.WorkspaceID,
136180
Branch: resp.Branch,
@@ -139,7 +183,17 @@ func (e *Engine) DetectContext(ctx context.Context, path string) (*WorkspaceCont
139183
DetectionSource: source,
140184
ReindexRequired: resp.ReindexRequired,
141185
HeadSHA: resp.HeadSHA,
142-
}, nil
186+
}
187+
188+
// Don't cache entries that require reindex or are high-risk — let next call re-evaluate
189+
if !wctx.ReindexRequired && wctx.MismatchRisk != "high" {
190+
e.detectionCache.Store(cacheKey, &detectionCacheEntry{
191+
wctx: wctx,
192+
expiry: time.Now().Add(detectionCacheTTL),
193+
})
194+
}
195+
196+
return wctx, nil
143197
}
144198

145199
// GetActiveWorkspace returns the last confirmed workspace root from the resolver.
@@ -151,6 +205,7 @@ func (e *Engine) GetActiveWorkspace() (string, error) {
151205
type SearchCodeResult struct {
152206
Results []storage.SearchResult
153207
WorkspaceRoot string
208+
WorkspaceID string
154209
Collection string
155210
Language string
156211
MismatchRisk string
@@ -197,59 +252,124 @@ func (e *ErrIndexingStarted) Error() string {
197252
}
198253

199254
// SearchCode detects the workspace from filePath, resolves the correct collection,
200-
// and performs a semantic search. includeDocs=false searches code only.
201-
// If the collection does not exist, it triggers background indexing.
255+
// embeds the query ONCE, then fans out in parallel to all language collections.
256+
// includeDocs=false searches code only. Triggers background indexing if needed.
202257
func (e *Engine) SearchCode(ctx context.Context, filePath, queryText string, limit int, includeDocs bool) (*SearchCodeResult, error) {
203258
wctx, err := e.DetectContext(ctx, filePath)
204259
if err != nil {
205260
return nil, err
206261
}
207262

208-
// Detect language from file extension using the parser registry
209-
lang := "go" // default fallback
263+
// Primary language from file extension
264+
primaryLang := "go"
210265
if a := parser.GetByFile(filePath); a != nil {
211-
lang = a.Name()
266+
primaryLang = a.Name()
212267
}
213268

214-
collection := fmt.Sprintf("ragcode-%s-%s", wctx.ID, lang)
215-
216-
exists, err := e.search.CollectionExists(ctx, collection)
269+
// Ensure at least the primary collection exists before embedding (fast fail + indexing trigger)
270+
primaryColl := wctx.CollectionName(primaryLang)
271+
exists, err := e.search.CollectionExists(ctx, primaryColl)
217272
if err != nil {
218273
return nil, fmt.Errorf("failed to check collection: %w", err)
219274
}
220-
221275
if !exists {
222-
// Check if already indexing
223276
if _, ok := e.indexingJobs.Load(wctx.ID); ok {
224277
return nil, &ErrIndexingInProgress{WorkspaceRoot: wctx.Root}
225278
}
226-
227-
// Trigger background indexing
228279
e.StartIndexingAsync(wctx.Root, wctx.ID, nil, false)
229280
return nil, &ErrIndexingStarted{WorkspaceRoot: wctx.Root}
230281
}
231282

232-
// Trigger background indexing if resolver says re-index is required (e.g. branch change)
233283
if wctx.ReindexRequired {
234284
log.Printf("[INFO] Git state change detected (Head: %s), triggering background re-indexing for %s", wctx.HeadSHA, wctx.Root)
235285
e.StartIndexingAsync(wctx.Root, wctx.ID, nil, false)
236286
}
237287

238-
var results []storage.SearchResult
239-
if includeDocs {
240-
results, err = e.search.Search(ctx, collection, queryText, limit)
241-
} else {
242-
results, err = e.search.SearchCodeOnly(ctx, collection, queryText, limit)
288+
// Embed ONCE, fan-out to all language collections in parallel
289+
langs := parser.SupportedLanguages()
290+
if len(langs) == 0 {
291+
langs = []string{"go", "python", "php", "html"}
243292
}
293+
294+
vector, err := e.search.EmbedQuery(ctx, queryText)
244295
if err != nil {
245-
return nil, fmt.Errorf("search failed: %w", err)
296+
return nil, fmt.Errorf("embedding failed: %w", err)
297+
}
298+
299+
type langResult struct {
300+
lang string
301+
coll string
302+
results []storage.SearchResult
303+
err error
304+
}
305+
306+
resultsChan := make(chan langResult, len(langs))
307+
var wg sync.WaitGroup
308+
309+
for _, lang := range langs {
310+
coll := wctx.CollectionName(lang)
311+
wg.Add(1)
312+
go func(l, c string) {
313+
defer wg.Done()
314+
ok, chkErr := e.search.CollectionExists(ctx, c)
315+
if chkErr != nil || !ok {
316+
return
317+
}
318+
var res []storage.SearchResult
319+
var sErr error
320+
if includeDocs {
321+
res, sErr = e.search.SearchWithVector(ctx, c, vector, limit)
322+
} else {
323+
res, sErr = e.search.SearchCodeWithVector(ctx, c, vector, limit)
324+
}
325+
if sErr != nil {
326+
log.Printf("[WARN] SearchCode: fan-out failed for %s: %v", c, sErr)
327+
resultsChan <- langResult{lang: l, coll: c, err: sErr}
328+
return
329+
}
330+
if len(res) > 0 {
331+
resultsChan <- langResult{lang: l, coll: c, results: res}
332+
}
333+
}(lang, coll)
334+
}
335+
336+
wg.Wait()
337+
close(resultsChan)
338+
339+
// Merge: primary lang results first, others appended; surface first error if no results
340+
var primaryResults []storage.SearchResult
341+
var otherResults []storage.SearchResult
342+
var firstErr error
343+
reportLang := primaryLang
344+
reportColl := primaryColl
345+
346+
for lr := range resultsChan {
347+
if lr.err != nil {
348+
if firstErr == nil {
349+
firstErr = lr.err
350+
}
351+
continue
352+
}
353+
if lr.coll == primaryColl {
354+
primaryResults = lr.results
355+
} else {
356+
otherResults = append(otherResults, lr.results...)
357+
}
358+
}
359+
360+
all := append(primaryResults, otherResults...)
361+
362+
// If nothing was found and there were errors, surface the error
363+
if len(all) == 0 && firstErr != nil {
364+
return nil, fmt.Errorf("search failed: %w", firstErr)
246365
}
247366

248367
return &SearchCodeResult{
249-
Results: results,
368+
Results: all,
250369
WorkspaceRoot: wctx.Root,
251-
Collection: collection,
252-
Language: lang,
370+
WorkspaceID: wctx.ID,
371+
Collection: reportColl,
372+
Language: reportLang,
253373
MismatchRisk: wctx.MismatchRisk,
254374
DetectionSource: wctx.DetectionSource,
255375
}, nil
@@ -268,7 +388,7 @@ func (e *Engine) HybridSearchCode(ctx context.Context, filePath, queryText strin
268388
lang = a.Name()
269389
}
270390

271-
collection := fmt.Sprintf("ragcode-%s-%s", wctx.ID, lang)
391+
collection := wctx.CollectionName(lang)
272392

273393
exists, err := e.search.CollectionExists(ctx, collection)
274394
if err != nil {
@@ -296,6 +416,7 @@ func (e *Engine) HybridSearchCode(ctx context.Context, filePath, queryText strin
296416
return &SearchCodeResult{
297417
Results: results,
298418
WorkspaceRoot: wctx.Root,
419+
WorkspaceID: wctx.ID,
299420
Collection: collection,
300421
Language: lang,
301422
MismatchRisk: wctx.MismatchRisk,
@@ -324,7 +445,7 @@ func (e *Engine) ExactSearchPolyglot(ctx context.Context, wsID string, filters m
324445
var existingCollections int32
325446

326447
for _, lang := range langs {
327-
collection := fmt.Sprintf("ragcode-%s-%s", wsID, lang)
448+
collection := CollectionNameFor(wsID, lang)
328449
wg.Add(1)
329450
go func(coll string) {
330451
defer wg.Done()
@@ -419,7 +540,7 @@ func (e *Engine) IndexFiles(ctx context.Context, root string, files []string) er
419540
lang = a.Name()
420541
}
421542
}
422-
collection := fmt.Sprintf("ragcode-%s-%s", wctx.ID, lang)
543+
collection := wctx.CollectionName(lang)
423544

424545
for _, p := range files {
425546
if err := e.indexer.IndexFile(ctx, collection, p, state); err != nil {
@@ -449,7 +570,7 @@ func (e *Engine) IndexWorkspace(ctx context.Context, path string, recreate bool)
449570
languages := parser.SupportedLanguages()
450571

451572
for _, lang := range languages {
452-
collection := fmt.Sprintf("ragcode-%s-%s", wctx.ID, lang)
573+
collection := wctx.CollectionName(lang)
453574
err := e.indexer.IndexWorkspace(ctx, wctx.Root, collection, indexer.Options{
454575
Language: lang,
455576
ExcludePatterns: e.config.Workspace.ExcludePatterns,

0 commit comments

Comments
 (0)