@@ -22,76 +22,6 @@ import (
2222 "github.com/doITmagic/rag-code-mcp/pkg/storage"
2323)
2424
// getSystemMemoryGB reports the total system memory in whole GiB, read from
// /proc/meminfo (Linux only).
//
// It returns 0 when the amount cannot be determined: the platform is not
// Linux, the file cannot be read, or no parsable MemTotal line is present.
// A successfully detected amount below 1 GiB is reported as 1, so callers can
// rely on 0 meaning "unknown" rather than "tiny machine".
func getSystemMemoryGB() int {
	if runtime.GOOS != "linux" {
		return 0
	}
	data, err := os.ReadFile("/proc/meminfo")
	if err != nil {
		return 0
	}
	return parseMemTotalGB(string(data))
}

// parseMemTotalGB extracts the MemTotal value (in kB) from meminfo-formatted
// text and converts it to whole GiB, rounding down but never below 1.
// It returns 0 when no MemTotal line with a positive value can be parsed.
func parseMemTotalGB(meminfo string) int {
	const kbPerGB = 1024 * 1024

	// Walk the text line by line without materialising a slice of all lines.
	for len(meminfo) > 0 {
		var line string
		line, meminfo, _ = strings.Cut(meminfo, "\n")
		if !strings.HasPrefix(line, "MemTotal:") {
			continue
		}

		var kb int
		// The space in the format consumes the run of padding spaces that
		// precedes the number in /proc/meminfo.
		if _, err := fmt.Sscanf(line, "MemTotal: %d kB", &kb); err != nil || kb <= 0 {
			continue
		}

		if gb := kb / kbPerGB; gb > 0 {
			return gb
		}
		// Memory was detected but is below 1 GiB: do not collapse to the
		// "unknown" sentinel.
		return 1
	}
	return 0
}
46-
47- // globalIndexSemaphore limits the total number of concurrent file-indexing workers
48- // across ALL active workspace indexing jobs.
49- // To prevent Ollama OOM while maximizing speed, we scale concurrency based on system RAM:
50- // - <= 8GB RAM: 1 worker (Survival mode)
51- // - <= 16GB RAM: 2 workers
52- // - <= 32GB RAM: 3 workers
53- // - > 32GB RAM: 4 workers (Max Cap for high-end systems to leave RAM for OS/IDE)
54- var globalIndexSemaphore = func () chan struct {} {
55- n := runtime .NumCPU () / 4
56- if n < 2 {
57- n = 2
58- }
59- if n > 4 {
60- n = 4
61- }
62-
63- memGB := getSystemMemoryGB ()
64- if memGB > 0 {
65- var ramWorkers int
66- switch {
67- case memGB <= 8 :
68- ramWorkers = 1
69- case memGB <= 16 :
70- ramWorkers = 2
71- case memGB <= 32 :
72- ramWorkers = 3
73- default :
74- ramWorkers = 4
75- }
76-
77- // Take the minimum between CPU-recommended workers and RAM-allowed workers
78- if ramWorkers < n {
79- n = ramWorkers
80- }
81-
82- logger .Instance .Info ("🧠 Detected %dGB RAM. Dynamic indexing concurrency set to %d workers." , memGB , n )
83- } else {
84- // Fallback for non-Linux or failures, strictly safe
85- logger .Instance .Warn ("🧠 Could not detect system RAM. Defaulting to safe concurrency limit of 1 worker." )
86- n = 1
87- }
88-
89- ch := make (chan struct {}, n )
90- for i := 0 ; i < n ; i ++ {
91- ch <- struct {}{}
92- }
93- return ch
94- }()
9525
9626const (
9727 deleteCollectionTimeout = 10 * time .Second
@@ -101,6 +31,7 @@ const (
10131// Options configures the indexer.
10232type Options struct {
10333 Language string
34+ WorkspaceName string // basename of workspace root, used for logging
10435 ExcludePatterns []string
10536 Recreate bool
10637 Progress func (doneFiles , totalFiles int )
@@ -220,34 +151,25 @@ func (s *Service) IndexWorkspace(ctx context.Context, root string, collection st
220151 }
221152 }
222153
154+ wsName := opts .WorkspaceName
155+ if wsName == "" {
156+ wsName = filepath .Base (root )
157+ }
158+
223159 totalFiles := len (changedFiles )
224- logger .Instance .Info ("Indexing %d file(s) in %s (Language: %s) " , totalFiles , root , opts .Language )
160+ logger .Instance .Info ("[IDX] ws=%s lang= %s ▶ %d file(s) to index " , wsName , opts .Language , totalFiles )
225161
226162 // Ensure the embedding model is loaded in Ollama's memory before starting.
227163 // If another program evicted it, this will reload it (with up to 2min timeout).
228164 if ollamaProvider , ok := unwrapOllamaProvider (s .embedder ); ok {
229165 if err := ollamaProvider .EnsureLoaded (ctx ); err != nil {
230- logger .Instance .Error ("Cannot ensure embedding model is loaded : %v" , err )
166+ logger .Instance .Error ("[IDX] ws=%s lang=%s ❌ embedding model not available : %v" , wsName , opts . Language , err )
231167 return fmt .Errorf ("embedding model not available: %w" , err )
232168 }
233169 }
234170
235- // 4. Process changed files using the global semaphore to cap total concurrency
236- // across all active workspace indexing jobs (prevents CPU/RAM overload).
237- numFileWorkers := cap (globalIndexSemaphore )
238-
239- filePaths := make (chan string , totalFiles )
240- for _ , p := range changedFiles {
241- filePaths <- p
242- }
243- close (filePaths )
244-
245- var (
246- fileWg sync.WaitGroup
247- errMu sync.Mutex
248- fileErrs []string
249- doneFiles atomic.Int64
250- )
171+ // 4. File-level counters for watchdog (accessed from two goroutines via atomic).
172+ var doneFiles atomic.Int64
251173
252174 // Dedicated periodic-save goroutine + stall watchdog: detects silent deadlocks.
253175 saveStop := make (chan struct {})
@@ -261,35 +183,29 @@ func (s *Service) IndexWorkspace(ctx context.Context, root string, collection st
261183 select {
262184 case <- saveTicker .C :
263185 if err := state .Save (statePath ); err != nil {
264- logger .Instance .Warn ("Periodic state save failed for %s : %v" , root , err )
186+ logger .Instance .Warn ("[IDX] ws=%s lang=%s periodic state save failed: %v" , wsName , opts . Language , err )
265187 }
266188 case <- stallTicker .C :
267189 current := doneFiles .Load ()
268- // We only trigger stall logic if we haven't finished all files AND
269- // we haven't successfully embedded ANY new symbol in the last 60 seconds.
270190 lastActivitySec := s .lastActivity .Load ()
271191 elapsedSinceActivity := time .Now ().Unix () - lastActivitySec
272192
273193 if current < int64 (totalFiles ) && elapsedSinceActivity >= 60 {
274194 stallCount ++
275- semLen := len (globalIndexSemaphore )
276- semCap := cap (globalIndexSemaphore )
277- logger .Instance .Warn ("⚠️ Indexing stall detected for %s/%s: %d/%d files. No embed activity for %ds. Semaphore: %d/%d. Stall count: %d" ,
278- opts .Language , root , current , totalFiles , elapsedSinceActivity , semLen , semCap , stallCount )
195+ logger .Instance .Warn ("[IDX] ws=%s lang=%s ⚠️ STALL: no embed activity for %ds [%d/%d] (stall #%d)" ,
196+ wsName , opts .Language , elapsedSinceActivity , current , totalFiles , stallCount )
279197 if stallCount >= 2 {
280- // Check if Ollama is still alive
281198 if err := healthcheck .PingOllama ("" ); err != nil {
282- logger .Instance .Error ("🔴 Ollama HTTP is unresponsive (%v). Forcing restart..." , err )
199+ logger .Instance .Error ("[IDX] ws=%s lang=%s ❌ Ollama unresponsive: %v — forcing restart" , wsName , opts . Language , err )
283200 } else {
284- logger .Instance .Error ("🔴 Ollama HTTP ping is OK but embedding goroutines are DEADLOCKED. Forcing restart!" )
201+ logger .Instance .Error ("[IDX] ws=%s lang=%s ❌ Ollama ping OK but embed goroutine STALLED — forcing restart" , wsName , opts . Language )
285202 }
286- // Always attempt strict restart to kill stuck runners
287203 attemptOllamaRestart ()
288204 }
289205 if stallCount >= 3 {
290206 buf := make ([]byte , 8192 )
291207 n := runtime .Stack (buf , true )
292- logger .Instance .Error ("🔴 Deadlock confirmed in indexing goroutines for %s. Goroutine dump:\n %s" , root , string (buf [:n ]))
208+ logger .Instance .Error ("[IDX] ws=%s lang=%s goroutine dump:\n %s" , wsName , opts . Language , string (buf [:n ]))
293209 }
294210 } else {
295211 stallCount = 0
@@ -300,53 +216,42 @@ func (s *Service) IndexWorkspace(ctx context.Context, root string, collection st
300216 }
301217 }()
302218
303- for i := 0 ; i < numFileWorkers ; i ++ {
304- fileWg .Add (1 )
305- go func () {
306- defer fileWg .Done ()
307- for path := range filePaths {
308- // Acquire global slot — blocks if too many concurrent indexers active
309- <- globalIndexSemaphore
310- n := int (doneFiles .Add (1 ))
311- pct := 0
312- if totalFiles > 0 {
313- pct = n * 100 / totalFiles
314- }
219+ // 5. Sequential file processing — no worker pool, no semaphore.
220+ // Embed is serial in Ollama anyway (numWorkers=1 in IndexItems), so parallelism
221+ // here only added complexity without meaningful throughput gain.
222+ var fileErrs []string
223+ for _ , path := range changedFiles {
224+ n := int (doneFiles .Add (1 ))
225+ pct := n * 100 / totalFiles
315226
316- logger .Instance .Info ("📄 [%d/%d] %s (%s, %d%%)" , n , totalFiles , filepath .Base (path ), opts .Language , pct )
317- if opts .Progress != nil {
318- opts .Progress (n , totalFiles )
319- }
227+ logger .Instance .Debug ("[IDX] ws=%s lang=%s [%d/%d] %s (%d%%)" ,
228+ wsName , opts .Language , n , totalFiles , filepath .Base (path ), pct )
320229
321- symCount , indexErr := s . IndexFile ( ctx , collection , path , state )
322- // Release slot immediately after processing
323- globalIndexSemaphore <- struct {}{ }
230+ if opts . Progress != nil {
231+ opts . Progress ( n , totalFiles )
232+ }
324233
325- if indexErr != nil {
326- logger .Instance .Error ("Failed to index %s: %v" , path , indexErr )
327- errMu .Lock ()
328- fileErrs = append (fileErrs , fmt .Sprintf ("%s: %v" , path , indexErr ))
329- errMu .Unlock ()
330- } else {
331- logger .Instance .Info (" → %d symbol(s) indexed from %s" , symCount , filepath .Base (path ))
332- }
333- }
334- }()
234+ symCount , indexErr := s .IndexFile (ctx , collection , path , state )
235+ if indexErr != nil {
236+ logger .Instance .Warn ("[IDX] ws=%s lang=%s ⚠️ %s: %v" , wsName , opts .Language , filepath .Base (path ), indexErr )
237+ fileErrs = append (fileErrs , fmt .Sprintf ("%s: %v" , path , indexErr ))
238+ } else {
239+ logger .Instance .Debug ("[IDX] ws=%s lang=%s %s → %d symbol(s)" , wsName , opts .Language , filepath .Base (path ), symCount )
240+ }
335241 }
336242
337- fileWg .Wait ()
338- close (saveStop ) // Stop the periodic save goroutine
339- logger .Instance .Info ("[INDEX] %s done: %d file(s) indexed" , opts .Language , totalFiles )
243+ close (saveStop )
340244
341245 if len (fileErrs ) > 0 {
342- logger .Instance .Warn ("% d file(s) failed to index in %s" , len (fileErrs ), root )
246+ logger .Instance .Warn ("[IDX] ws=%s lang=%s % d file(s) failed to index" , wsName , opts . Language , len (fileErrs ))
343247 }
344248
345- // 5 . Save state
249+ // 6 . Save state
346250 if err := state .Save (statePath ); err != nil {
347- logger .Instance .Warn ("Failed to save index state for %s : %v" , root , err )
251+ logger .Instance .Warn ("[IDX] ws=%s lang=%s failed to save state : %v" , wsName , opts . Language , err )
348252 }
349253
254+ logger .Instance .Info ("[IDX] ws=%s lang=%s ✅ DONE %d file(s)" , wsName , opts .Language , totalFiles )
350255 return nil
351256}
352257
0 commit comments