99import json
1010import logging
1111import os
12+ import re
1213import tempfile
1314import threading
1415import fnmatch
1516from pathlib import Path
1617from typing import Dict , List , Optional , Any
1718
1819from .json_index_builder import JSONIndexBuilder
19- from ..constants import SETTINGS_DIR , INDEX_FILE
20+ from ..constants import SETTINGS_DIR , INDEX_FILE , INDEX_FILE_SHALLOW
2021
2122logger = logging .getLogger (__name__ )
2223
@@ -29,6 +30,8 @@ def __init__(self):
2930 self .index_builder : Optional [JSONIndexBuilder ] = None
3031 self .temp_dir : Optional [str ] = None
3132 self .index_path : Optional [str ] = None
33+ self .shallow_index_path : Optional [str ] = None
34+ self ._shallow_file_list : Optional [List [str ]] = None
3235 self ._lock = threading .RLock ()
3336 logger .info ("Initialized JSON Index Manager" )
3437
@@ -59,6 +62,7 @@ def set_project_path(self, project_path: str) -> bool:
5962 os .makedirs (self .temp_dir , exist_ok = True )
6063
6164 self .index_path = os .path .join (self .temp_dir , INDEX_FILE )
65+ self .shallow_index_path = os .path .join (self .temp_dir , INDEX_FILE_SHALLOW )
6266
6367 logger .info (f"Set project path: { project_path } " )
6468 logger .info (f"Index storage: { self .index_path } " )
@@ -114,6 +118,52 @@ def load_index(self) -> bool:
114118 logger .error (f"Failed to load index: { e } " )
115119 return False
116120
def build_shallow_index(self) -> bool:
    """Build the minimal shallow index (file list) and persist it to disk.

    The list is obtained from ``self.index_builder.build_shallow_file_list()``
    and written as a JSON array to ``self.shallow_index_path``. The write is
    done through a sibling temporary file that is swapped into place with
    ``os.replace``, so a failure while serializing or writing never
    truncates or corrupts an index file that already exists.

    On success the in-memory cache ``self._shallow_file_list`` is updated
    to the freshly built list.

    Returns:
        True when the index was built and saved; False when the manager is
        not initialized (missing builder, project path or index path) or
        when building/writing failed (the error is logged).
    """
    with self._lock:
        if not self.index_builder or not self.project_path or not self.shallow_index_path:
            logger.error("Index builder not initialized for shallow index")
            return False

        # Per-process temp name next to the target so os.replace stays on
        # the same filesystem.
        tmp_path = f"{self.shallow_index_path}.{os.getpid()}.tmp"
        try:
            file_list = self.index_builder.build_shallow_file_list()
            # Evaluate len() before touching the disk: a non-sized result
            # (e.g. None) fails here instead of after a bad file is saved.
            file_count = len(file_list)

            # Persist as a JSON array for minimal overhead
            with open(tmp_path, 'w', encoding='utf-8') as f:
                json.dump(file_list, f, ensure_ascii=False)
            os.replace(tmp_path, self.shallow_index_path)

            self._shallow_file_list = file_list
            logger.info(f"Saved shallow index with {file_count} files to {self.shallow_index_path}")
            return True
        except Exception as e:
            logger.error(f"Failed to build shallow index: {e}")
            try:
                os.remove(tmp_path)
            except OSError:
                # Temp file was never created or is already gone.
                pass
            return False
139+
def load_shallow_index(self) -> bool:
    """Load the shallow index (file list) from disk into memory.

    Reads the JSON array stored at ``self.shallow_index_path`` and caches
    a normalized copy in ``self._shallow_file_list``. Normalization:

    - backslashes (single or doubled) become forward slashes;
    - one leading ``./`` is stripped;
    - non-string entries are skipped;
    - entries that are empty after normalization (``""`` or ``"./"``) are
      skipped, so an empty path is never cached.

    Returns:
        True when the list was loaded; False when no index file exists,
        the JSON payload is not a list, or reading/parsing failed (the
        problem is logged). On failure the cached list is left untouched.
    """
    with self._lock:
        try:
            if not self.shallow_index_path or not os.path.exists(self.shallow_index_path):
                logger.warning("No existing shallow index found")
                return False
            with open(self.shallow_index_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
            if not isinstance(data, list):
                logger.error("Shallow index format invalid (expected list)")
                return False

            # Normalize paths
            normalized = []
            for p in data:
                if not isinstance(p, str):
                    continue
                # Doubled backslashes first, so they collapse to one '/'.
                q = p.replace('\\\\', '/').replace('\\', '/')
                if q.startswith('./'):
                    q = q[2:]
                if q:
                    normalized.append(q)

            self._shallow_file_list = normalized
            logger.info(f"Loaded shallow index with {len(normalized)} files")
            return True
        except Exception as e:
            logger.error(f"Failed to load shallow index: {e}")
            return False
166+
117167 def refresh_index (self ) -> bool :
118168 """Refresh the index (rebuild and reload)."""
119169 with self ._lock :
@@ -123,7 +173,14 @@ def refresh_index(self) -> bool:
123173 return False
124174
125175 def find_files (self , pattern : str = "*" ) -> List [str ]:
126- """Find files matching a pattern."""
176+ """
177+ Find files matching a glob pattern using the SHALLOW file list only.
178+
179+ Notes:
180+ - '*' does not cross '/'
181+ - '**' matches across directories
182+ - Always sources from the shallow index for consistency and speed
183+ """
127184 with self ._lock :
128185 # Input validation
129186 if not isinstance (pattern , str ):
@@ -134,18 +191,27 @@ def find_files(self, pattern: str = "*") -> List[str]:
134191 if not pattern :
135192 pattern = "*"
136193
137- if not self .index_builder or not self .index_builder .in_memory_index :
138- logger .warning ("Index not loaded" )
139- return []
194+ # Normalize to forward slashes
195+ norm_pattern = pattern .replace ('\\ \\ ' , '/' ).replace ('\\ ' , '/' )
196+
197+ # Build glob regex: '*' does not cross '/', '**' crosses directories
198+ regex = self ._compile_glob_regex (norm_pattern )
140199
200+ # Always use shallow index for file discovery
141201 try :
142- files = list (self .index_builder .in_memory_index ["files" ].keys ())
202+ if self ._shallow_file_list is None :
203+ # Try load existing shallow index; if missing, build then load
204+ if not self .load_shallow_index ():
205+ # If still not available, attempt to build
206+ if self .build_shallow_index ():
207+ self .load_shallow_index ()
143208
144- if pattern == "*" :
209+ files = list (self ._shallow_file_list or [])
210+
211+ if norm_pattern == "*" :
145212 return files
146213
147- # Simple pattern matching
148- return [f for f in files if fnmatch .fnmatch (f , pattern )]
214+ return [f for f in files if regex .match (f ) is not None ]
149215
150216 except Exception as e :
151217 logger .error (f"Error finding files: { e } " )
@@ -356,6 +422,39 @@ def cleanup(self):
356422 self .index_path = None
357423 logger .info ("Cleaned up JSON Index Manager" )
358424
@staticmethod
def _compile_glob_regex(pattern: str) -> re.Pattern:
    """Compile a glob pattern into an anchored regular expression.

    Translation rules:

    - ``*``   matches any run of characters except ``/``
    - ``?``   matches exactly one character except ``/``
    - ``**/`` at the start of a path segment matches zero or more whole
      directories, so ``**/*.py`` also matches a top-level ``main.py``
    - any other ``**`` matches any run of characters, including ``/``
    - every other character is matched literally (escaped with
      ``re.escape``, so regex metacharacters, brackets and backslashes in
      the pattern carry no special meaning)

    Examples:
        src/*.py    -> .py files directly under src
        **/*.py     -> .py files at any depth, including the root
        src/**/x.py -> src/x.py, src/a/x.py, src/a/b/x.py, ...

    Args:
        pattern: Glob pattern using ``/`` as the path separator.

    Returns:
        A compiled pattern anchored with ``^`` and ``$``.
    """
    pieces = []
    pos = 0
    length = len(pattern)
    while pos < length:
        ch = pattern[pos]
        if ch == '*':
            if pattern.startswith('**', pos):
                at_segment_start = pos == 0 or pattern[pos - 1] == '/'
                if at_segment_start and pattern.startswith('/', pos + 2):
                    # '**/' -> zero or more complete directory components
                    pieces.append('(?:.*/)?')
                    pos += 3
                else:
                    # bare '**' -> match across directories
                    pieces.append('.*')
                    pos += 2
                continue
            pieces.append('[^/]*')
        elif ch == '?':
            pieces.append('[^/]')
        else:
            pieces.append(re.escape(ch))
        pos += 1
    return re.compile('^' + ''.join(pieces) + '$')
457+
359458
360459# Global instance
361460_index_manager = JSONIndexManager ()
@@ -364,4 +463,3 @@ def cleanup(self):
def get_index_manager() -> JSONIndexManager:
    """Return the module-level JSONIndexManager instance shared by all callers."""
    manager = _index_manager
    return manager
367-
0 commit comments