@@ -45,18 +45,18 @@ class JSONIndexBuilder:
 
     def __init__(self, project_path: str, additional_excludes: Optional[List[str]] = None):
         from ..utils import FileFilter
-        
+
         # Input validation
         if not isinstance(project_path, str):
             raise ValueError(f"Project path must be a string, got {type(project_path)}")
-        
+
         project_path = project_path.strip()
         if not project_path:
             raise ValueError("Project path cannot be empty")
-        
+
         if not os.path.isdir(project_path):
             raise ValueError(f"Project path does not exist: {project_path}")
-        
+
         self.project_path = project_path
         self.in_memory_index: Optional[Dict[str, Any]] = None
         self.strategy_factory = StrategyFactory()
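Note: the validation added here makes construction fail fast on bad input. A quick illustrative sketch of that contract; the import path for `JSONIndexBuilder` is an assumption, not shown in this diff:

```python
# Illustrative only; the JSONIndexBuilder import path is an assumption.
from code_index_mcp.indexing import JSONIndexBuilder

builder = JSONIndexBuilder("/path/to/project")  # OK when the directory exists

for bad in ("   ", 123):
    try:
        JSONIndexBuilder(bad)
    except ValueError as exc:
        print(exc)  # "Project path cannot be empty" / "Project path must be a string, ..."
```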
@@ -74,11 +74,11 @@ def __init__(self, project_path: str, additional_excludes: Optional[List[str]] =
     def _process_file(self, file_path: str, specialized_extensions: set) -> Optional[Tuple[Dict, Dict, str, bool]]:
         """
         Process a single file - designed for parallel execution.
-        
+
         Args:
             file_path: Path to the file to process
             specialized_extensions: Set of extensions with specialized parsers
-        
+
         Returns:
             Tuple of (symbols, file_info, language, is_specialized) or None on error
         """
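Since `_process_file` reports failure by returning `None` rather than raising, callers must branch before unpacking the tuple. A minimal sketch, assuming the `JSONIndexBuilder` import from the previous note; the project and file paths are placeholders:

```python
# Sketch of consuming _process_file's Optional return; paths and import are placeholders.
builder = JSONIndexBuilder("/path/to/project")
specialized_exts = set(builder.strategy_factory.get_specialized_extensions())

result = builder._process_file("/path/to/project/src/app.py", specialized_exts)
if result is None:
    print("file skipped; the error was already logged as a warning")
else:
    symbols, file_info_by_path, language, is_specialized = result
    print(f"{language}: {len(symbols)} symbols, specialized={is_specialized}")
```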
@@ -88,20 +88,20 @@ def _process_file(self, file_path: str, specialized_extensions: set) -> Optional
 
             ext = Path(file_path).suffix.lower()
             rel_path = os.path.relpath(file_path, self.project_path).replace('\\', '/')
-            
+
             # Get appropriate strategy
             strategy = self.strategy_factory.get_strategy(ext)
-            
+
             # Track strategy usage
             is_specialized = ext in specialized_extensions
-            
+
             # Parse file using strategy
             symbols, file_info = strategy.parse_file(rel_path, content)
-            
+
             logger.debug(f"Parsed {rel_path}: {len(symbols)} symbols ({file_info.language})")
-            
+
             return (symbols, {rel_path: file_info}, file_info.language, is_specialized)
-            
+
         except Exception as e:
             logger.warning(f"Error processing {file_path}: {e}")
             return None
@@ -128,49 +128,49 @@ def build_index(self, parallel: bool = True, max_workers: Optional[int] = None)
 
         # Get specialized extensions for tracking
         specialized_extensions = set(self.strategy_factory.get_specialized_extensions())
-        
+
         # Get list of files to process
         files_to_process = self._get_supported_files()
         total_files = len(files_to_process)
-        
+
         if total_files == 0:
             logger.warning("No files to process")
             return self._create_empty_index()
-        
+
         logger.info(f"Processing {total_files} files...")
-        
+
         if parallel and total_files > 1:
             # Use ThreadPoolExecutor for I/O-bound file reading
             # ProcessPoolExecutor has issues with strategy sharing
             if max_workers is None:
                 max_workers = min(os.cpu_count() or 4, total_files)
-            
+
             logger.info(f"Using parallel processing with {max_workers} workers")
-            
+
             with ThreadPoolExecutor(max_workers=max_workers) as executor:
                 # Submit all tasks
                 future_to_file = {
                     executor.submit(self._process_file, file_path, specialized_extensions): file_path
                     for file_path in files_to_process
                 }
-                
+
                 # Process completed tasks
                 processed = 0
                 for future in as_completed(future_to_file):
                     file_path = future_to_file[future]
                     result = future.result()
-                    
+
                     if result:
                         symbols, file_info_dict, language, is_specialized = result
                         all_symbols.update(symbols)
                         all_files.update(file_info_dict)
                         languages.add(language)
-                        
+
                         if is_specialized:
                             specialized_count += 1
                         else:
                             fallback_count += 1
-                        
+
                     processed += 1
                     if processed % 100 == 0:
                         logger.debug(f"Processed {processed}/{total_files} files")
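The comment in this hunk explains the design choice: file parsing is I/O-bound and the strategy objects do not share cleanly across processes, so threads are used. For reference, a stand-alone sketch of the same submit/`as_completed` fan-out pattern; none of these names come from the diff:

```python
# Self-contained sketch of the fan-out/collect pattern used above; illustrative names only.
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Optional


def parse(path: str) -> Optional[dict]:
    try:
        return {"path": path, "symbols": 0}  # stand-in for real parsing
    except Exception:
        return None  # mirror _process_file: swallow per-file errors


paths = ["a.py", "b.py", "c.py"]
merged = []
with ThreadPoolExecutor(max_workers=4) as executor:
    future_to_path = {executor.submit(parse, p): p for p in paths}
    for future in as_completed(future_to_path):
        result = future.result()
        if result:
            merged.append(result)
print(merged)
```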
@@ -184,7 +184,7 @@ def build_index(self, parallel: bool = True, max_workers: Optional[int] = None)
                     all_symbols.update(symbols)
                     all_files.update(file_info_dict)
                     languages.add(language)
-                    
+
                     if is_specialized:
                         specialized_count += 1
                     else:
@@ -218,7 +218,7 @@ def build_index(self, parallel: bool = True, max_workers: Optional[int] = None)
         logger.info(f"Strategy usage: {specialized_count} specialized, {fallback_count} fallback")
 
         return index
-    
+
     def _create_empty_index(self) -> Dict[str, Any]:
         """Create an empty index structure."""
         metadata = IndexMetadata(
@@ -231,7 +231,7 @@ def _create_empty_index(self) -> Dict[str, Any]:
             specialized_parsers=0,
             fallback_files=0
         )
-        
+
         return {
             "metadata": asdict(metadata),
             "symbols": {},
@@ -371,24 +371,24 @@ def get_file_symbols(self, file_path: str) -> List[Dict[str, Any]]:
             # Work directly with global symbols for this file
             global_symbols = self.in_memory_index.get("symbols", {})
             result = []
-            
+
             # Find all symbols for this file directly from global symbols
             for symbol_id, symbol_data in global_symbols.items():
                 symbol_file = symbol_data.get("file", "").replace("\\", "/")
-                
+
                 # Check if this symbol belongs to our file
                 if symbol_file == file_path:
                     symbol_type = symbol_data.get("type", "unknown")
                     symbol_name = symbol_id.split("::")[-1]  # Extract symbol name from ID
-                    
+
                     # Create symbol info
                     symbol_info = {
                         "name": symbol_name,
                         "called_by": symbol_data.get("called_by", []),
                         "line": symbol_data.get("line"),
                         "signature": symbol_data.get("signature")
                     }
-                    
+
                     # Categorize by type
                     if symbol_type in ["function", "method"]:
                         result.append(symbol_info)
@@ -397,7 +397,7 @@ def get_file_symbols(self, file_path: str) -> List[Dict[str, Any]]:
 
             # Sort by line number for consistent ordering
             result.sort(key=lambda x: x.get("line", 0))
-            
+
             return result
 
         except Exception as e:
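Taken together, end-to-end use of this builder might look like the sketch below. The import path, and the assumption that `build_index()` also stores its result in `in_memory_index` (which `get_file_symbols` reads), are not confirmed by this diff:

```python
# End-to-end usage sketch; import path and index shape are assumptions, not from the diff.
from code_index_mcp.indexing import JSONIndexBuilder

builder = JSONIndexBuilder("/path/to/project", additional_excludes=["build", "dist"])
index = builder.build_index(parallel=True)  # falls back to sequential for a single file
meta = index["metadata"]
print(meta["specialized_parsers"], meta["fallback_files"])

# Assumes build_index() (or a separate load step) has populated in_memory_index.
for sym in builder.get_file_symbols("src/app.py"):
    print(sym["line"], sym["name"], sym["signature"])
```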