@@ -21,49 +21,46 @@ def parse_file(self, file_path: str, content: str) -> Tuple[Dict[str, SymbolInfo
2121 """Parse Go file using regex patterns."""
2222 symbols = {}
2323 functions = []
24+ lines = content .splitlines ()
2425 classes = [] # Go doesn't have classes, but we'll track structs/interfaces
25- imports = []
26+ imports = self . _extract_go_imports ( lines )
2627 package = None
2728
28- lines = content .splitlines ()
29-
3029 for i , line in enumerate (lines ):
3130 line = line .strip ()
3231
3332 # Package declaration
3433 if line .startswith ('package ' ):
3534 package = line .split ('package ' )[1 ].strip ()
3635
37- # Import statements
38- elif line .startswith ('import ' ):
39- import_match = re .search (r'import\s+"([^"]+)"' , line )
40- if import_match :
41- imports .append (import_match .group (1 ))
42-
4336 # Function declarations
4437 elif line .startswith ('func ' ):
4538 func_match = re .match (r'func\s+(\w+)\s*\(' , line )
4639 if func_match :
4740 func_name = func_match .group (1 )
41+ docstring = self ._extract_go_comment (lines , i )
4842 symbol_id = self ._create_symbol_id (file_path , func_name )
4943 symbols [symbol_id ] = SymbolInfo (
5044 type = "function" ,
5145 file = file_path ,
5246 line = i + 1 ,
53- signature = line
47+ signature = line ,
48+ docstring = docstring
5449 )
5550 functions .append (func_name )
5651
5752 # Method declarations (func (receiver) methodName)
5853 method_match = re .match (r'func\s+\([^)]+\)\s+(\w+)\s*\(' , line )
5954 if method_match :
6055 method_name = method_match .group (1 )
56+ docstring = self ._extract_go_comment (lines , i )
6157 symbol_id = self ._create_symbol_id (file_path , method_name )
6258 symbols [symbol_id ] = SymbolInfo (
6359 type = "method" ,
6460 file = file_path ,
6561 line = i + 1 ,
66- signature = line
62+ signature = line ,
63+ docstring = docstring
6764 )
6865 functions .append (method_name )
6966
@@ -72,11 +69,13 @@ def parse_file(self, file_path: str, content: str) -> Tuple[Dict[str, SymbolInfo
7269 struct_match = re .match (r'type\s+(\w+)\s+struct' , line )
7370 if struct_match :
7471 struct_name = struct_match .group (1 )
72+ docstring = self ._extract_go_comment (lines , i )
7573 symbol_id = self ._create_symbol_id (file_path , struct_name )
7674 symbols [symbol_id ] = SymbolInfo (
7775 type = "struct" ,
7876 file = file_path ,
79- line = i + 1
77+ line = i + 1 ,
78+ docstring = docstring
8079 )
8180 classes .append (struct_name )
8281
@@ -85,11 +84,13 @@ def parse_file(self, file_path: str, content: str) -> Tuple[Dict[str, SymbolInfo
8584 interface_match = re .match (r'type\s+(\w+)\s+interface' , line )
8685 if interface_match :
8786 interface_name = interface_match .group (1 )
87+ docstring = self ._extract_go_comment (lines , i )
8888 symbol_id = self ._create_symbol_id (file_path , interface_name )
8989 symbols [symbol_id ] = SymbolInfo (
9090 type = "interface" ,
9191 file = file_path ,
92- line = i + 1
92+ line = i + 1 ,
93+ docstring = docstring
9394 )
9495 classes .append (interface_name )
9596
@@ -147,6 +148,200 @@ def _extract_go_function_name(self, line: str) -> Optional[str]:
147148 pass
148149 return None
149150
def _extract_go_imports(self, lines: List[str]) -> List[str]:
    """Collect the import paths declared in a Go source file.

    Handles single-line imports (``import "fmt"``, ``import f "fmt"``),
    parenthesised blocks spanning several lines, and ignores text inside
    ``//`` and ``/* */`` comments. The ``import`` keyword is accepted when
    followed by whitespace, ``(`` or a string delimiter, so ``import(`` and
    ``import"fmt"`` are recognised as well as the gofmt-style ``import (``.

    Args:
        lines: The file content split into lines.

    Returns:
        Import paths in the order they appear (duplicates are kept).
    """
    keyword = 'import'
    imports: List[str] = []
    in_block_comment = False
    paren_depth = 0  # > 0 while inside an ``import ( ... )`` block

    for raw_line in lines:
        code, in_block_comment = self._strip_go_comments(raw_line, in_block_comment)
        stripped = code.strip()
        if not stripped:
            continue

        if paren_depth == 0:
            # Outside a block only lines that open an import declaration matter.
            if not stripped.startswith(keyword):
                continue
            remainder = stripped[len(keyword):]
            # Reject identifiers that merely start with "import"
            # (e.g. ``imports := ...``) and a bare keyword with nothing after it.
            if not remainder or not (remainder[0].isspace() or remainder[0] in '("`'):
                continue
            segment = remainder.strip()
            if not segment:
                continue
        else:
            segment = code

        imports.extend(self._extract_string_literals(segment))
        paren_depth += (
            self._count_unquoted_characters(segment, '(')
            - self._count_unquoted_characters(segment, ')')
        )
        # A stray ')' must not push the depth below zero.
        if paren_depth < 0:
            paren_depth = 0

    return imports
189+
190+ def _strip_go_comments (self , line : str , in_block_comment : bool ) -> Tuple [str , bool ]:
191+ """Remove Go comments from a line while tracking block comment state."""
192+ result : List [str ] = []
193+ i = 0
194+ length = len (line )
195+
196+ while i < length :
197+ if in_block_comment :
198+ if line .startswith ('*/' , i ):
199+ in_block_comment = False
200+ i += 2
201+ else :
202+ i += 1
203+ continue
204+
205+ if line .startswith ('//' , i ):
206+ break
207+
208+ if line .startswith ('/*' , i ):
209+ in_block_comment = True
210+ i += 2
211+ continue
212+
213+ result .append (line [i ])
214+ i += 1
215+
216+ return '' .join (result ), in_block_comment
217+
218+ def _extract_string_literals (self , line : str ) -> List [str ]:
219+ """Return string literal values found in a line (supports " and `)."""
220+ literals : List [str ] = []
221+ i = 0
222+ length = len (line )
223+
224+ while i < length :
225+ char = line [i ]
226+ if char not in ('"' , '`' ):
227+ i += 1
228+ continue
229+
230+ delimiter = char
231+ i += 1
232+ buffer : List [str ] = []
233+ while i < length :
234+ current = line [i ]
235+ if delimiter == '"' :
236+ if current == '\\ ' :
237+ if i + 1 < length :
238+ buffer .append (line [i + 1 ])
239+ i += 2
240+ continue
241+ elif current == '"' :
242+ literals .append ('' .join (buffer ))
243+ i += 1
244+ break
245+ else : # Raw string delimited by backticks
246+ if current == '`' :
247+ literals .append ('' .join (buffer ))
248+ i += 1
249+ break
250+
251+ buffer .append (current )
252+ i += 1
253+ else :
254+ break
255+
256+ return literals
257+
258+ def _count_unquoted_characters (self , line : str , target : str ) -> int :
259+ """Count occurrences of a character outside string literals."""
260+ count = 0
261+ i = 0
262+ length = len (line )
263+ delimiter : Optional [str ] = None
264+
265+ while i < length :
266+ char = line [i ]
267+ if delimiter is None :
268+ if char in ('"' , '`' ):
269+ delimiter = char
270+ elif char == target :
271+ count += 1
272+ else :
273+ if delimiter == '"' :
274+ if char == '\\ ' :
275+ i += 2
276+ continue
277+ if char == '"' :
278+ delimiter = None
279+ elif delimiter == '`' and char == '`' :
280+ delimiter = None
281+
282+ i += 1
283+
284+ return count
285+
286+ def _extract_go_comment (self , lines : List [str ], line_index : int ) -> Optional [str ]:
287+ """Extract Go comment (docstring) from lines preceding the given line.
288+
289+ Go documentation comments are regular comments that appear immediately before
290+ the declaration, with no blank line in between.
291+ """
292+ comment_lines = []
293+
294+ # Look backwards from the line before the declaration
295+ i = line_index - 1
296+ while i >= 0 :
297+ stripped = lines [i ].strip ()
298+
299+ # Stop at empty line
300+ if not stripped :
301+ break
302+
303+ # Single-line comment
304+ if stripped .startswith ('//' ):
305+ comment_text = stripped [2 :].strip ()
306+ comment_lines .insert (0 , comment_text )
307+ i -= 1
308+ # Multi-line comment block
309+ elif stripped .startswith ('/*' ) or stripped .endswith ('*/' ):
310+ # Handle single-line /* comment */
311+ if stripped .startswith ('/*' ) and stripped .endswith ('*/' ):
312+ comment_text = stripped [2 :- 2 ].strip ()
313+ comment_lines .insert (0 , comment_text )
314+ i -= 1
315+ # Handle multi-line comment block
316+ elif stripped .endswith ('*/' ):
317+ # Found end of multi-line comment, collect until start
318+ temp_lines = []
319+ temp_lines .insert (0 , stripped [:- 2 ].strip ())
320+ i -= 1
321+ while i >= 0 :
322+ temp_stripped = lines [i ].strip ()
323+ if temp_stripped .startswith ('/*' ):
324+ temp_lines .insert (0 , temp_stripped [2 :].strip ())
325+ comment_lines = temp_lines + comment_lines
326+ i -= 1
327+ break
328+ else :
329+ temp_lines .insert (0 , temp_stripped )
330+ i -= 1
331+ break
332+ else :
333+ break
334+ else :
335+ # Not a comment, stop looking
336+ break
337+
338+ if comment_lines :
339+ # Join with newlines and clean up
340+ docstring = '\n ' .join (comment_lines )
341+ return docstring if docstring else None
342+
343+ return None
344+
150345 def _extract_go_called_functions (self , line : str ) -> List [str ]:
151346 """Extract function names that are being called in this line."""
152347 called_functions = []
0 commit comments