Skip to content

Commit 114ea1c

Browse files
committed
Merge branch 'pr-51'
2 parents 3d6e677 + 2f31ce3 commit 114ea1c

File tree

2 files changed

+474
-13
lines changed

2 files changed

+474
-13
lines changed

src/code_index_mcp/indexing/strategies/go_strategy.py

Lines changed: 208 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -21,49 +21,46 @@ def parse_file(self, file_path: str, content: str) -> Tuple[Dict[str, SymbolInfo
2121
"""Parse Go file using regex patterns."""
2222
symbols = {}
2323
functions = []
24+
lines = content.splitlines()
2425
classes = [] # Go doesn't have classes, but we'll track structs/interfaces
25-
imports = []
26+
imports = self._extract_go_imports(lines)
2627
package = None
2728

28-
lines = content.splitlines()
29-
3029
for i, line in enumerate(lines):
3130
line = line.strip()
3231

3332
# Package declaration
3433
if line.startswith('package '):
3534
package = line.split('package ')[1].strip()
3635

37-
# Import statements
38-
elif line.startswith('import '):
39-
import_match = re.search(r'import\s+"([^"]+)"', line)
40-
if import_match:
41-
imports.append(import_match.group(1))
42-
4336
# Function declarations
4437
elif line.startswith('func '):
4538
func_match = re.match(r'func\s+(\w+)\s*\(', line)
4639
if func_match:
4740
func_name = func_match.group(1)
41+
docstring = self._extract_go_comment(lines, i)
4842
symbol_id = self._create_symbol_id(file_path, func_name)
4943
symbols[symbol_id] = SymbolInfo(
5044
type="function",
5145
file=file_path,
5246
line=i + 1,
53-
signature=line
47+
signature=line,
48+
docstring=docstring
5449
)
5550
functions.append(func_name)
5651

5752
# Method declarations (func (receiver) methodName)
5853
method_match = re.match(r'func\s+\([^)]+\)\s+(\w+)\s*\(', line)
5954
if method_match:
6055
method_name = method_match.group(1)
56+
docstring = self._extract_go_comment(lines, i)
6157
symbol_id = self._create_symbol_id(file_path, method_name)
6258
symbols[symbol_id] = SymbolInfo(
6359
type="method",
6460
file=file_path,
6561
line=i + 1,
66-
signature=line
62+
signature=line,
63+
docstring=docstring
6764
)
6865
functions.append(method_name)
6966

@@ -72,11 +69,13 @@ def parse_file(self, file_path: str, content: str) -> Tuple[Dict[str, SymbolInfo
7269
struct_match = re.match(r'type\s+(\w+)\s+struct', line)
7370
if struct_match:
7471
struct_name = struct_match.group(1)
72+
docstring = self._extract_go_comment(lines, i)
7573
symbol_id = self._create_symbol_id(file_path, struct_name)
7674
symbols[symbol_id] = SymbolInfo(
7775
type="struct",
7876
file=file_path,
79-
line=i + 1
77+
line=i + 1,
78+
docstring=docstring
8079
)
8180
classes.append(struct_name)
8281

@@ -85,11 +84,13 @@ def parse_file(self, file_path: str, content: str) -> Tuple[Dict[str, SymbolInfo
8584
interface_match = re.match(r'type\s+(\w+)\s+interface', line)
8685
if interface_match:
8786
interface_name = interface_match.group(1)
87+
docstring = self._extract_go_comment(lines, i)
8888
symbol_id = self._create_symbol_id(file_path, interface_name)
8989
symbols[symbol_id] = SymbolInfo(
9090
type="interface",
9191
file=file_path,
92-
line=i + 1
92+
line=i + 1,
93+
docstring=docstring
9394
)
9495
classes.append(interface_name)
9596

@@ -147,6 +148,200 @@ def _extract_go_function_name(self, line: str) -> Optional[str]:
147148
pass
148149
return None
149150

151+
def _extract_go_imports(self, lines: List[str]) -> List[str]:
    """Collect the import paths declared in a Go source file.

    Handles single imports (``import "fmt"``), aliased / dot / blank imports
    (``import f "fmt"``), and parenthesised import blocks that span several
    lines. Comments are removed from every line before it is inspected.

    The ``import`` keyword is recognised when it is followed by whitespace,
    an opening parenthesis, or a string delimiter, so unformatted but valid
    Go such as ``import"fmt"`` or ``import("fmt")`` is also picked up.

    Args:
        lines: The file content split into lines.

    Returns:
        Import paths in the order they appear in the file.
    """
    imports: List[str] = []
    in_block_comment = False
    paren_depth = 0  # > 0 while inside an ``import ( ... )`` block

    for raw_line in lines:
        clean_line, in_block_comment = self._strip_go_comments(raw_line, in_block_comment)
        stripped = clean_line.strip()
        if not stripped:
            continue

        if paren_depth == 0:
            # Outside an import block only lines opening an import
            # declaration are of interest.
            if not stripped.startswith('import'):
                continue

            remainder = stripped[len('import'):]
            # Reject a bare keyword and identifiers that merely begin
            # with "import" (e.g. ``importer``).
            if not remainder or not (remainder[0].isspace() or remainder[0] in '("`'):
                continue

            remainder = remainder.strip()
            if not remainder:
                continue

            imports.extend(self._extract_string_literals(remainder))

            # A positive balance means the block stays open on later lines.
            paren_depth = max(
                self._count_unquoted_characters(remainder, '(')
                - self._count_unquoted_characters(remainder, ')'),
                0,
            )
            continue

        # Inside an import block: every string literal is an import path.
        imports.extend(self._extract_string_literals(clean_line))
        paren_depth += self._count_unquoted_characters(clean_line, '(')
        paren_depth -= self._count_unquoted_characters(clean_line, ')')
        if paren_depth < 0:
            paren_depth = 0

    return imports
189+
190+
def _strip_go_comments(self, line: str, in_block_comment: bool) -> Tuple[str, bool]:
191+
"""Remove Go comments from a line while tracking block comment state."""
192+
result: List[str] = []
193+
i = 0
194+
length = len(line)
195+
196+
while i < length:
197+
if in_block_comment:
198+
if line.startswith('*/', i):
199+
in_block_comment = False
200+
i += 2
201+
else:
202+
i += 1
203+
continue
204+
205+
if line.startswith('//', i):
206+
break
207+
208+
if line.startswith('/*', i):
209+
in_block_comment = True
210+
i += 2
211+
continue
212+
213+
result.append(line[i])
214+
i += 1
215+
216+
return ''.join(result), in_block_comment
217+
218+
def _extract_string_literals(self, line: str) -> List[str]:
219+
"""Return string literal values found in a line (supports " and `)."""
220+
literals: List[str] = []
221+
i = 0
222+
length = len(line)
223+
224+
while i < length:
225+
char = line[i]
226+
if char not in ('"', '`'):
227+
i += 1
228+
continue
229+
230+
delimiter = char
231+
i += 1
232+
buffer: List[str] = []
233+
while i < length:
234+
current = line[i]
235+
if delimiter == '"':
236+
if current == '\\':
237+
if i + 1 < length:
238+
buffer.append(line[i + 1])
239+
i += 2
240+
continue
241+
elif current == '"':
242+
literals.append(''.join(buffer))
243+
i += 1
244+
break
245+
else: # Raw string delimited by backticks
246+
if current == '`':
247+
literals.append(''.join(buffer))
248+
i += 1
249+
break
250+
251+
buffer.append(current)
252+
i += 1
253+
else:
254+
break
255+
256+
return literals
257+
258+
def _count_unquoted_characters(self, line: str, target: str) -> int:
259+
"""Count occurrences of a character outside string literals."""
260+
count = 0
261+
i = 0
262+
length = len(line)
263+
delimiter: Optional[str] = None
264+
265+
while i < length:
266+
char = line[i]
267+
if delimiter is None:
268+
if char in ('"', '`'):
269+
delimiter = char
270+
elif char == target:
271+
count += 1
272+
else:
273+
if delimiter == '"':
274+
if char == '\\':
275+
i += 2
276+
continue
277+
if char == '"':
278+
delimiter = None
279+
elif delimiter == '`' and char == '`':
280+
delimiter = None
281+
282+
i += 1
283+
284+
return count
285+
286+
def _extract_go_comment(self, lines: List[str], line_index: int) -> Optional[str]:
287+
"""Extract Go comment (docstring) from lines preceding the given line.
288+
289+
Go documentation comments are regular comments that appear immediately before
290+
the declaration, with no blank line in between.
291+
"""
292+
comment_lines = []
293+
294+
# Look backwards from the line before the declaration
295+
i = line_index - 1
296+
while i >= 0:
297+
stripped = lines[i].strip()
298+
299+
# Stop at empty line
300+
if not stripped:
301+
break
302+
303+
# Single-line comment
304+
if stripped.startswith('//'):
305+
comment_text = stripped[2:].strip()
306+
comment_lines.insert(0, comment_text)
307+
i -= 1
308+
# Multi-line comment block
309+
elif stripped.startswith('/*') or stripped.endswith('*/'):
310+
# Handle single-line /* comment */
311+
if stripped.startswith('/*') and stripped.endswith('*/'):
312+
comment_text = stripped[2:-2].strip()
313+
comment_lines.insert(0, comment_text)
314+
i -= 1
315+
# Handle multi-line comment block
316+
elif stripped.endswith('*/'):
317+
# Found end of multi-line comment, collect until start
318+
temp_lines = []
319+
temp_lines.insert(0, stripped[:-2].strip())
320+
i -= 1
321+
while i >= 0:
322+
temp_stripped = lines[i].strip()
323+
if temp_stripped.startswith('/*'):
324+
temp_lines.insert(0, temp_stripped[2:].strip())
325+
comment_lines = temp_lines + comment_lines
326+
i -= 1
327+
break
328+
else:
329+
temp_lines.insert(0, temp_stripped)
330+
i -= 1
331+
break
332+
else:
333+
break
334+
else:
335+
# Not a comment, stop looking
336+
break
337+
338+
if comment_lines:
339+
# Join with newlines and clean up
340+
docstring = '\n'.join(comment_lines)
341+
return docstring if docstring else None
342+
343+
return None
344+
150345
def _extract_go_called_functions(self, line: str) -> List[str]:
151346
"""Extract function names that are being called in this line."""
152347
called_functions = []

0 commit comments

Comments
 (0)