Skip to content

Commit 2f31ce3

Browse files
committed
fix(go): harden import parsing
1 parent 32ded9d commit 2f31ce3

File tree

2 files changed

+152
-34
lines changed

2 files changed

+152
-34
lines changed

src/code_index_mcp/indexing/strategies/go_strategy.py

Lines changed: 137 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -21,40 +21,18 @@ def parse_file(self, file_path: str, content: str) -> Tuple[Dict[str, SymbolInfo
2121
"""Parse Go file using regex patterns."""
2222
symbols = {}
2323
functions = []
24+
lines = content.splitlines()
2425
classes = [] # Go doesn't have classes, but we'll track structs/interfaces
25-
imports = []
26+
imports = self._extract_go_imports(lines)
2627
package = None
2728

28-
lines = content.splitlines()
29-
in_import_block = False
30-
3129
for i, line in enumerate(lines):
3230
line = line.strip()
3331

3432
# Package declaration
3533
if line.startswith('package '):
3634
package = line.split('package ')[1].strip()
3735

38-
# Import statements
39-
elif line.startswith('import '):
40-
# Single import: import "package"
41-
import_match = re.search(r'import\s+"([^"]+)"', line)
42-
if import_match:
43-
imports.append(import_match.group(1))
44-
# Multi-line import block: import (
45-
elif '(' in line:
46-
in_import_block = True
47-
48-
# Inside import block
49-
elif in_import_block:
50-
if ')' in line:
51-
in_import_block = False
52-
else:
53-
# Extract import path from quotes
54-
import_match = re.search(r'"([^"]+)"', line)
55-
if import_match:
56-
imports.append(import_match.group(1))
57-
5836
# Function declarations
5937
elif line.startswith('func '):
6038
func_match = re.match(r'func\s+(\w+)\s*\(', line)
@@ -170,6 +148,141 @@ def _extract_go_function_name(self, line: str) -> Optional[str]:
170148
pass
171149
return None
172150

151+
def _extract_go_imports(self, lines: List[str]) -> List[str]:
    """Extract Go import paths, handling multi-line blocks and comments.

    Every line is first passed through ``_strip_go_comments`` so that
    parentheses or quotes that appear inside comments cannot disturb the
    scan. Outside of an import block only lines that begin with the
    ``import`` keyword are inspected; inside a parenthesised block every
    line is inspected until the parentheses balance again.

    Args:
        lines: The Go source file split into lines.

    Returns:
        The quoted import paths in source order. Aliases, dot imports and
        blank imports contribute only their path. Duplicates are kept.

    Note:
        A bare ``import`` keyword whose spec starts on a following line is
        not recognised.
    """
    keyword = 'import'
    imports: List[str] = []
    in_block_comment = False
    paren_depth = 0

    for raw_line in lines:
        clean_line, in_block_comment = self._strip_go_comments(raw_line, in_block_comment)
        stripped = clean_line.strip()
        if not stripped:
            continue

        if paren_depth == 0:
            if not stripped.startswith(keyword):
                continue
            remainder = stripped[len(keyword):]
            # The keyword has to end here. Accept any delimiter after it
            # (space, tab, '(', '"', '`') so that `import(` and `import"fmt"`
            # are recognised, but reject identifiers such as `importer`.
            if not remainder or remainder[0].isalnum() or remainder[0] == '_':
                continue
            spec = remainder.strip()
        else:
            # Already inside `import ( ... )`: the whole line is import specs.
            spec = clean_line

        imports.extend(self._extract_string_literals(spec))
        paren_depth += (
            self._count_unquoted_characters(spec, '(')
            - self._count_unquoted_characters(spec, ')')
        )
        # Stray closing parentheses must not push the depth below zero.
        paren_depth = max(paren_depth, 0)

    return imports
189+
190+
def _strip_go_comments(self, line: str, in_block_comment: bool) -> Tuple[str, bool]:
191+
"""Remove Go comments from a line while tracking block comment state."""
192+
result: List[str] = []
193+
i = 0
194+
length = len(line)
195+
196+
while i < length:
197+
if in_block_comment:
198+
if line.startswith('*/', i):
199+
in_block_comment = False
200+
i += 2
201+
else:
202+
i += 1
203+
continue
204+
205+
if line.startswith('//', i):
206+
break
207+
208+
if line.startswith('/*', i):
209+
in_block_comment = True
210+
i += 2
211+
continue
212+
213+
result.append(line[i])
214+
i += 1
215+
216+
return ''.join(result), in_block_comment
217+
218+
def _extract_string_literals(self, line: str) -> List[str]:
219+
"""Return string literal values found in a line (supports " and `)."""
220+
literals: List[str] = []
221+
i = 0
222+
length = len(line)
223+
224+
while i < length:
225+
char = line[i]
226+
if char not in ('"', '`'):
227+
i += 1
228+
continue
229+
230+
delimiter = char
231+
i += 1
232+
buffer: List[str] = []
233+
while i < length:
234+
current = line[i]
235+
if delimiter == '"':
236+
if current == '\\':
237+
if i + 1 < length:
238+
buffer.append(line[i + 1])
239+
i += 2
240+
continue
241+
elif current == '"':
242+
literals.append(''.join(buffer))
243+
i += 1
244+
break
245+
else: # Raw string delimited by backticks
246+
if current == '`':
247+
literals.append(''.join(buffer))
248+
i += 1
249+
break
250+
251+
buffer.append(current)
252+
i += 1
253+
else:
254+
break
255+
256+
return literals
257+
258+
def _count_unquoted_characters(self, line: str, target: str) -> int:
259+
"""Count occurrences of a character outside string literals."""
260+
count = 0
261+
i = 0
262+
length = len(line)
263+
delimiter: Optional[str] = None
264+
265+
while i < length:
266+
char = line[i]
267+
if delimiter is None:
268+
if char in ('"', '`'):
269+
delimiter = char
270+
elif char == target:
271+
count += 1
272+
else:
273+
if delimiter == '"':
274+
if char == '\\':
275+
i += 2
276+
continue
277+
if char == '"':
278+
delimiter = None
279+
elif delimiter == '`' and char == '`':
280+
delimiter = None
281+
282+
i += 1
283+
284+
return count
285+
173286
def _extract_go_comment(self, lines: List[str], line_index: int) -> Optional[str]:
174287
"""Extract Go comment (docstring) from lines preceding the given line.
175288
@@ -244,4 +357,3 @@ def _extract_go_called_functions(self, line: str) -> List[str]:
244357
called_functions.extend(matches)
245358

246359
return called_functions
247-

tests/strategies/test_go_discovery.py

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,6 @@
11
#!/usr/bin/env python3
2-
"""
3-
Test for Go symbol discovery including all symbol types.
4-
"""
2+
"""Test for Go symbol discovery including all symbol types."""
53
import pytest
6-
from textwrap import dedent
74

85
from code_index_mcp.indexing.strategies.go_strategy import GoParsingStrategy
96

@@ -15,12 +12,17 @@ def test_code_with_all_symbols():
1512
package main
1613
1714
import (
18-
"fmt"
19-
"strings"
15+
"fmt" // comment mentioning ) parentheses )
16+
strutil "strings"
17+
helper "example.com/project/toolkit"
18+
) // closing comment with ) ) characters
19+
import ("math" /* inline comment ) inside */)
20+
import . "errors"
21+
import _ "embed"
22+
import (
23+
"time"
2024
)
2125
22-
import "errors"
23-
2426
// Application version constant
2527
const VERSION = "1.0.0"
2628
@@ -154,10 +156,14 @@ def test_go_symbol_discovery(test_code_with_all_symbols):
154156
# Verify package is extracted
155157
assert file_info.package == "main"
156158

157-
# Verify imports are extracted (both multi-line block and single-line)
159+
# Verify imports are extracted (including tricky formats and aliases)
158160
assert "fmt" in file_info.imports
159161
assert "strings" in file_info.imports
160162
assert "errors" in file_info.imports
163+
assert "example.com/project/toolkit" in file_info.imports
164+
assert "math" in file_info.imports
165+
assert "embed" in file_info.imports
166+
assert "time" in file_info.imports
161167

162168
# Verify all expected functions are in file_info
163169
discovered_functions = file_info.symbols.get('functions', [])

0 commit comments

Comments
 (0)