Skip to content

Commit ac46028

Browse files
committed
fix(indexing): avoid sqlite symbol id collisions
1 parent fccd97c commit ac46028

File tree

2 files changed

+44
-10
lines changed

2 files changed

+44
-10
lines changed

src/code_index_mcp/indexing/strategies/base_strategy.py

Lines changed: 14 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -49,18 +49,22 @@ def _create_symbol_id(self, file_path: str, symbol_name: str) -> str:
4949
return f"{relative_path}::{symbol_name}"
5050

5151
def _get_relative_path(self, file_path: str) -> str:
    """Normalize a file path into the form used inside symbol identifiers.

    The path is collapsed (redundant ``.`` / ``..`` segments removed) and
    rewritten with forward slashes, so the same file yields the same
    identifier prefix however the caller spelled its separators.

    Args:
        file_path: Path of the file being indexed. Presumably relative to
            the project root -- this method does not relativize absolute
            paths; verify against callers.

    Returns:
        The normalized, ``/``-separated path. An empty string is returned
        for empty input or a path that collapses to the current directory.
        Paths that ``os.path.isabs`` reports as absolute keep their root;
        any other path has leading slashes stripped.
    """
    # Local import keeps this method self-contained; posixpath is stdlib.
    import posixpath

    if not file_path:
        return ""

    # First pass applies host-OS rules (drive letters / UNC roots on Windows).
    normalized = os.path.normpath(file_path).replace("\\", "/")
    # Second pass collapses "." / ".." segments that only became visible once
    # backslashes were converted -- on POSIX hosts os.path.normpath treats a
    # backslash as an ordinary character and leaves "a\\..\\b" untouched.
    normalized = posixpath.normpath(normalized)
    if normalized == ".":
        return ""

    # A non-absolute input may still start with "/" after conversion
    # (e.g. "\\foo" on POSIX); strip it so the identifier stays relative.
    if not os.path.isabs(file_path):
        normalized = normalized.lstrip("/")

    return normalized or os.path.basename(file_path)
6468

6569
def _extract_line_number(self, content: str, symbol_position: int) -> int:
6670
"""

tests/indexing/test_symbol_ids.py

Lines changed: 30 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,30 @@
1+
"""Tests for symbol identifier generation."""
2+
3+
from code_index_mcp.indexing.json_index_builder import JSONIndexBuilder
4+
5+
6+
def test_symbol_ids_use_relative_paths(tmp_path):
    """Same-named functions in same-named files must get distinct symbol IDs.

    Builds a throwaway project with ``scripts/foo.py`` and ``examples/foo.py``,
    each defining ``foo``, indexes it, and checks that both symbols are keyed
    by their directory-qualified path.
    """
    root = tmp_path / "project"

    # Directory name -> source text; the files differ only by folder and body.
    fixtures = {
        "scripts": "def foo():\n" " return 1\n",
        "examples": "def foo():\n" " return 2\n",
    }
    for folder, source in fixtures.items():
        target_dir = root / folder
        target_dir.mkdir(parents=True)
        (target_dir / "foo.py").write_text(source, encoding="utf-8")

    symbols = JSONIndexBuilder(str(root)).build_index(parallel=False)["symbols"]

    assert "scripts/foo.py::foo" in symbols
    assert "examples/foo.py::foo" in symbols

    # Exactly two distinct IDs end in "::foo" -- neither overwrote the other.
    foo_ids = {sid for sid in symbols if sid.endswith("::foo")}
    assert len(foo_ids) == 2

0 commit comments

Comments
 (0)