Skip to content

Commit 66b956b

Browse files
RecoDemo and claude committed
Fix get_usage_stats: use per-tool cost multipliers instead of full codebase per query
The naive "without indexer" estimate was massively overstated — it assumed every query would require reading the entire codebase. Now each tool has a realistic multiplier (e.g. find_symbol=5%, get_change_impact=30%) reflecting what fraction of the codebase you'd actually need to read without the indexer. Co-Authored-By: Claude Opus 4.6 <[email protected]>
1 parent 194341e commit 66b956b

File tree

3 files changed

+81
-5
lines changed

3 files changed

+81
-5
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
44

55
[project]
66
name = "mcp-codebase-index"
7-
version = "0.4.4"
7+
version = "0.4.5"
88
description = "Structural codebase indexer with MCP server for AI-assisted development"
99
requires-python = ">=3.11"
1010
readme = "README.md"

src/mcp_codebase_index/server.py

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,27 @@
6060
_tool_call_counts: dict[str, int] = {}
6161
_total_chars_returned: int = 0
6262

63+
# Realistic estimate of the fraction (0.0-1.0) of the codebase you'd need to read per call without the indexer
64+
_TOOL_COST_MULTIPLIERS: dict[str, float] = {
65+
"get_project_summary": 0.10,
66+
"list_files": 0.01,
67+
"get_structure_summary": 0.05,
68+
"get_functions": 0.05,
69+
"get_classes": 0.05,
70+
"get_imports": 0.03,
71+
"get_function_source": 0.02,
72+
"get_class_source": 0.03,
73+
"find_symbol": 0.05,
74+
"get_dependencies": 0.10,
75+
"get_dependents": 0.15,
76+
"get_change_impact": 0.30,
77+
"get_call_chain": 0.20,
78+
"get_file_dependencies": 0.02,
79+
"get_file_dependents": 0.10,
80+
"search_codebase": 0.15,
81+
"reindex": 0.0,
82+
}
83+
6384

6485
def _format_result(value: object) -> str:
6586
"""Format a query result as readable text."""
@@ -101,8 +122,13 @@ def _format_usage_stats() -> str:
101122
if source_chars > 0:
102123
lines.append(f"Total source in index: {source_chars:,} chars")
103124
if query_calls > 0 and source_chars > _total_chars_returned:
104-
# Each query could have required reading the full source
105-
naive_chars = source_chars * query_calls
125+
# Per-tool estimate of what you'd read without the indexer
126+
naive_chars = 0
127+
for tool_name, count in _tool_call_counts.items():
128+
if tool_name == "get_usage_stats":
129+
continue
130+
multiplier = _TOOL_COST_MULTIPLIERS.get(tool_name, 0.10)
131+
naive_chars += int(source_chars * multiplier * count)
106132
reduction = (1 - _total_chars_returned / naive_chars) * 100 if naive_chars > 0 else 0
107133
lines.append(
108134
f"Estimated without indexer: {naive_chars:,} chars "

tests/test_usage_stats.py

Lines changed: 52 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,8 @@ def test_with_indexed_project(self, tmp_path):
8888
assert "Total source in index:" in result
8989
assert "Estimated token savings:" in result
9090

91-
def test_token_savings_calculation(self, tmp_path):
91+
def test_token_savings_uses_per_tool_multipliers(self, tmp_path):
92+
"""Naive estimate should use per-tool cost multipliers, not full codebase per query."""
9293
import mcp_codebase_index.server as srv
9394
from mcp_codebase_index.project_indexer import ProjectIndexer
9495

@@ -99,15 +100,64 @@ def test_token_savings_calculation(self, tmp_path):
99100
indexer.index()
100101
srv._indexer = indexer
101102

103+
source_chars = sum(m.total_chars for m in indexer._project_index.files.values())
104+
105+
# find_symbol has multiplier 0.05, so 10 calls = source_chars * 0.05 * 10
102106
srv._tool_call_counts["find_symbol"] = 10
103107
srv._total_chars_returned = 500
104108

105109
result = srv._format_usage_stats()
106110
assert "Estimated without indexer:" in result
107111
assert "Estimated with indexer:" in result
108-
# 500 chars returned vs 6000 * 10 = 60000 naive
109112
assert "tokens" in result
110113

114+
# The naive estimate should be source_chars * 0.05 * 10, NOT source_chars * 10
115+
expected_naive = int(source_chars * 0.05 * 10)
116+
assert f"{expected_naive:,} chars" in result
117+
118+
def test_different_tools_produce_different_costs(self, tmp_path):
119+
"""Tools with different multipliers should produce different naive estimates."""
120+
import mcp_codebase_index.server as srv
121+
from mcp_codebase_index.project_indexer import ProjectIndexer
122+
123+
(tmp_path / "code.py").write_text("x = 1\n" * 1000)
124+
125+
indexer = ProjectIndexer(str(tmp_path), include_patterns=["**/*.py"])
126+
indexer.index()
127+
srv._indexer = indexer
128+
129+
source_chars = sum(m.total_chars for m in indexer._project_index.files.values())
130+
131+
# Test with a cheap tool (list_files: 0.01)
132+
srv._tool_call_counts["list_files"] = 1
133+
srv._total_chars_returned = 50
134+
result_cheap = srv._format_usage_stats()
135+
136+
# Reset and test with an expensive tool (get_change_impact: 0.30)
137+
srv._tool_call_counts.clear()
138+
srv._total_chars_returned = 50
139+
srv._tool_call_counts["get_change_impact"] = 1
140+
result_expensive = srv._format_usage_stats()
141+
142+
# Extract the "Estimated without indexer" numbers
143+
def extract_naive(text: str) -> int:
144+
for line in text.splitlines():
145+
if "Estimated without indexer:" in line:
146+
# Format: "Estimated without indexer: N chars (M tokens) over Q queries"
147+
num_str = line.split(":")[1].split("chars")[0].strip().replace(",", "")
148+
return int(num_str)
149+
return 0
150+
151+
cheap_naive = extract_naive(result_cheap)
152+
expensive_naive = extract_naive(result_expensive)
153+
154+
assert cheap_naive > 0
155+
assert expensive_naive > 0
156+
assert expensive_naive > cheap_naive
157+
# Verify exact values based on multipliers
158+
assert cheap_naive == int(source_chars * 0.01)
159+
assert expensive_naive == int(source_chars * 0.30)
160+
111161
def test_no_savings_section_without_index(self):
112162
import mcp_codebase_index.server as srv
113163

0 commit comments

Comments
 (0)