|
32 | 32 | import json |
33 | 33 | import os |
34 | 34 | import sys |
| 35 | +import pickle |
35 | 36 | import time |
36 | 37 | import traceback |
37 | 38 |
|
|
55 | 56 | _query_fns: dict | None = None |
56 | 57 | _is_git: bool = False |
57 | 58 |
|
| 59 | +# Persistent cache |
| 60 | +_CACHE_FILENAME = ".codebase-index-cache.pkl" |
| 61 | +_CACHE_VERSION = 1 # Bump when ProjectIndex schema changes |
| 62 | + |
58 | 63 | # Session usage stats |
59 | 64 | _session_start: float = time.time() |
60 | 65 | _tool_call_counts: dict[str, int] = {} |
@@ -156,33 +161,116 @@ def _format_duration(seconds: float) -> str: |
156 | 161 | return f"{hours}h {mins}m" |
157 | 162 |
|
158 | 163 |
|
def _cache_path(project_root: str) -> str:
    """Build the location of the pickle cache file for a project.

    Args:
        project_root: Directory of the project whose index is cached.

    Returns:
        ``project_root`` joined with the module-level ``_CACHE_FILENAME``.
    """
    cache_file = os.path.join(project_root, _CACHE_FILENAME)
    return cache_file
| 167 | + |
| 168 | + |
def _save_cache(index: "ProjectIndex") -> None:
    """Persist the project index to a pickle cache file.

    The payload is a dict holding ``_CACHE_VERSION`` under ``"version"`` and
    the index itself under ``"index"``. It is first written to a temporary
    sibling file and then moved over the real cache path, so a failed or
    interrupted dump never leaves a truncated cache behind and never destroys
    the previous good cache.

    Saving is best-effort: any failure is reported on stderr and swallowed.

    Args:
        index: The index to persist; its ``root_path`` determines where the
            cache file is written.
    """
    tmp_path = None
    try:
        path = _cache_path(index.root_path)
        # Include the PID so concurrent writers for the same project root do
        # not scribble over each other's temporary file.
        tmp_path = f"{path}.{os.getpid()}.tmp"
        payload = {"version": _CACHE_VERSION, "index": index}
        with open(tmp_path, "wb") as f:
            pickle.dump(payload, f, protocol=pickle.HIGHEST_PROTOCOL)
        # Same-directory rename: replaces any existing cache in one step.
        os.replace(tmp_path, path)
        tmp_path = None  # Ownership moved to the final path; nothing to clean up.
        print(f"[mcp-codebase-index] Cache saved → {path}", file=sys.stderr)
    except Exception as exc:
        print(f"[mcp-codebase-index] Cache save failed: {exc}", file=sys.stderr)
    finally:
        if tmp_path is not None:
            # The dump or the rename failed; drop the partial temp file if
            # it was created at all.
            try:
                os.remove(tmp_path)
            except OSError:
                pass
| 180 | + |
| 181 | + |
def _load_cache(project_root: str) -> "ProjectIndex | None":
    """Load a cached project index if it exists and is compatible.

    Args:
        project_root: Directory expected to contain the cache file.

    Returns:
        The cached ``ProjectIndex`` when the cache file exists, unpickles
        cleanly, is a dict whose ``"version"`` equals ``_CACHE_VERSION`` and
        whose ``"index"`` is a ``ProjectIndex`` instance; otherwise ``None``.
        A missing file is a silent miss; every other rejection or failure is
        reported on stderr and also treated as a miss.
    """
    path = _cache_path(project_root)
    try:
        # SECURITY(review): pickle.load() can execute arbitrary code embedded
        # in the file, and this file lives inside the project tree. A cloned
        # or otherwise untrusted repository could ship a crafted cache file.
        # Consider a non-executable format or keeping the cache outside the
        # project directory.
        with open(path, "rb") as f:
            payload = pickle.load(f)
        if not isinstance(payload, dict) or payload.get("version") != _CACHE_VERSION:
            print("[mcp-codebase-index] Cache version mismatch, ignoring", file=sys.stderr)
            return None
        index = payload["index"]
        # Imported locally under an alias; a failing import is handled like
        # any other load failure below.
        from mcp_codebase_index.models import ProjectIndex as PI
        if not isinstance(index, PI):
            print(
                "[mcp-codebase-index] Cached object is not a ProjectIndex, ignoring",
                file=sys.stderr,
            )
            return None
        return index
    except FileNotFoundError:
        # No cache yet (or it vanished between calls): an ordinary miss.
        return None
    except Exception as exc:
        print(f"[mcp-codebase-index] Cache load failed: {exc}", file=sys.stderr)
        return None
| 201 | + |
| 202 | + |
def _ensure_index() -> None:
    """Lazily initialise the project index the first time a tool needs it.

    Initialisation is deferred to the first tool call, not done at startup,
    so the MCP server can complete its handshake immediately; indexing a
    large project up front would make Claude Code time out waiting for the
    server to become ready.

    Before indexing, the pickle cache is consulted. The cached index is
    reused when the project is a git repository, the cache records a git ref,
    and either that ref equals the current HEAD or the changeset since that
    ref is non-empty and touches at most 20 files. In every other case a
    full rebuild is performed via ``_build_index``.
    """
    global _project_root, _indexer, _query_fns, _is_git

    # Already initialised by an earlier call.
    if _indexer is not None:
        return

    _project_root = os.environ.get("PROJECT_ROOT", os.getcwd())
    _is_git = is_git_repo(_project_root)

    def _install(snapshot) -> None:
        # Wire a cached index into a fresh indexer and the query functions.
        global _indexer, _query_fns
        _indexer = ProjectIndexer(_project_root)
        _indexer._project_index = snapshot
        _query_fns = create_project_query_functions(snapshot)

    snapshot = _load_cache(_project_root)
    cache_usable = (
        snapshot is not None
        and _is_git
        and bool(snapshot.last_indexed_git_ref)
    )
    if not cache_usable:
        _build_index()
        return

    cached_ref = snapshot.last_indexed_git_ref
    if get_head_commit(_project_root) == cached_ref:
        # HEAD is exactly the commit the cache was built from.
        print("[mcp-codebase-index] Cache hit (git ref matches)", file=sys.stderr)
        _install(snapshot)
        return

    # HEAD moved: measure how far the working tree drifted from the cache.
    changeset = get_changed_files(_project_root, cached_ref)
    total_changes = sum(
        len(group)
        for group in (changeset.modified, changeset.added, changeset.deleted)
    )
    if changeset.is_empty or total_changes > 20:
        print(
            f"[mcp-codebase-index] Cache stale ({total_changes} changes), full rebuild",
            file=sys.stderr,
        )
        _build_index()
        return

    print(
        f"[mcp-codebase-index] Cache hit with {total_changes} changed files, "
        f"applying incremental update",
        file=sys.stderr,
    )
    # _maybe_incremental_update will handle the rest on first tool call.
    _install(snapshot)
170 | 255 |
|
171 | 256 |
|
172 | 257 | def _build_index() -> None: |
173 | 258 | """Build (or rebuild) the project index and query functions.""" |
174 | 259 | global _project_root, _indexer, _query_fns, _is_git |
175 | 260 |
|
176 | | - _project_root = os.environ.get("PROJECT_ROOT", os.getcwd()) |
| 261 | + if not _project_root: |
| 262 | + _project_root = os.environ.get("PROJECT_ROOT", os.getcwd()) |
177 | 263 | print(f"[mcp-codebase-index] Indexing project: {_project_root}", file=sys.stderr) |
178 | 264 |
|
179 | 265 | _indexer = ProjectIndexer(_project_root) |
180 | 266 | index = _indexer.index() |
181 | 267 | _query_fns = create_project_query_functions(index) |
182 | 268 |
|
183 | | - _is_git = is_git_repo(_project_root) |
| 269 | + if not _is_git: |
| 270 | + _is_git = is_git_repo(_project_root) |
184 | 271 | if _is_git: |
185 | 272 | index.last_indexed_git_ref = get_head_commit(_project_root) |
| 273 | + _save_cache(index) |
186 | 274 |
|
187 | 275 | print( |
188 | 276 | f"[mcp-codebase-index] Indexed {index.total_files} files, " |
@@ -256,6 +344,8 @@ def _maybe_incremental_update() -> None: |
256 | 344 | file=sys.stderr, |
257 | 345 | ) |
258 | 346 |
|
| 347 | + _save_cache(idx) |
| 348 | + |
259 | 349 |
|
260 | 350 | # --------------------------------------------------------------------------- |
261 | 351 | # Tool definitions |
|
0 commit comments