Skip to content

Commit ccf0c23

Browse files
committed
refactor: implement pluggable analyzer architecture with improved code quality
Major refactoring to replace hardcoded language analysis with a flexible, extensible analyzer framework.

Architecture improvements:
- Create pluggable analyzer system with AnalyzerFactory
- Implement standardized AnalysisResult for consistent output
- Add language-specific analyzers (Python, JavaScript, Java, Objective-C)
- Remove 248 lines of duplicated analysis code from server.py

Code quality fixes:
- Fix import order following PEP 8 standards
- Replace broad Exception handlers with specific exception types
- Add proper type annotations with Optional types
- Define constants for magic numbers (ReDoS protection)
- Remove unused variables and improve variable naming

Performance optimizations:
- Move regex compilation to analyzer initialization (performance boost)
- Implement backwards-compatible to_dict() method
- Add error handling with graceful fallbacks

The new analyzer framework is extensible and maintains API compatibility while providing better maintainability and performance.
1 parent dd223a0 commit ccf0c23

File tree

11 files changed

+646
-264
lines changed

11 files changed

+646
-264
lines changed
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
"""Language analyzers for code analysis."""
2+
3+
from .base_analyzer import LanguageAnalyzer
4+
from .analyzer_factory import AnalyzerFactory
5+
from .analysis_result import AnalysisResult, Symbol
6+
from .python_analyzer import PythonAnalyzer
7+
from .javascript_analyzer import JavaScriptAnalyzer
8+
from .java_analyzer import JavaAnalyzer
9+
from .objective_c_analyzer import ObjectiveCAnalyzer
10+
from .default_analyzer import DefaultAnalyzer
11+
12+
__all__ = [
13+
'LanguageAnalyzer',
14+
'AnalyzerFactory',
15+
'AnalysisResult',
16+
'Symbol',
17+
'PythonAnalyzer',
18+
'JavaScriptAnalyzer',
19+
'JavaAnalyzer',
20+
'ObjectiveCAnalyzer',
21+
'DefaultAnalyzer',
22+
]
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
"""Standardized analysis result structure."""
2+
3+
from dataclasses import dataclass, field
4+
from typing import Dict, List, Any, Optional
5+
6+
7+
@dataclass
class Symbol:
    """Represents a code symbol (function, class, import, variable, etc.)."""

    name: str         # identifier of the symbol
    line: int         # 1-based line number where the symbol was found
    symbol_type: str  # 'function', 'class', 'import', 'variable', etc.
    # Analyzer-specific extras (e.g. Java modifiers, decorator names).
    metadata: Dict[str, Any] = field(default_factory=dict)


@dataclass
class AnalysisResult:
    """Standardized result structure for all analyzers."""

    # Basic file information
    file_path: str
    line_count: int
    size_bytes: int
    extension: str
    analysis_type: str

    # Symbols found in the file, keyed by symbol type.
    symbols: Dict[str, List[Symbol]] = field(default_factory=dict)

    # Summary counts, keyed by "<symbol_type>_count".
    counts: Dict[str, int] = field(default_factory=dict)

    # Language-specific metadata merged verbatim into to_dict() output.
    metadata: Dict[str, Any] = field(default_factory=dict)

    # Error information if analysis failed; when set, to_dict() reports
    # only the basic file info plus the error.
    error: Optional[str] = None

    def add_symbol(self, symbol_type: str, name: str, line: int,
                   metadata: Optional[Dict[str, Any]] = None) -> None:
        """Record one symbol and bump the per-type summary count.

        Args:
            symbol_type: Category of the symbol ('function', 'class', ...).
            name: Identifier of the symbol.
            line: 1-based line number where it was found.
            metadata: Optional analyzer-specific extras.
        """
        # setdefault replaces the explicit "not in" check of the naive version.
        self.symbols.setdefault(symbol_type, []).append(
            Symbol(name=name, line=line, symbol_type=symbol_type,
                   metadata=metadata or {})
        )
        count_key = f"{symbol_type}_count"
        self.counts[count_key] = self.counts.get(count_key, 0) + 1

    def get_symbols(self, symbol_type: str) -> List[Symbol]:
        """Return the symbols of a specific type (empty list if none)."""
        return self.symbols.get(symbol_type, [])

    def get_count(self, symbol_type: str) -> int:
        """Return the count of symbols of a specific type (0 if none)."""
        return self.counts.get(f"{symbol_type}_count", 0)

    def to_dict(self) -> Dict[str, Any]:
        """Convert to a flat dictionary for backwards compatibility.

        On failure (``self.error`` set) only the basic file info and the
        error message are returned; symbols/counts/metadata are omitted.
        """
        result = {
            "file_path": self.file_path,
            "line_count": self.line_count,
            "size_bytes": self.size_bytes,
            "extension": self.extension,
            "analysis_type": self.analysis_type,
        }

        # Error short-circuits: a failed analysis has no meaningful symbols.
        if self.error:
            result["error"] = self.error
            return result

        # Add symbol lists (backwards compatibility with the old dict format).
        for symbol_type, symbols in self.symbols.items():
            if symbol_type == "import":
                # Special handling for imports - plain name strings.
                result["imports"] = [s.name for s in symbols]
            else:
                # Other symbols become dicts; key is naively pluralized
                # (note: 'class' yields 'classs' — kept for compatibility).
                result[f"{symbol_type}s"] = [
                    {"line": s.line, "name": s.name, **s.metadata}
                    for s in symbols
                ]

        # Add counts
        result.update(self.counts)

        # Add language-specific metadata
        result.update(self.metadata)

        return result
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
"""Factory for creating language-specific analyzers."""
2+
3+
from typing import Dict, Type, Optional
4+
from .base_analyzer import LanguageAnalyzer
5+
from .default_analyzer import DefaultAnalyzer
6+
from .python_analyzer import PythonAnalyzer
7+
from .javascript_analyzer import JavaScriptAnalyzer
8+
from .java_analyzer import JavaAnalyzer
9+
from .objective_c_analyzer import ObjectiveCAnalyzer
10+
11+
12+
class AnalyzerFactory:
    """Factory class for creating language-specific analyzers."""

    # Registry mapping a lower-cased file extension to its analyzer class.
    _analyzers: Dict[str, Type[LanguageAnalyzer]] = {}

    @classmethod
    def register(cls, extensions: list[str], analyzer_class: Type[LanguageAnalyzer]) -> None:
        """
        Register an analyzer for specific file extensions.

        Args:
            extensions: List of file extensions (e.g., ['.py', '.pyx'])
            analyzer_class: The analyzer class to register
        """
        for ext in extensions:
            cls._analyzers[ext.lower()] = analyzer_class

    @classmethod
    def get_analyzer(cls, extension: str) -> LanguageAnalyzer:
        """
        Get an analyzer instance for the given file extension.

        Args:
            extension: The file extension (e.g., '.py')

        Returns:
            Language analyzer instance, or DefaultAnalyzer if not found
        """
        key = extension.lower()
        analyzer_cls = cls._analyzers.get(key)
        if analyzer_cls is None:
            # No dedicated analyzer registered — fall back to basic analysis.
            analyzer_cls = DefaultAnalyzer
        return analyzer_cls()

    @classmethod
    def get_supported_extensions(cls) -> list[str]:
        """
        Get all supported file extensions.

        Returns:
            List of all registered extensions
        """
        return [*cls._analyzers]

    @classmethod
    def is_extension_supported(cls, extension: str) -> bool:
        """
        Check if an extension has a specific analyzer.

        Args:
            extension: The file extension to check

        Returns:
            True if a specific analyzer exists for the extension
        """
        return extension.lower() in cls._analyzers
67+
68+
69+
# Built-in analyzer registration: done once, at import time.
def _initialize_factory():
    """Initialize the factory with built-in analyzers."""
    builtin = [
        (['.py'], PythonAnalyzer),
        (['.js', '.jsx', '.ts', '.tsx', '.mjs', '.cjs'], JavaScriptAnalyzer),
        (['.java'], JavaAnalyzer),
        (['.m', '.mm'], ObjectiveCAnalyzer),
    ]
    for extensions, analyzer_class in builtin:
        AnalyzerFactory.register(extensions, analyzer_class)


# Populate the registry as soon as this module is imported.
_initialize_factory()
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
"""Base analyzer interface for language-specific code analysis."""
2+
3+
from abc import ABC, abstractmethod
4+
from typing import Dict, Any, List, Optional
5+
import os
6+
import re
7+
from .analysis_result import AnalysisResult
8+
9+
10+
class LanguageAnalyzer(ABC):
    """Abstract base class for language-specific code analyzers."""

    # Constants for ReDoS protection: patterns longer or more repetitive
    # than these limits are rejected outright by _safe_regex_match.
    MAX_PATTERN_LENGTH = 500
    MAX_WILDCARD_COUNT = 10

    @abstractmethod
    def analyze(self, content: str, file_path: str, full_path: Optional[str] = None) -> "AnalysisResult":
        """
        Analyze the content of a file and return structured information.

        Args:
            content: The file content as a string
            file_path: The relative path of the file
            full_path: The absolute path of the file (optional)

        Returns:
            AnalysisResult containing structured analysis information
        """

    def _count_lines(self, content: str) -> int:
        """Count the number of lines in the content."""
        return len(content.splitlines())

    def _get_file_size(self, content: str, full_path: Optional[str] = None) -> int:
        """Get the file size in bytes.

        Prefers the on-disk size when *full_path* is readable; otherwise
        falls back to the UTF-8 encoded length of *content*.
        """
        if full_path:
            try:
                return os.path.getsize(full_path)
            except OSError:  # IOError has been an alias of OSError since 3.3
                pass
        # Fallback to content size in bytes
        return len(content.encode('utf-8'))

    def _filter_comments_and_empty_lines(self, lines: List[str],
                                         comment_patterns: Optional[List[str]] = None) -> List[str]:
        """Filter out comments and empty lines.

        Args:
            lines: Raw source lines.
            comment_patterns: Prefixes marking single-line comments; defaults
                to common prefixes across C-like and scripting languages.

        Returns:
            Stripped, non-empty, non-comment lines.

        NOTE(review): the multiline handling is heuristic — a line that mixes
        code with '/*' or '*/' is skipped entirely (same as the original).
        """
        if comment_patterns is None:
            comment_patterns = ['//', '#', '/*', '*', '--']

        filtered_lines = []
        in_multiline_comment = False

        for line in lines:
            stripped = line.strip()

            # Skip empty lines
            if not stripped:
                continue

            # Track /* ... */ regions; the opening and closing lines
            # themselves are also dropped.
            if '/*' in stripped:
                in_multiline_comment = True
            if '*/' in stripped:
                in_multiline_comment = False
                continue
            if in_multiline_comment:
                continue

            # Skip single-line comments
            if any(stripped.startswith(pattern) for pattern in comment_patterns):
                continue

            filtered_lines.append(stripped)

        return filtered_lines

    def _safe_regex_match(self, pattern: str, text: str) -> Optional[re.Match]:
        """Safely match a regex pattern with ReDoS protection.

        Returns None (never raises) for invalid patterns and for patterns
        exceeding the class-level length/repetition limits.
        """
        # Limit checks cannot raise, so they live outside the try block.
        if (len(pattern) > self.MAX_PATTERN_LENGTH or
                pattern.count('*') > self.MAX_WILDCARD_COUNT or
                pattern.count('+') > self.MAX_WILDCARD_COUNT):
            return None
        try:
            return re.match(pattern, text)
        except re.error:
            return None
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
"""Default analyzer for basic file information."""
2+
3+
import os
4+
from typing import Dict, Any
5+
from .base_analyzer import LanguageAnalyzer
6+
from .analysis_result import AnalysisResult
7+
8+
9+
class DefaultAnalyzer(LanguageAnalyzer):
    """Fallback analyzer: generic file metrics only, no language parsing."""

    def analyze(self, content: str, file_path: str, full_path: str = None) -> AnalysisResult:
        """Return basic metrics (line count, byte size, extension) for any file."""
        extension = os.path.splitext(file_path)[1]
        return AnalysisResult(
            file_path=file_path,
            line_count=self._count_lines(content),
            size_bytes=self._get_file_size(content, full_path),
            extension=extension,
            analysis_type="basic",
        )
23+
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
"""Java language analyzer."""
2+
3+
import os
4+
import re
5+
from typing import Dict, Any
6+
from .base_analyzer import LanguageAnalyzer
7+
from .analysis_result import AnalysisResult
8+
9+
10+
class JavaAnalyzer(LanguageAnalyzer):
    """Analyzer for Java files (line-oriented, regex-based heuristics)."""

    def __init__(self):
        """Initialize with compiled regex patterns for performance."""
        # (?:static\s+)? accepts `import static ...`; [\w.*]+ additionally
        # accepts on-demand imports like `import java.util.*;` — the old
        # character class [\w.]+ silently missed both forms.
        self.import_pattern = re.compile(r'^import\s+(?:static\s+)?([\w.*]+);')
        self.class_pattern = re.compile(r'^(public\s+|protected\s+|private\s+)?(static\s+)?(abstract\s+)?(final\s+)?class\s+(\w+)')
        self.method_pattern = re.compile(r'^(public|protected|private|static|final|abstract|synchronized|native|strictfp|\s)+[\w<>\[\]]+\s+(\w+)\s*\([^)]*\)')
        self.field_pattern = re.compile(r'^(public|protected|private|static|final|transient|volatile|\s)+[\w<>\[\]]+\s+(\w+)\s*(=|;)')

    def analyze(self, content: str, file_path: str, full_path: str = None) -> AnalysisResult:
        """Analyze Java file content.

        Args:
            content: The file content as a string
            file_path: The relative path of the file
            full_path: The absolute path of the file (optional)

        Returns:
            AnalysisResult with import/class/function/field symbols.
        """
        lines = content.splitlines()

        # Create result object with generic file metrics.
        _, ext = os.path.splitext(file_path)
        result = AnalysisResult(
            file_path=file_path,
            line_count=self._count_lines(content),
            size_bytes=self._get_file_size(content, full_path),
            extension=ext,
            analysis_type="java"
        )

        # Java-specific analysis using pre-compiled patterns.
        in_multiline_comment = False

        for i, raw_line in enumerate(lines):
            line = raw_line.strip()

            # Skip empty lines and single-line comments.
            if not line or line.startswith('//'):
                continue

            # Track /* ... */ regions; opening and closing lines are skipped
            # (heuristic: code sharing a line with '/*' or '*/' is dropped).
            if '/*' in line:
                in_multiline_comment = True
            if '*/' in line:
                in_multiline_comment = False
                continue
            if in_multiline_comment:
                continue

            # Check for imports.
            import_match = self.import_pattern.match(line)
            if import_match:
                result.add_symbol("import", import_match.group(1), i + 1)

            # Check for class definitions; record modifiers (whitespace
            # stripped so metadata holds clean tokens like "public").
            class_match = self.class_pattern.match(line)
            if class_match:
                modifiers = [m.strip() for m in class_match.groups()[:4] if m and m.strip()]
                result.add_symbol("class", class_match.group(5), i + 1,
                                  {"modifiers": modifiers})

            # Check for method definitions; a trailing ';' means an abstract
            # or interface declaration-like line we still skip, matching the
            # original heuristic.
            method_match = self.method_pattern.match(line)
            if method_match and not line.endswith(';'):
                result.add_symbol("function", method_match.group(2), i + 1)

            # Check for field definitions ('//' lines were already skipped).
            field_match = self.field_pattern.match(line)
            if field_match:
                result.add_symbol("field", field_match.group(2), i + 1)

        return result
77+

0 commit comments

Comments
 (0)