Skip to content

Commit ccf0c23

Browse files
committed
refactor: implement pluggable analyzer architecture with improved code quality
Major refactoring to replace hardcoded language analysis with a flexible, extensible analyzer framework.

Architecture improvements:
- Create pluggable analyzer system with AnalyzerFactory
- Implement standardized AnalysisResult for consistent output
- Add language-specific analyzers (Python, JavaScript, Java, Objective-C)
- Remove 248 lines of duplicated analysis code from server.py

Code quality fixes:
- Fix import order following PEP 8 standards
- Replace broad Exception handlers with specific exception types
- Add proper type annotations with Optional types
- Define constants for magic numbers (ReDoS protection)
- Remove unused variables and improve variable naming

Performance optimizations:
- Move regex compilation to analyzer initialization (performance boost)
- Implement backwards-compatible to_dict() method
- Add error handling with graceful fallbacks

The new analyzer framework is extensible and maintains API compatibility while providing better maintainability and performance.
1 parent dd223a0 commit ccf0c23

File tree

11 files changed

+646
-264
lines changed

11 files changed

+646
-264
lines changed
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
"""Language analyzers for code analysis."""
2+
3+
from .base_analyzer import LanguageAnalyzer
4+
from .analyzer_factory import AnalyzerFactory
5+
from .analysis_result import AnalysisResult, Symbol
6+
from .python_analyzer import PythonAnalyzer
7+
from .javascript_analyzer import JavaScriptAnalyzer
8+
from .java_analyzer import JavaAnalyzer
9+
from .objective_c_analyzer import ObjectiveCAnalyzer
10+
from .default_analyzer import DefaultAnalyzer
11+
12+
__all__ = [
13+
'LanguageAnalyzer',
14+
'AnalyzerFactory',
15+
'AnalysisResult',
16+
'Symbol',
17+
'PythonAnalyzer',
18+
'JavaScriptAnalyzer',
19+
'JavaAnalyzer',
20+
'ObjectiveCAnalyzer',
21+
'DefaultAnalyzer',
22+
]
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
"""Standardized analysis result structure."""
2+
3+
from dataclasses import dataclass, field
4+
from typing import Dict, List, Any, Optional
5+
6+
7+
@dataclass
class Symbol:
    """Represents a code symbol (function, class, import, variable, etc.)."""

    name: str         # identifier of the symbol
    line: int         # 1-based line number where the symbol was found
    symbol_type: str  # 'function', 'class', 'import', 'variable', etc.
    # Analyzer-specific extras (e.g. Java modifiers, decorator names).
    metadata: Dict[str, Any] = field(default_factory=dict)


@dataclass
class AnalysisResult:
    """Standardized result structure for all analyzers."""

    # Basic file information
    file_path: str
    line_count: int
    size_bytes: int
    extension: str
    analysis_type: str

    # Symbols found in the file, keyed by symbol type.
    symbols: Dict[str, List[Symbol]] = field(default_factory=dict)

    # Summary counts, keyed by "<symbol_type>_count".
    counts: Dict[str, int] = field(default_factory=dict)

    # Language-specific metadata merged verbatim into to_dict() output.
    metadata: Dict[str, Any] = field(default_factory=dict)

    # Error information if analysis failed; when set, to_dict() reports
    # only the basic file info plus the error.
    error: Optional[str] = None

    def add_symbol(self, symbol_type: str, name: str, line: int,
                   metadata: Optional[Dict[str, Any]] = None) -> None:
        """Record one symbol and bump the per-type summary count.

        Args:
            symbol_type: Category of the symbol ('function', 'class', ...).
            name: Identifier of the symbol.
            line: 1-based line number where it was found.
            metadata: Optional analyzer-specific extras.
        """
        # setdefault replaces the explicit "not in" check of the naive version.
        self.symbols.setdefault(symbol_type, []).append(
            Symbol(name=name, line=line, symbol_type=symbol_type,
                   metadata=metadata or {})
        )
        count_key = f"{symbol_type}_count"
        self.counts[count_key] = self.counts.get(count_key, 0) + 1

    def get_symbols(self, symbol_type: str) -> List[Symbol]:
        """Return the symbols of a specific type (empty list if none)."""
        return self.symbols.get(symbol_type, [])

    def get_count(self, symbol_type: str) -> int:
        """Return the count of symbols of a specific type (0 if none)."""
        return self.counts.get(f"{symbol_type}_count", 0)

    def to_dict(self) -> Dict[str, Any]:
        """Convert to a flat dictionary for backwards compatibility.

        On failure (``self.error`` set) only the basic file info and the
        error message are returned; symbols/counts/metadata are omitted.
        """
        result = {
            "file_path": self.file_path,
            "line_count": self.line_count,
            "size_bytes": self.size_bytes,
            "extension": self.extension,
            "analysis_type": self.analysis_type,
        }

        # Error short-circuits: a failed analysis has no meaningful symbols.
        if self.error:
            result["error"] = self.error
            return result

        # Add symbol lists (backwards compatibility with the old dict format).
        for symbol_type, symbols in self.symbols.items():
            if symbol_type == "import":
                # Special handling for imports - plain name strings.
                result["imports"] = [s.name for s in symbols]
            else:
                # Other symbols become dicts; key is naively pluralized
                # (note: 'class' yields 'classs' — kept for compatibility).
                result[f"{symbol_type}s"] = [
                    {"line": s.line, "name": s.name, **s.metadata}
                    for s in symbols
                ]

        # Add counts
        result.update(self.counts)

        # Add language-specific metadata
        result.update(self.metadata)

        return result
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
"""Factory for creating language-specific analyzers."""
2+
3+
from typing import Dict, Type, Optional
4+
from .base_analyzer import LanguageAnalyzer
5+
from .default_analyzer import DefaultAnalyzer
6+
from .python_analyzer import PythonAnalyzer
7+
from .javascript_analyzer import JavaScriptAnalyzer
8+
from .java_analyzer import JavaAnalyzer
9+
from .objective_c_analyzer import ObjectiveCAnalyzer
10+
11+
12+
class AnalyzerFactory:
    """Factory class for creating language-specific analyzers."""

    # Registry mapping a lower-cased file extension to its analyzer class.
    _analyzers: Dict[str, Type[LanguageAnalyzer]] = {}

    @classmethod
    def register(cls, extensions: list[str], analyzer_class: Type[LanguageAnalyzer]) -> None:
        """
        Register an analyzer for specific file extensions.

        Args:
            extensions: List of file extensions (e.g., ['.py', '.pyx'])
            analyzer_class: The analyzer class to register
        """
        for ext in extensions:
            cls._analyzers[ext.lower()] = analyzer_class

    @classmethod
    def get_analyzer(cls, extension: str) -> LanguageAnalyzer:
        """
        Get an analyzer instance for the given file extension.

        Args:
            extension: The file extension (e.g., '.py')

        Returns:
            Language analyzer instance, or DefaultAnalyzer if not found
        """
        key = extension.lower()
        analyzer_cls = cls._analyzers.get(key)
        if analyzer_cls is None:
            # No dedicated analyzer registered — fall back to basic analysis.
            analyzer_cls = DefaultAnalyzer
        return analyzer_cls()

    @classmethod
    def get_supported_extensions(cls) -> list[str]:
        """
        Get all supported file extensions.

        Returns:
            List of all registered extensions
        """
        return [*cls._analyzers]

    @classmethod
    def is_extension_supported(cls, extension: str) -> bool:
        """
        Check if an extension has a specific analyzer.

        Args:
            extension: The file extension to check

        Returns:
            True if a specific analyzer exists for the extension
        """
        return extension.lower() in cls._analyzers
67+
68+
69+
# Built-in analyzer registration: done once, at import time.
def _initialize_factory():
    """Initialize the factory with built-in analyzers."""
    builtin = [
        (['.py'], PythonAnalyzer),
        (['.js', '.jsx', '.ts', '.tsx', '.mjs', '.cjs'], JavaScriptAnalyzer),
        (['.java'], JavaAnalyzer),
        (['.m', '.mm'], ObjectiveCAnalyzer),
    ]
    for extensions, analyzer_class in builtin:
        AnalyzerFactory.register(extensions, analyzer_class)


# Populate the registry as soon as this module is imported.
_initialize_factory()
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
"""Base analyzer interface for language-specific code analysis."""
2+
3+
from abc import ABC, abstractmethod
4+
from typing import Dict, Any, List, Optional
5+
import os
6+
import re
7+
from .analysis_result import AnalysisResult
8+
9+
10+
class LanguageAnalyzer(ABC):
    """Abstract base class for language-specific code analyzers."""

    # Constants for ReDoS protection: patterns longer or more repetitive
    # than these limits are rejected outright by _safe_regex_match.
    MAX_PATTERN_LENGTH = 500
    MAX_WILDCARD_COUNT = 10

    @abstractmethod
    def analyze(self, content: str, file_path: str, full_path: Optional[str] = None) -> "AnalysisResult":
        """
        Analyze the content of a file and return structured information.

        Args:
            content: The file content as a string
            file_path: The relative path of the file
            full_path: The absolute path of the file (optional)

        Returns:
            AnalysisResult containing structured analysis information
        """

    def _count_lines(self, content: str) -> int:
        """Count the number of lines in the content."""
        return len(content.splitlines())

    def _get_file_size(self, content: str, full_path: Optional[str] = None) -> int:
        """Get the file size in bytes.

        Prefers the on-disk size when *full_path* is readable; otherwise
        falls back to the UTF-8 encoded length of *content*.
        """
        if full_path:
            try:
                return os.path.getsize(full_path)
            except OSError:  # IOError has been an alias of OSError since 3.3
                pass
        # Fallback to content size in bytes
        return len(content.encode('utf-8'))

    def _filter_comments_and_empty_lines(self, lines: List[str],
                                         comment_patterns: Optional[List[str]] = None) -> List[str]:
        """Filter out comments and empty lines.

        Args:
            lines: Raw source lines.
            comment_patterns: Prefixes marking single-line comments; defaults
                to common prefixes across C-like and scripting languages.

        Returns:
            Stripped, non-empty, non-comment lines.

        NOTE(review): the multiline handling is heuristic — a line that mixes
        code with '/*' or '*/' is skipped entirely (same as the original).
        """
        if comment_patterns is None:
            comment_patterns = ['//', '#', '/*', '*', '--']

        filtered_lines = []
        in_multiline_comment = False

        for line in lines:
            stripped = line.strip()

            # Skip empty lines
            if not stripped:
                continue

            # Track /* ... */ regions; the opening and closing lines
            # themselves are also dropped.
            if '/*' in stripped:
                in_multiline_comment = True
            if '*/' in stripped:
                in_multiline_comment = False
                continue
            if in_multiline_comment:
                continue

            # Skip single-line comments
            if any(stripped.startswith(pattern) for pattern in comment_patterns):
                continue

            filtered_lines.append(stripped)

        return filtered_lines

    def _safe_regex_match(self, pattern: str, text: str) -> Optional[re.Match]:
        """Safely match a regex pattern with ReDoS protection.

        Returns None (never raises) for invalid patterns and for patterns
        exceeding the class-level length/repetition limits.
        """
        # Limit checks cannot raise, so they live outside the try block.
        if (len(pattern) > self.MAX_PATTERN_LENGTH or
                pattern.count('*') > self.MAX_WILDCARD_COUNT or
                pattern.count('+') > self.MAX_WILDCARD_COUNT):
            return None
        try:
            return re.match(pattern, text)
        except re.error:
            return None
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
"""Default analyzer for basic file information."""
2+
3+
import os
4+
from typing import Dict, Any
5+
from .base_analyzer import LanguageAnalyzer
6+
from .analysis_result import AnalysisResult
7+
8+
9+
class DefaultAnalyzer(LanguageAnalyzer):
    """Fallback analyzer: generic file metrics only, no language parsing."""

    def analyze(self, content: str, file_path: str, full_path: str = None) -> AnalysisResult:
        """Return basic metrics (line count, byte size, extension) for any file."""
        extension = os.path.splitext(file_path)[1]
        return AnalysisResult(
            file_path=file_path,
            line_count=self._count_lines(content),
            size_bytes=self._get_file_size(content, full_path),
            extension=extension,
            analysis_type="basic",
        )
23+
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
"""Java language analyzer."""
2+
3+
import os
4+
import re
5+
from typing import Dict, Any
6+
from .base_analyzer import LanguageAnalyzer
7+
from .analysis_result import AnalysisResult
8+
9+
10+
class JavaAnalyzer(LanguageAnalyzer):
    """Analyzer for Java files (line-oriented, regex-based heuristics)."""

    def __init__(self):
        """Initialize with compiled regex patterns for performance."""
        # (?:static\s+)? accepts `import static ...`; [\w.*]+ additionally
        # accepts on-demand imports like `import java.util.*;` — the old
        # character class [\w.]+ silently missed both forms.
        self.import_pattern = re.compile(r'^import\s+(?:static\s+)?([\w.*]+);')
        self.class_pattern = re.compile(r'^(public\s+|protected\s+|private\s+)?(static\s+)?(abstract\s+)?(final\s+)?class\s+(\w+)')
        self.method_pattern = re.compile(r'^(public|protected|private|static|final|abstract|synchronized|native|strictfp|\s)+[\w<>\[\]]+\s+(\w+)\s*\([^)]*\)')
        self.field_pattern = re.compile(r'^(public|protected|private|static|final|transient|volatile|\s)+[\w<>\[\]]+\s+(\w+)\s*(=|;)')

    def analyze(self, content: str, file_path: str, full_path: str = None) -> AnalysisResult:
        """Analyze Java file content.

        Args:
            content: The file content as a string
            file_path: The relative path of the file
            full_path: The absolute path of the file (optional)

        Returns:
            AnalysisResult with import/class/function/field symbols.
        """
        lines = content.splitlines()

        # Create result object with generic file metrics.
        _, ext = os.path.splitext(file_path)
        result = AnalysisResult(
            file_path=file_path,
            line_count=self._count_lines(content),
            size_bytes=self._get_file_size(content, full_path),
            extension=ext,
            analysis_type="java"
        )

        # Java-specific analysis using pre-compiled patterns.
        in_multiline_comment = False

        for i, raw_line in enumerate(lines):
            line = raw_line.strip()

            # Skip empty lines and single-line comments.
            if not line or line.startswith('//'):
                continue

            # Track /* ... */ regions; opening and closing lines are skipped
            # (heuristic: code sharing a line with '/*' or '*/' is dropped).
            if '/*' in line:
                in_multiline_comment = True
            if '*/' in line:
                in_multiline_comment = False
                continue
            if in_multiline_comment:
                continue

            # Check for imports.
            import_match = self.import_pattern.match(line)
            if import_match:
                result.add_symbol("import", import_match.group(1), i + 1)

            # Check for class definitions; record modifiers (whitespace
            # stripped so metadata holds clean tokens like "public").
            class_match = self.class_pattern.match(line)
            if class_match:
                modifiers = [m.strip() for m in class_match.groups()[:4] if m and m.strip()]
                result.add_symbol("class", class_match.group(5), i + 1,
                                  {"modifiers": modifiers})

            # Check for method definitions; a trailing ';' means an abstract
            # or interface declaration-like line we still skip, matching the
            # original heuristic.
            method_match = self.method_pattern.match(line)
            if method_match and not line.endswith(';'):
                result.add_symbol("function", method_match.group(2), i + 1)

            # Check for field definitions ('//' lines were already skipped).
            field_match = self.field_pattern.match(line)
            if field_match:
                result.add_symbol("field", field_match.group(2), i + 1)

        return result
77+

0 commit comments

Comments
 (0)