[ST-1803] apps/summarization: add more robust AI response handling

partizipation · web-flow · commit 58ac75ffaffe · 2026-03-24T12:28:49.000+01:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -18,8 +18,9 @@ This project (not yet) adheres to [Semantic Versioning](https://semver.org/spec/
 
 ### Changed
 
+- Summarization: resilient LLM JSON parsing with json-repair and text trimming
 - Changed BMBF logo to BMFTR
-- Adjusted Newsletter emails.
+- Adjusted Newsletter emails
 - Installed HTMX rather than using script tag
 
 
diff --git a/agents.md b/agents.md
@@ -1,3 +1,7 @@
 ## Agent Note
 
 In this project **always run Python from the virtual environment (`venv`)** (e.g. `venv/bin/python` or `source venv/bin/activate`), not the system Python.
+
+**Code style:** Comments and variable names must always be in English.
+
+**Changelog:** Add entries **only** to the root `CHANGELOG.md`. Keep them **very short**; do not use separate changelog files or long prose.
diff --git a/apps/summarization/llm_json.py b/apps/summarization/llm_json.py
@@ -0,0 +1,40 @@
+"""Extract and parse JSON from LLM text (strip wrappers, repair, validate as Pydantic)."""
+
+from __future__ import annotations
+
+import re
+from typing import Any
+
+import json_repair
+from pydantic import BaseModel
+
+_FENCED_JSON = re.compile(r"```(?:json)?\s*([\s\S]*?)```", re.IGNORECASE)
+
+
+def extract_llm_json_payload(text: str) -> str:
+    """
+    Strip markdown fences and leading prose so the remainder is JSON-like.
+
+    - Keeps only the content of the first ```json ... ``` (or ``` ... ```) block if present.
+    - Drops any leading characters before the first ``{`` or ``[``.
+    """
+    s = (text or "").strip()
+    if not s:
+        return s
+
+    m = _FENCED_JSON.search(s)
+    if m:
+        s = m.group(1).strip()
+
+    for i, ch in enumerate(s):
+        if ch in "{[":
+            return s[i:].strip()
+
+    return s
+
+
+def parse_structured_llm_json(raw_text: str, result_type: type[BaseModel]) -> BaseModel:
+    """Strip wrappers, repair with json_repair, validate to ``result_type``."""
+    payload = extract_llm_json_payload(raw_text)
+    data: Any = json_repair.loads(payload)
+    return result_type.model_validate(data)
diff --git a/apps/summarization/playground_errors.py b/apps/summarization/playground_errors.py
@@ -0,0 +1,136 @@
+"""Rich error formatting for AI summarization test playground (staff debugging)."""
+
+from __future__ import annotations
+
+from collections import deque
+from typing import Any
+
+
+def format_playground_exception(exc: BaseException, *, max_related: int = 16) -> str:
+    """
+    Build a multi-line, human-readable error for the summarization test UIs.
+
+    Works for any exception type. Adds optional Pydantic validation details when present.
+    """
+    lines: list[str] = []
+
+    def append_block(title: str, body: str) -> None:
+        if lines:
+            lines.append("")
+        lines.append(f"=== {title} ===")
+        lines.append(body.strip() if body else "(keine Meldung)")
+
+    append_block("Hauptfehler", f"{type(exc).__name__}: {exc}")
+
+    related = _collect_related_exceptions(exc, max_total=max_related)
+    if related:
+        parts = []
+        for i, link in enumerate(related, start=1):
+            parts.append(f"[{i}] {type(link).__name__}: {link}")
+        append_block("Verknüpfte Fehler (Ursache / Kontext / Gruppe)", "\n".join(parts))
+
+    pydantic_bits = _collect_pydantic_error_details(exc)
+    if pydantic_bits:
+        append_block("Validierungsdetails", "\n".join(pydantic_bits))
+
+    return "\n".join(lines)
+
+
+def _collect_related_exceptions(root: BaseException, *, max_total: int) -> list[BaseException]:
+    """
+    Up to ``max_total`` exceptions reachable via __cause__, __context__, and ExceptionGroup.exceptions.
+
+    Excludes `root` (already shown as Hauptfehler). Order: BFS, no duplicates.
+    """
+    out: list[BaseException] = []
+    seen: set[int] = set()
+    q: deque[BaseException] = deque()
+    seen.add(id(root))
+
+    def enqueue(e: BaseException | None) -> None:
+        if e is None or id(e) in seen:
+            return
+        seen.add(id(e))
+        q.append(e)
+
+    enqueue(getattr(root, "__cause__", None))
+    enqueue(getattr(root, "__context__", None))
+    _enqueue_exception_group_children(root, enqueue)
+
+    while q and len(out) < max_total:
+        cur = q.popleft()
+        out.append(cur)
+        enqueue(getattr(cur, "__cause__", None))
+        enqueue(getattr(cur, "__context__", None))
+        _enqueue_exception_group_children(cur, enqueue)
+
+    return out
+
+
+def _enqueue_exception_group_children(exc: BaseException, enqueue: Any) -> None:
+    subs = getattr(exc, "exceptions", None)
+    if not subs:
+        return
+    if type(exc).__name__ not in ("ExceptionGroup", "BaseExceptionGroup"):
+        return
+    for sub in subs:
+        if isinstance(sub, BaseException):
+            enqueue(sub)
+
+
+def _collect_pydantic_error_details(exc: BaseException, *, max_errors: int = 12) -> list[str]:
+    """Extract pydantic v2 ValidationError.errors() entries from an exception chain."""
+    out: list[str] = []
+    seen: set[int] = set()
+    stack: list[BaseException] = [exc]
+
+    while stack:
+        cur = stack.pop()
+        cid = id(cur)
+        if cid in seen:
+            continue
+        seen.add(cid)
+
+        err_fn = getattr(cur, "errors", None)
+        if callable(err_fn):
+            try:
+                raw = err_fn()
+            except Exception:
+                raw = None
+            if isinstance(raw, list) and raw:
+                for item in raw[:max_errors]:
+                    out.append(_format_one_pydantic_error(item))
+                if len(raw) > max_errors:
+                    out.append(f"... und {len(raw) - max_errors} weitere Fehler")
+                return out
+
+        for nxt in (getattr(cur, "__cause__", None), getattr(cur, "__context__", None)):
+            if isinstance(nxt, BaseException):
+                stack.append(nxt)
+
+    return out
+
+
+def _format_one_pydantic_error(item: Any) -> str:
+    if not isinstance(item, dict):
+        return str(item)
+    loc = item.get("loc")
+    loc_s = ".".join(str(x) for x in loc) if isinstance(loc, tuple) else str(loc)
+    msg = item.get("msg", "")
+    typ = item.get("type", "")
+    parts = [f"• {loc_s or '(root)'}: {msg}"]
+    if typ:
+        parts.append(f"  (Typ: {typ})")
+    inp = item.get("input")
+    if inp is not None:
+        snippet = _shorten_for_display(inp, limit=400)
+        parts.append(f"  Eingabe-Ausschnitt: {snippet!r}")
+    return "\n".join(parts)
+
+
+def _shorten_for_display(s: Any, *, limit: int) -> str:
+    text = s if isinstance(s, str) else repr(s)
+    text = text.replace("\r\n", "\n").replace("\r", "\n")
+    if len(text) <= limit:
+        return text
+    return text[: limit - 3] + "..."
diff --git a/apps/summarization/providers.py b/apps/summarization/providers.py
@@ -2,19 +2,34 @@
 
 import logging
 from abc import ABC
+from typing import TypeVar
+from typing import cast
 
 from django.conf import settings
 from pydantic import BaseModel
 from pydantic_ai import Agent
 from pydantic_ai import ImageUrl
+from pydantic_ai import TextOutput
 from pydantic_ai.models.mistral import MistralModel
 from pydantic_ai.models.openai import OpenAIChatModel
 from pydantic_ai.providers.mistral import MistralProvider
 from pydantic_ai.providers.openai import OpenAIProvider
 from sentry_sdk import capture_exception
 
+from .llm_json import parse_structured_llm_json
+from .pydantic_models import DocumentSummaryResponse
+
 logger = logging.getLogger(__name__)
 
+TModel = TypeVar("TModel", bound=BaseModel)
+
+
+def _make_json_parse_fn(result_type: type[TModel]):
+    def parse(text: str) -> TModel:
+        return parse_structured_llm_json(text, result_type)
+
+    return parse
+
 
 class ProviderConfig:
     """Configuration for an AI provider."""
@@ -176,8 +191,8 @@ def text_request(
         agent = Agent(
             model=model,
             system_prompt=self.system_prompt,
-            output_type=result_type,
-            tools=[],  # Disable tool_calls to avoid validation errors with non-standard providers
+            output_type=TextOutput(_make_json_parse_fn(result_type)),
+            tools=[],
         )
 
         try:
@@ -195,6 +210,7 @@ def text_request(
             capture_exception(e)
             raise
 
+    # TODO: Deprecate? Callers could use text_request or vision_request (today: multimodal_request) directly.
     def request(self, request: AIRequest, result_type: type[BaseModel]) -> BaseModel:
         """
         Automatically determines if it's a text or multimodal request.
@@ -203,24 +219,33 @@ def request(self, request: AIRequest, result_type: type[BaseModel]) -> BaseModel
         # Check if request supports vision (multimodal request)
         if getattr(request, "vision_support", False):
             image_urls = getattr(request, "image_urls", None) or []
-            return self.multimodal_request(request, result_type, image_urls)
+            if not issubclass(result_type, DocumentSummaryResponse):
+                raise TypeError(
+                    "Vision requests require result_type to be DocumentSummaryResponse or a subclass."
+                )
+            return self.multimodal_request(
+                request, cast(type[DocumentSummaryResponse], result_type), image_urls
+            )
         else:
             return self.text_request(request, result_type)
 
-    # TODO: Deprectaed ? Use separate Vison Requests instead ?
+    # TODO: Rename to vision_request and restrict result_type to DocumentSummaryResponse?
     def multimodal_request(
-        self, request: AIRequest, result_type: type[BaseModel], image_urls: list[str]
-    ) -> BaseModel:
+        self,
+        request: AIRequest,
+        result_type: type[DocumentSummaryResponse],
+        image_urls: list[str],
+    ) -> DocumentSummaryResponse:
         """
         Execute a multimodal request with images using vision API.
 
         Args:
             request: Pydantic BaseModel with request data
-            result_type: Pydantic BaseModel class for structured output
+            result_type: DocumentSummaryResponse (or a subclass at runtime)
             image_urls: List of image URLs to include in the request
 
         Returns:
-            Structured response as BaseModel instance
+            Structured response instance
         """
         # Use MistralModel for Mistral, OpenAIChatModel for others
         # Note: Mistral may not support vision/multimodal requests
@@ -241,9 +266,9 @@ def multimodal_request(
         agent = Agent(
             model=model,
             system_prompt=self.system_prompt,
-            output_type=result_type,
-            output_retries=3,  # Allow more retries for vision output validation
-            tools=[],  # Disable tool_calls to avoid validation errors with non-standard providers
+            output_type=TextOutput(_make_json_parse_fn(result_type)),
+            output_retries=3,
+            tools=[],
         )
 
         # Build user content with prompt and image URLs
diff --git a/apps/summarization/templates/summarization/test.html b/apps/summarization/templates/summarization/test.html
@@ -80,6 +80,19 @@
             border-left-color: #dc3545;
             color: #721c24;
         }
+        .error pre.error-detail {
+            margin: 10px 0 0 0;
+            padding: 12px;
+            background: #fff;
+            border: 1px solid #f5c6cb;
+            border-radius: 4px;
+            white-space: pre-wrap;
+            word-break: break-word;
+            font-family: ui-monospace, "Courier New", monospace;
+            font-size: 13px;
+            line-height: 1.45;
+            color: #491217;
+        }
         .summary {
             margin-top: 15px;
             margin-bottom: 20px;
@@ -169,7 +182,8 @@ <h1>AI Summarization Test</h1>
         
         {% if error %}
         <div class="result error">
-            <strong>Error:</strong> {{ error }}
+            <strong>Fehlerdetails</strong>
+            <pre class="error-detail">{{ error }}</pre>
         </div>
         {% endif %}
 
diff --git a/apps/summarization/templates/summarization/test_documents.html b/apps/summarization/templates/summarization/test_documents.html
@@ -70,6 +70,19 @@
             border-left-color: #dc3545;
             color: #721c24;
         }
+        .error pre.error-detail {
+            margin: 10px 0 0 0;
+            padding: 12px;
+            background: #fff;
+            border: 1px solid #f5c6cb;
+            border-radius: 4px;
+            white-space: pre-wrap;
+            word-break: break-word;
+            font-family: ui-monospace, "Courier New", monospace;
+            font-size: 13px;
+            line-height: 1.45;
+            color: #491217;
+        }
         .document-item {
             margin-top: 15px;
             margin-bottom: 20px;
@@ -152,7 +165,8 @@ <h1>Document Summarization Test</h1>
         
         {% if error %}
         <div class="result error">
-            <strong>Error:</strong> {{ error }}
+            <strong>Fehlerdetails</strong>
+            <pre class="error-detail">{{ error }}</pre>
         </div>
         {% endif %}
         
diff --git a/apps/summarization/views.py b/apps/summarization/views.py
@@ -11,6 +11,7 @@
 from adhocracy4.projects.models import Project
 
 from .export_utils.core import generate_full_export
+from .playground_errors import format_playground_exception
 from .pydantic_models import DocumentInputItem
 from .pydantic_models import ProjectSummaryResponse
 from .services import AIService
@@ -80,7 +81,7 @@ def _handle_text_request(
         except BaseException as e:
             if isinstance(e, (KeyboardInterrupt, SystemExit, GeneratorExit)):
                 raise
-            return None, 0, str(e)
+            return None, 0, format_playground_exception(e)
 
     def _extract_project_from_json(self, text: str):
         """Extract project information from JSON text if available."""
@@ -236,9 +237,9 @@ def post(self, request):
                 context["summary_response"] = response
 
             except json.JSONDecodeError as e:
-                context["error"] = f"Invalid JSON: {str(e)}"
+                context["error"] = format_playground_exception(e)
             except Exception as e:
-                context["error"] = str(e)
+                context["error"] = format_playground_exception(e)
         else:
             context["error"] = "Please provide documents in JSON format"
 
diff --git a/requirements/base.txt b/requirements/base.txt
diff --git a/tests/summarization/test_llm_json.py b/tests/summarization/test_llm_json.py