Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion livekit-agents/livekit/agents/cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -1300,7 +1300,7 @@ def _print_run_event(
)
)
elif event.type == "function_call_output":
output = event.item.output
output = llm.utils.tool_output_to_text(event.item.output)
display_output = output
is_error = output.lower().startswith("error") or output.lower().startswith("exception")

Expand Down
5 changes: 3 additions & 2 deletions livekit-agents/livekit/agents/evals/judge.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,11 @@ def _format_items(items: list) -> str:
elif item.type == "function_call":
parts.append(f"[function call: {item.name}({item.arguments})]")
elif item.type == "function_call_output":
output_text = llm_utils.tool_output_to_text(item.output)
if item.is_error:
parts.append(f"[function error: {item.output}]")
parts.append(f"[function error: {output_text}]")
else:
parts.append(f"[function output: {item.output}]")
parts.append(f"[function output: {output_text}]")
elif item.type == "agent_handoff":
parts.append(f"[agent handoff: {item.old_agent_id} -> {item.new_agent_id}]")
elif item.type == "agent_config_update":
Expand Down
6 changes: 5 additions & 1 deletion livekit-agents/livekit/agents/inference/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,7 @@ def __init__(
conn_options: APIConnectOptions,
extra_kwargs: dict[str, Any],
provider_fmt: str = "openai", # used internally for chat_ctx format
provider_format_kwargs: dict[str, Any] | None = None,
) -> None:
super().__init__(llm_v, chat_ctx=chat_ctx, tools=tools, conn_options=conn_options)
self._model = model
Expand All @@ -303,6 +304,7 @@ def __init__(
self._client = client
self._llm = llm_v
self._extra_kwargs = drop_unsupported_params(model, extra_kwargs)
self._provider_format_kwargs = provider_format_kwargs or {}
self._tool_ctx = llm.ToolContext(tools)

async def _run(self) -> None:
Expand All @@ -317,7 +319,9 @@ async def _run(self) -> None:
retryable = True

try:
chat_ctx, _ = self._chat_ctx.to_provider_format(format=self._provider_fmt)
chat_ctx, _ = self._chat_ctx.to_provider_format(
format=self._provider_fmt, **self._provider_format_kwargs
)
tool_schemas = cast(
list[ChatCompletionToolParam],
self._tool_ctx.parse_function_tools("openai", strict=self._strict_tool_schema),
Expand Down
23 changes: 16 additions & 7 deletions livekit-agents/livekit/agents/llm/_provider_format/anthropic.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,13 +61,22 @@ def to_chat_ctx(
}
)
elif msg.type == "function_call_output":
result_content: list[Any] | str = msg.output
try:
parsed = json.loads(msg.output)
if isinstance(parsed, list):
result_content = parsed
except (json.JSONDecodeError, TypeError):
pass
result_content: list[Any] | str
if isinstance(msg.output, str):
result_content = msg.output
try:
parsed = json.loads(msg.output)
if isinstance(parsed, list):
result_content = parsed
except (json.JSONDecodeError, TypeError):
pass
else:
result_content = []
for part in llm.utils.tool_output_parts(msg.output):
if isinstance(part, str):
result_content.append({"type": "text", "text": part})
else:
result_content.append(_to_image_content(part))
content.append(
{
"tool_use_id": msg.call_id,
Expand Down
41 changes: 34 additions & 7 deletions livekit-agents/livekit/agents/llm/_provider_format/aws.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,16 +59,27 @@ def to_chat_ctx(
}
)
elif msg.type == "function_call_output":
tool_result_content: list[dict[str, Any]]
if msg.is_error:
tool_result_content = [{"text": llm.utils.tool_output_to_text(msg.output)}]
else:
tool_result_content = []
for part in llm.utils.tool_output_parts(msg.output):
if isinstance(part, str):
tool_result_content.append({"text": part})
else:
try:
tool_result_content.append(_build_image(part))
except ValueError:
tool_result_content.append(
{"text": llm.utils.TOOL_OUTPUT_IMAGE_PLACEHOLDER}
)
current_content.append(
{
"toolResult": {
"toolUseId": msg.call_id,
"content": [
{"json": msg.output}
if isinstance(msg.output, dict)
else {"text": msg.output}
],
"status": "success",
"content": tool_result_content,
"status": "error" if msg.is_error else "success",
}
}
)
Expand All @@ -95,12 +106,28 @@ def _build_image(image: llm.ImageContent) -> dict:

return {
"image": {
"format": "jpeg",
"format": _bedrock_image_format_from_mime_type(img.mime_type),
"source": {"bytes": img.data_bytes},
}
}


def _bedrock_image_format_from_mime_type(mime_type: str | None) -> str:
if not mime_type:
return "jpeg"

mime_base = mime_type.split(";", 1)[0].strip().lower()
if mime_base in {"image/jpeg", "image/jpg", "image/pjpeg"}:
return "jpeg"

if mime_base.startswith("image/"):
image_format = mime_base.split("/", 1)[1]
if image_format in {"png", "gif", "webp"}:
return image_format

raise ValueError(f"Unsupported mime_type {mime_type!r} for AWS Bedrock image format.")


def to_fnc_ctx(tool_ctx: llm.ToolContext) -> list[dict[str, Any]]:
return [_build_tool_spec(tool) for tool in tool_ctx.function_tools.values()]

Expand Down
33 changes: 25 additions & 8 deletions livekit-agents/livekit/agents/llm/_provider_format/google.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,16 +68,33 @@ def to_chat_ctx(
fc_part["thought_signature"] = sig
parts.append(fc_part)
elif msg.type == "function_call_output":
response = {"output": msg.output} if not msg.is_error else {"error": msg.output}
parts.append(
{
"function_response": {
"id": msg.call_id,
"name": msg.name,
"response": response,
if msg.is_error:
response = {"error": llm.utils.tool_output_to_text(msg.output)}
parts.append(
{
"function_response": {
"id": msg.call_id,
"name": msg.name,
"response": response,
}
}
)
else:
text_output = llm.utils.tool_output_to_text(
msg.output, include_image_placeholder=False
)
_, image_parts = llm.utils.split_tool_output_parts(msg.output)
response_payload: dict[str, Any] = {}
if text_output:
response_payload["output"] = text_output
function_response: dict[str, Any] = {
"id": msg.call_id,
"name": msg.name,
"response": response_payload,
}
)
if image_parts:
function_response["parts"] = [_to_image_part(image) for image in image_parts]
parts.append({"function_response": function_response})

if current_role is not None and parts:
turns.append({"role": current_role, "parts": parts})
Expand Down
77 changes: 67 additions & 10 deletions livekit-agents/livekit/agents/llm/_provider_format/openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,16 @@
from typing import Any, Literal

from livekit.agents import llm
from livekit.agents.log import logger

from .utils import group_tool_calls


def to_chat_ctx(
chat_ctx: llm.ChatContext, *, inject_dummy_user_message: bool = True
chat_ctx: llm.ChatContext,
*,
inject_dummy_user_message: bool = True,
supports_tool_image_output: bool = False,
) -> tuple[list[dict], Literal[None]]:
item_groups = group_tool_calls(chat_ctx)
messages = []
Expand All @@ -36,7 +40,12 @@ def to_chat_ctx(

# append tool outputs following the tool calls
for tool_output in group.tool_outputs:
messages.append(_to_chat_item(tool_output))
messages.append(
_to_chat_tool_output_item(
tool_output,
supports_tool_image_output=supports_tool_image_output,
)
)

return messages, None

Expand Down Expand Up @@ -84,16 +93,28 @@ def _to_chat_item(msg: llm.ChatItem) -> dict[str, Any]:
"tool_calls": [tc],
}

elif msg.type == "function_call_output":
return {
"role": "tool",
"tool_call_id": msg.call_id,
"content": msg.output,
}

raise ValueError(f"unsupported message type: {msg.type}")


def _to_chat_tool_output_item(
    msg: llm.ChatItem, *, supports_tool_image_output: bool
) -> dict[str, Any]:
    """Build an OpenAI chat-completions "tool" message from a function_call_output item.

    When the provider supports image content in tool messages, the output is
    converted to structured content parts; otherwise it is flattened to text.
    Raises ``ValueError`` for any other item type.
    """
    if msg.type != "function_call_output":
        raise ValueError(f"unsupported message type: {msg.type}")

    if supports_tool_image_output:
        content: str | list[dict[str, Any]] = _to_chat_tool_output_content(msg.output)
    else:
        content = llm.utils.tool_output_to_text(msg.output)

    return {"role": "tool", "tool_call_id": msg.call_id, "content": content}


def _to_image_content(image: llm.ImageContent) -> dict[str, Any]:
img = llm.utils.serialize_image(image)
if img.external_url:
Expand All @@ -115,6 +136,27 @@ def _to_image_content(image: llm.ImageContent) -> dict[str, Any]:
}


def _to_chat_tool_output_content(output: Any) -> str | list[dict[str, Any]]:
    """Render a tool output as OpenAI chat-completions tool-message content.

    Text-only outputs collapse to a plain string (image placeholders omitted).
    Outputs containing images become a list of text/image content parts; an
    image that fails to serialize is logged and replaced by a text placeholder.
    """
    _, image_parts = llm.utils.split_tool_output_parts(output)
    if not image_parts:
        return llm.utils.tool_output_to_text(output, include_image_placeholder=False)

    content: list[dict[str, Any]] = []
    for part in llm.utils.tool_output_parts(output):
        if isinstance(part, str):
            content.append({"type": "text", "text": part})
        else:
            try:
                content.append(_to_image_content(part))
            except ValueError as exc:
                logger.warning(
                    "Failed to serialize tool output image for openai chat format", exc_info=exc
                )
                content.append({"type": "text", "text": llm.utils.TOOL_OUTPUT_IMAGE_PLACEHOLDER})

    return content


def _to_responses_image_content(image: llm.ImageContent) -> dict[str, Any]:
img = llm.utils.serialize_image(image)
if img.external_url:
Expand Down Expand Up @@ -184,7 +226,7 @@ def _to_responses_chat_item(msg: llm.ChatItem) -> dict[str, Any]:
return {
"type": "function_call_output",
"call_id": msg.call_id,
"output": msg.output,
"output": _to_responses_tool_output(msg.output),
}

raise ValueError(f"unsupported message type: {msg.type}")
Expand Down Expand Up @@ -212,6 +254,21 @@ def to_fnc_ctx(tool_ctx: llm.ToolContext, *, strict: bool = True) -> list[dict[s
return schemas


def _to_responses_tool_output(output: Any) -> str | list[dict[str, Any]]:
    """Convert a tool output into the OpenAI Responses-API output payload.

    String outputs pass through unchanged; mixed outputs become a list of
    ``input_text`` and image content parts. An empty parts list degrades to
    an empty string.
    """
    value = llm.utils.normalize_function_output_value(output)
    if isinstance(value, str):
        return value

    converted: list[dict[str, Any]] = [
        {"type": "input_text", "text": part}
        if isinstance(part, str)
        else _to_responses_image_content(part)
        for part in llm.utils.tool_output_parts(value)
    ]
    return converted or ""


def to_responses_fnc_ctx(tool_ctx: llm.ToolContext, *, strict: bool = True) -> list[dict[str, Any]]:
from livekit.plugins import openai

Expand Down
25 changes: 23 additions & 2 deletions livekit-agents/livekit/agents/llm/chat_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,8 @@ def text_content(self) -> str | None:


ChatContent: TypeAlias = ImageContent | AudioContent | str
ToolOutputContent: TypeAlias = str | ImageContent
FunctionCallOutputValue: TypeAlias = ToolOutputContent | list[ToolOutputContent]


class FunctionCall(BaseModel):
Expand All @@ -197,7 +199,7 @@ class FunctionCallOutput(BaseModel):
type: Literal["function_call_output"] = Field(default="function_call_output")
name: str = Field(default="")
call_id: str
output: str
output: FunctionCallOutputValue
is_error: bool
created_at: float = Field(default_factory=time.time)

Expand Down Expand Up @@ -459,6 +461,16 @@ def to_dict(
item.content = [c for c in item.content if not isinstance(c, ImageContent)]
if exclude_audio:
item.content = [c for c in item.content if not isinstance(c, AudioContent)]
elif item.type == "function_call_output" and exclude_image:
item = item.model_copy()
if isinstance(item.output, ImageContent):
item.output = ""
elif isinstance(item.output, list):
filtered: list[ToolOutputContent] = []
for content in item.output:
if isinstance(content, str):
filtered.append(content)
item.output = filtered if filtered else ""

items.append(item)

Expand All @@ -483,7 +495,16 @@ def to_dict(
@overload
def to_provider_format(
self,
format: Literal["openai", "openai.responses"],
format: Literal["openai"],
*,
inject_dummy_user_message: bool = True,
supports_tool_image_output: bool = False,
) -> tuple[list[dict], Literal[None]]: ...

@overload
def to_provider_format(
self,
format: Literal["openai.responses"],
*,
inject_dummy_user_message: bool = True,
) -> tuple[list[dict], Literal[None]]: ...
Expand Down
Loading