Skip to content

Commit 524d69f

Browse files
committed
fix(openai): preserve multimodal tool output ordering
1 parent cb2b06d commit 524d69f

File tree

2 files changed

+36
-3
lines changed

2 files changed

+36
-3
lines changed

livekit-agents/livekit/agents/llm/_provider_format/openai.py

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -141,10 +141,13 @@ def _to_chat_tool_output_content(output: Any) -> str | list[dict[str, Any]]:
141 141
if not image_parts:
142 142
return llm.utils.tool_output_to_text(output, include_image_placeholder=False)
143 143

144-
parts: list[dict[str, Any]] = [{"type": "text", "text": text} for text in text_parts]
145-
for image in image_parts:
144+
parts: list[dict[str, Any]] = []
145+
for part in llm.utils.tool_output_parts(output):
146+
if isinstance(part, str):
147+
parts.append({"type": "text", "text": part})
148+
continue
146 149
try:
147-
parts.append(_to_image_content(image))
150+
parts.append(_to_image_content(part))
148 151
except ValueError as e:
149 152
logger.warning(
150 153
"Failed to serialize tool output image for openai chat format", exc_info=e

tests/test_tool_output_multimodal.py

Lines changed: 30 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -184,6 +184,36 @@ def test_openai_chat_supports_multimodal_tool_output_when_enabled() -> None:
184 184
assert content[1]["type"] == "image_url"
185 185

186 186

187+
def test_openai_chat_preserves_multimodal_tool_output_order_when_enabled() -> None:
188+
image_1 = ImageContent(image="data:image/png;base64,aW1nMQ==")
189+
image_2 = ImageContent(image="data:image/png;base64,aW1nMg==")
190+
chat_ctx = ChatContext(
191+
items=[
192+
_fnc_call(),
193+
FunctionCallOutput(
194+
name="capture",
195+
call_id="call_1",
196+
output=["before", image_1, "middle", image_2, "after"],
197+
is_error=False,
198+
),
199+
]
200+
)
201+
202+
openai_messages, _ = chat_ctx.to_provider_format(
203+
"openai",
204+
inject_dummy_user_message=False,
205+
supports_tool_image_output=True,
206+
)
207+
content = openai_messages[1]["content"]
208+
assert isinstance(content, list)
209+
assert [part["type"] for part in content] == ["text", "image_url", "text", "image_url", "text"]
210+
assert content[0]["text"] == "before"
211+
assert content[1]["image_url"]["url"].endswith("aW1nMQ==")
212+
assert content[2]["text"] == "middle"
213+
assert content[3]["image_url"]["url"].endswith("aW1nMg==")
214+
assert content[4]["text"] == "after"
215+
216+
187 217
def test_openai_chat_keeps_text_tool_output_as_string_when_enabled() -> None:
188 218
chat_ctx = ChatContext(
189 219
items=[

0 commit comments

Comments
 (0)