Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions livekit-agents/livekit/agents/inference/stt.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
AssemblyAIModels = Literal[
"assemblyai/universal-streaming",
"assemblyai/universal-streaming-multilingual",
"assemblyai/u3-rt-pro",
]
ElevenlabsModels = Literal["elevenlabs/scribe_v2_realtime",]

Expand Down Expand Up @@ -76,6 +77,7 @@ class AssemblyaiOptions(TypedDict, total=False):
min_end_of_turn_silence_when_confident: int # default: 0
max_turn_silence: int # default: not specified
keyterms_prompt: list[str] # default: not specified
prompt: str # default: not specified (u3-rt-pro only, mutually exclusive with keyterms_prompt)


class ElevenlabsOptions(TypedDict, total=False):
Expand Down Expand Up @@ -437,6 +439,21 @@ def __init__(
self._reconnect_event = asyncio.Event()
self._speaking = False
self._speech_duration: float = 0
self._ws: aiohttp.ClientWebSocketResponse | None = None

async def update_session(self, *, extra_kwargs: dict[str, Any]) -> None:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Adding the support makes sense, but this function is not called anywhere. Do we expect more changes to come?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When u3-rt-pro is enabled in the gateway, this will allow users to make mid-session updates (the same will be possible for deepgram-flux once it's updated in the gateway).

There is an example here https://github.com/livekit/e2e/pull/908/changes#diff-be337c20cd29f5768663fa4227a69096f86c10817d41be3a47aa9b949619ae37R299

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't this just be stt.update_options, which attempts to update the session if the provider supports it?

We should be creating the same API across the board, rather than having a special API only for specific models.

"""Send a mid-stream session.update to change STT parameters without reconnecting.

Supported by providers that accept mid-stream configuration changes
(e.g. AssemblyAI UpdateConfiguration, Deepgram Flux Configure).
Providers that don't support it will silently ignore the message.

Args:
    extra_kwargs: Provider-specific settings. They are placed unchanged
        under ``settings.extra`` of the ``session.update`` message and
        serialized with ``json.dumps``, so they must be JSON-serializable.

Note:
    The message is only sent while a WebSocket is open. If there is no
    connection (``self._ws`` is ``None``) or the socket is already
    closed, this method returns without sending and without raising,
    so the update is dropped.
"""
# Wire format: the caller's settings are nested under settings.extra.
update_msg = {
"type": "session.update",
"settings": {"extra": extra_kwargs},
}
# Best-effort send: skipped when no websocket is currently open.
if self._ws is not None and not self._ws.closed:
await self._ws.send_str(json.dumps(update_msg))

def update_options(
self,
Expand Down Expand Up @@ -532,6 +549,7 @@ async def recv_task(ws: aiohttp.ClientWebSocketResponse) -> None:
while True:
try:
ws = await self._connect_ws()
self._ws = ws
tasks = [
asyncio.create_task(send_task(ws)),
asyncio.create_task(recv_task(ws)),
Expand All @@ -558,6 +576,7 @@ async def recv_task(ws: aiohttp.ClientWebSocketResponse) -> None:
tasks_group.cancel()
tasks_group.exception() # retrieve the exception
finally:
self._ws = None
if ws is not None:
await ws.close()

Expand Down
Loading