4040from livekit .agents .utils import AudioBuffer , http_context , is_given
4141from livekit .agents .voice .io import TimedString
4242
43+ from ._utils import PeriodicCollector
4344from .log import logger
4445from .models import STTRealtimeSampleRates
4546
@@ -327,6 +328,10 @@ def __init__(
327328 self ._session = http_session
328329 self ._reconnect_event = asyncio .Event ()
329330 self ._speaking = False # Track if we're currently in a speech segment
331+ self ._audio_duration_collector = PeriodicCollector (
332+ callback = self ._on_audio_duration_report ,
333+ duration = 5.0 ,
334+ )
330335
331336 def update_options (
332337 self ,
@@ -337,6 +342,14 @@ def update_options(
337342 self ._opts .server_vad = server_vad
338343 self ._reconnect_event .set ()
339344
def _on_audio_duration_report(self, duration: float) -> None:
    """Publish a usage event reporting ``duration`` of processed audio.

    Wraps ``duration`` in a ``stt.RecognitionUsage`` and sends it on the
    stream's event channel as a ``RECOGNITION_USAGE`` speech event with no
    transcript alternatives. The send is non-blocking (``send_nowait``).

    Args:
        duration: Audio duration to report. Presumably the sum of the frame
            durations pushed to the collector since its last report --
            TODO confirm against ``PeriodicCollector``.
    """
    self._event_ch.send_nowait(
        stt.SpeechEvent(
            type=stt.SpeechEventType.RECOGNITION_USAGE,
            alternatives=[],
            recognition_usage=stt.RecognitionUsage(audio_duration=duration),
        )
    )
352+
340353 async def _run (self ) -> None :
341354 """Run the streaming transcription session"""
342355 closing_ws = False
@@ -361,15 +374,18 @@ async def send_task(ws: aiohttp.ClientWebSocketResponse) -> None:
361374 samples_per_channel = samples_50ms ,
362375 )
363376
377+ has_ended = False
364378 async for data in self ._input_ch :
365379 # Write audio bytes to buffer and get 50ms frames
366380 frames : list [rtc .AudioFrame ] = []
367381 if isinstance (data , rtc .AudioFrame ):
368382 frames .extend (audio_bstream .write (data .data .tobytes ()))
369383 elif isinstance (data , self ._FlushSentinel ):
370384 frames .extend (audio_bstream .flush ())
385+ has_ended = True
371386
372387 for frame in frames :
388+ self ._audio_duration_collector .push (frame .duration )
373389 audio_b64 = base64 .b64encode (frame .data .tobytes ()).decode ("utf-8" )
374390 await ws .send_str (
375391 json .dumps (
@@ -382,6 +398,10 @@ async def send_task(ws: aiohttp.ClientWebSocketResponse) -> None:
382398 )
383399 )
384400
401+ if has_ended :
402+ self ._audio_duration_collector .flush ()
403+ has_ended = False
404+
385405 closing_ws = True
386406
387407 @utils .log_exceptions (logger = logger )
0 commit comments