From e275f291f58b157f261b978057ebedc29006f279 Mon Sep 17 00:00:00 2001 From: lyla Date: Fri, 17 Apr 2026 08:13:24 +0900 Subject: [PATCH] =?UTF-8?q?fix(mcp):=20=ED=8C=8C=EC=9D=B4=ED=94=84?= =?UTF-8?q?=EB=9D=BC=EC=9D=B8=20=ED=98=B8=EC=B6=9C=EC=9D=84=20=EC=A0=84?= =?UTF-8?q?=EC=9A=A9=20=EB=8B=A8=EC=9D=BC=20=EC=9B=8C=EC=BB=A4=20=EC=8A=A4?= =?UTF-8?q?=EB=A0=88=EB=93=9C=EC=97=90=20=EA=B3=A0=EC=A0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `asyncio.to_thread`의 기본 executor는 여러 워커에 작업을 분배하는데, trawl 파이프라인이 쓰는 sync_playwright의 greenlet 디스패처는 최초 호출 스레드에 핀되므로 다른 워커로 들어온 호출이 "Cannot switch to a different thread" greenlet.error를 뱉었다. max_workers=1인 전용 ThreadPoolExecutor로 모든 fetch_page / profile_page 호출을 동일 스레드에 고정해 이 간헐적 실패를 제거한다. --- src/trawl_mcp/server.py | 45 +++++++++++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 13 deletions(-) diff --git a/src/trawl_mcp/server.py b/src/trawl_mcp/server.py index ed1fae0..5f09f92 100644 --- a/src/trawl_mcp/server.py +++ b/src/trawl_mcp/server.py @@ -1,16 +1,23 @@ """Stdio/HTTP MCP server exposing trawl's fetch_page and profile_page tools. The pipeline uses sync_playwright internally, which can't run inside an -asyncio event loop on its own. We run each pipeline invocation in a worker -thread via `asyncio.to_thread`, keeping the MCP server responsive. +asyncio event loop on its own. We run every pipeline invocation on a +single dedicated worker thread so the process-wide sync_playwright +greenlet dispatcher — which is pinned to the thread that first called +sync_playwright() — always sees the same thread. Using +`asyncio.to_thread` (default executor) instead causes intermittent +"Cannot switch to a different thread" greenlet errors whenever a call +is dispatched to a different worker thread. """ from __future__ import annotations import asyncio +import functools import json import logging import os +from concurrent.futures import ThreadPoolExecutor from mcp.server import Server from mcp.server.stdio import stdio_server @@ -18,6 +25,10 @@ from trawl import fetch_relevant, to_dict +_pipeline_executor = ThreadPoolExecutor( + max_workers=1, thread_name_prefix="trawl-pipeline" +) + logger = logging.getLogger("trawl_mcp") server: Server = Server("trawl") @@ -166,13 +177,17 @@ async def _call_fetch_page(arguments: dict) -> list[TextContent]: use_hyde, use_rerank, ) - result = await asyncio.to_thread( - fetch_relevant, - url, - query, - k=k, - use_hyde=use_hyde, - use_rerank=use_rerank, + loop = asyncio.get_running_loop() + result = await loop.run_in_executor( + _pipeline_executor, + functools.partial( + fetch_relevant, + url, + query, + k=k, + use_hyde=use_hyde, + use_rerank=use_rerank, + ), ) payload = to_dict(result) payload["ok"] = not bool(payload.get("error")) @@ -195,10 +210,14 @@ async def _call_profile_page(arguments: dict) -> list[TextContent]: # when the tool is actually called. from trawl.profiles import generate_profile - payload = await asyncio.to_thread( - generate_profile, - url, - force_refresh=force_refresh, + loop = asyncio.get_running_loop() + payload = await loop.run_in_executor( + _pipeline_executor, + functools.partial( + generate_profile, + url, + force_refresh=force_refresh, + ), ) return [TextContent(type="text", text=json.dumps(payload, ensure_ascii=False))]