-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpyproject.toml
More file actions
81 lines (70 loc) · 2.16 KB
/
pyproject.toml
File metadata and controls
81 lines (70 loc) · 2.16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# PEP 517 build configuration.
# `wheel` is intentionally not listed: the setuptools backend requests it on
# its own when a wheel is actually being built, and naming it here would also
# force it to be installed for sdist-only builds.
[build-system]
requires = ["setuptools>=68"]
build-backend = "setuptools.build_meta"
# Core package metadata (PEP 621).
[project]
name = "trawl"
version = "0.4.4"
# Folded with line-ending backslashes; the resulting value is a single line.
description = """\
Selective web content extraction — fetch a page and return only the chunks \
most relevant to a query. Exposed as a Python library and an MCP server.\
"""
readme = "README.md"
requires-python = ">=3.10"
# NOTE(review): the table form is kept for compatibility with the declared
# setuptools floor. The SPDX string form (`license = "MIT"`, PEP 639) needs a
# newer setuptools (77+, to be confirmed) — revisit if the floor is raised.
license = { text = "MIT" }
authors = [{ name = "lyla" }]
keywords = ["web scraping", "extraction", "rag", "mcp", "embedding"]
# Trove classifiers published with the package metadata.
classifiers = [
    "Development Status :: 3 - Alpha",
    "License :: OSI Approved :: MIT License",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3 :: Only",
    "Topic :: Internet :: WWW/HTTP",
    "Topic :: Text Processing :: Linguistic",
]
# Runtime requirements (PEP 508 strings), installed with the package.
dependencies = [
    "httpx>=0.27",
    # Exact pin: must match the Chromium revision baked into the Playwright
    # base Docker image (mcr.microsoft.com/playwright/python:v1.58.0-jammy).
    # Bump this AND the Dockerfile FROM line together — on a mismatch pip
    # fetches a newer Playwright whose required browser binary is absent
    # from the image, which surfaces as hard-to-diagnose greenlet/loop errors.
    "playwright==1.58.0",
    "playwright-stealth>=2.0",
    "trafilatura>=1.6",
    "beautifulsoup4>=4.12",
    "lxml>=5.0",
    "pymupdf>=1.24",
    "pyyaml>=6.0",
    "rank_bm25>=0.2.2",
    "mcp>=1.27",
    "starlette>=0.37",
    "uvicorn>=0.30",
    "youtube-transcript-api>=1.0",
]
# Extras. `dev` is selected with `pip install "trawl[dev]"` (or `-e ".[dev]"`)
# and pulls in the test tooling.
[project.optional-dependencies]
dev = [
    "pytest>=8.0",
    "pytest-asyncio>=0.23",
]
# Console entry points. Installing the package creates a `trawl-mcp` command
# that calls `_cli_entry` from the `trawl_mcp.server` module.
[project.scripts]
trawl-mcp = "trawl_mcp.server:_cli_entry"
# Project links published with the package metadata.
# Homepage and Repository both point at the GitHub repository.
[project.urls]
Homepage = "https://github.com/bbulb/trawl"
Repository = "https://github.com/bbulb/trawl"
Issues = "https://github.com/bbulb/trawl/issues"
Changelog = "https://github.com/bbulb/trawl/blob/main/CHANGELOG.md"
# Automatic package discovery: setuptools looks for packages under `src/`.
[tool.setuptools.packages.find]
where = ["src"]
# Non-Python files to ship inside the built distribution. The "*" key applies
# the patterns to every package, so any `*.md` file located inside a package
# directory is included.
[tool.setuptools.package-data]
"*" = ["*.md"]
# Ruff settings. `target-version` mirrors the `requires-python` floor (3.10).
[tool.ruff]
target-version = "py310"
line-length = 100

[tool.ruff.lint]
# Enabled rule families, one per line.
select = [
    "E",   # pycodestyle errors
    "F",   # Pyflakes
    "W",   # pycodestyle warnings
    "I",   # isort (import ordering)
    "UP",  # pyupgrade
    "B",   # flake8-bugbear
]
# E501 (line too long) is switched off, so the linter does not enforce
# `line-length`.
ignore = ["E501"]

[tool.ruff.lint.per-file-ignores]
# Files matching `tests/*` are exempt from the flake8-bugbear rules.
"tests/*" = ["B"]
# pytest configuration.
[tool.pytest.ini_options]
# Directories searched for tests when pytest is run without explicit paths.
testpaths = ["tests"]
# pytest-asyncio "auto" mode: async test functions are run as asyncio tests
# without needing an explicit marker on each one.
asyncio_mode = "auto"
# Directories prepended to sys.path for the test run; "." is presumably the
# project root (relative to pytest's rootdir) — confirm against the test imports.
pythonpath = ["."]