Skip to content

Commit 514de86

Browse files
authored
feat: server-side save-skill API for Chrome extension (#66)
* feat: move extension save-skill to server-side API Chrome Web Store rejected the content script approach since it doesn't inject on pre-existing tabs. Move webpage-to-skill conversion from client-side to a server-side API route that uses Turndown + weighted tag detection, eliminating the need for content scripts entirely. - Add POST /api/save-skill route with SSRF protection and rate limiting - Rewrite extension to call API with tab URL instead of injecting scripts - Remove content script, scripting permission, and turndown dependency - Add name field validation and YAML-escape source URLs - Bump extension manifest to v1.18.0 * fix: update privacy policy and store listing for server-side save Privacy policy previously stated "no external requests" which is now inaccurate. Updated to clearly disclose that the page URL is sent to agenstskills.com for processing, what data is NOT sent, and that nothing is stored server-side. Store listing updated to match: removed "no server" claims, added data flow explanation, permissions justifications, and data usage disclosure section for the Chrome Web Store Privacy tab. * fix: address Devin and CodeRabbit review findings - Fix SSRF IPv6 checks blocking legit domains (ffmpeg.org, fdroid.org) by requiring colon in fc00:/ff prefixes (Devin) - Add 5MB response body size limit to prevent OOM on large pages, applied to both extractFromUrl and fetchGitHubContent (CodeRabbit) - Guard response.json() in callSaveApi against non-JSON error pages from proxies returning HTML 502/504 (CodeRabbit) - YAML-escape source URL in buildSelectionSkill to match server-side handling of URLs with #, :, or query strings (CodeRabbit)
1 parent bf5903b commit 514de86

13 files changed

Lines changed: 3946 additions & 430 deletions

File tree

docs/fumadocs/package.json

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,14 +21,16 @@
2121
"next-themes": "^0.4.6",
2222
"react": "^19.2.3",
2323
"react-dom": "^19.2.3",
24-
"tailwind-merge": "^3.4.0"
24+
"tailwind-merge": "^3.4.0",
25+
"turndown": "^7.2.2"
2526
},
2627
"devDependencies": {
2728
"@tailwindcss/postcss": "^4.1.18",
2829
"@types/mdx": "^2.0.13",
2930
"@types/node": "^24.10.2",
3031
"@types/react": "^19.2.7",
3132
"@types/react-dom": "^19.2.3",
33+
"@types/turndown": "^5.0.6",
3234
"postcss": "^8.5.6",
3335
"tailwindcss": "^4.1.18",
3436
"typescript": "^5.9.3"
Lines changed: 291 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,291 @@
1+
import { NextRequest, NextResponse } from 'next/server';
2+
import TurndownService from 'turndown';
3+
4+
// In-memory request counters used by checkRateLimit, keyed by the string the caller passes in
// (the POST handler passes the client IP). Each entry stores how many requests were counted in
// the current window and the epoch-millisecond timestamp at which that window ends.
const rateLimitMap = new Map<string, { count: number; resetTime: number }>();
5+
// Length of one rate-limit window in milliseconds (60 seconds).
const RATE_LIMIT_WINDOW_MS = 60 * 1000;
6+
// Maximum number of requests a single key may make within one window.
const RATE_LIMIT_MAX_REQUESTS = 10;
7+
8+
/**
 * Fixed-window rate limiter backed by the module-level `rateLimitMap`.
 *
 * The first request for a key (or the first one after its window has expired) opens a new
 * window of `RATE_LIMIT_WINDOW_MS`; up to `RATE_LIMIT_MAX_REQUESTS` requests are allowed per
 * window.
 *
 * @param key - Identifier to count requests against (the POST handler passes the client IP).
 * @returns `allowed` tells the caller whether to serve the request; `remaining` is how many
 *          more requests the key may make in the current window.
 */
function checkRateLimit(key: string): { allowed: boolean; remaining: number } {
  // Above this many tracked keys, expired windows are swept before a new one is inserted.
  const SWEEP_THRESHOLD = 1000;

  const now = Date.now();
  const entry = rateLimitMap.get(key);

  if (!entry || now > entry.resetTime) {
    // Expired entries were previously never removed, so the map grew by one entry per distinct
    // key for the lifetime of the process. Sweep opportunistically, only when the map is large,
    // so the common path stays O(1). Deleting during Map iteration is well-defined in JS.
    if (rateLimitMap.size >= SWEEP_THRESHOLD) {
      for (const [trackedKey, tracked] of rateLimitMap) {
        if (now > tracked.resetTime) rateLimitMap.delete(trackedKey);
      }
    }

    rateLimitMap.set(key, { count: 1, resetTime: now + RATE_LIMIT_WINDOW_MS });
    return { allowed: true, remaining: RATE_LIMIT_MAX_REQUESTS - 1 };
  }

  if (entry.count >= RATE_LIMIT_MAX_REQUESTS) {
    return { allowed: false, remaining: 0 };
  }

  entry.count++;
  return { allowed: true, remaining: RATE_LIMIT_MAX_REQUESTS - entry.count };
}
24+
25+
// Hostnames that isAllowedUrl rejects by exact match: the loopback name, the IPv4/IPv6 loopback
// addresses (IPv6 with and without URL brackets) and the unspecified IPv4 address.
const BLOCKED_HOSTS = new Set(['localhost', '127.0.0.1', '[::1]', '::1', '0.0.0.0']);
26+
27+
/**
 * SSRF guard: decides whether the server may fetch `url`.
 *
 * Only `http:` and `https:` URLs are accepted, and the hostname must not be a loopback,
 * private, link-local, multicast or unspecified address (IPv4 or IPv6), nor a `localhost`
 * name. This is a lexical check on the parsed hostname only; it does not resolve DNS, so a
 * public name that resolves to an internal address is not detected here.
 *
 * @param url - Absolute URL supplied by the client.
 * @returns `true` if the URL may be fetched, `false` if it is malformed or points at a
 *          blocked destination.
 */
function isAllowedUrl(url: string): boolean {
  try {
    const parsed = new URL(url);
    if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') return false;

    const hostname = parsed.hostname.toLowerCase();
    // Drop IPv6 brackets and a trailing root dot ("localhost." names the same host as "localhost").
    const bare = hostname.replace(/^\[|\]$/g, '').replace(/\.$/, '');
    if (bare === '') return false;

    if (BLOCKED_HOSTS.has(hostname) || BLOCKED_HOSTS.has(bare)) return false;
    // "*.localhost" names are reserved for loopback; "::" is the unspecified IPv6 address.
    if (bare.endsWith('.localhost') || bare === '::') return false;

    // IPv4-mapped IPv6. The URL parser serialises "[::ffff:127.0.0.1]" as "[::ffff:7f00:1]",
    // so the embedded IPv4 address must be rebuilt from the two trailing hex groups before it
    // can be checked; slicing off the prefix and re-parsing would test "7f00" as a host name.
    const mapped = /^::ffff:(?:(\d+\.\d+\.\d+\.\d+)|([0-9a-f]{1,4}):([0-9a-f]{1,4}))$/.exec(bare);
    if (mapped) {
      if (mapped[1] !== undefined) return isAllowedUrl(`http://${mapped[1]}`);
      const hi = parseInt(mapped[2], 16);
      const lo = parseInt(mapped[3], 16);
      return isAllowedUrl(`http://${hi >> 8}.${hi & 0xff}.${lo >> 8}.${lo & 0xff}`);
    }
    // Any other "::ffff:" shape is unexpected; fail closed.
    if (bare.startsWith('::ffff:')) return false;

    // IPv4: loopback, "this network", private ranges, link-local, multicast/reserved.
    if (/^127\./.test(bare) || /^0\./.test(bare)) return false;
    if (bare.startsWith('10.') || bare.startsWith('192.168.')) return false;
    if (/^172\.(1[6-9]|2\d|3[01])\./.test(bare)) return false;
    if (bare.startsWith('169.254.')) return false;
    if (/^(22[4-9]|23\d|24\d|25[0-5])\./.test(bare)) return false;

    // IPv6 ranges. Every pattern requires a ':' so that ordinary domain names that merely
    // start with the same letters (e.g. "fdroid.org", "ffmpeg.org") are not rejected.
    if (/^fe[89ab][0-9a-f]:/.test(bare)) return false; // fe80::/10 link-local
    if (/^f[cd][0-9a-f]{2}:/.test(bare)) return false; // fc00::/7 unique-local
    if (/^ff[0-9a-f]{2}:/.test(bare)) return false; // ff00::/8 multicast
    return true;
  } catch {
    // new URL() throws on malformed input; treat that as "not allowed".
    return false;
  }
}
49+
50+
// Matches github.com "blob" file URLs and captures, in order: owner, repo, branch, file path.
// NOTE(review): the branch group is `[^/]+`, so a branch name containing "/" would be split
// between the branch and path groups — confirm whether such URLs need to be supported.
const GITHUB_URL_PATTERN = /^https?:\/\/github\.com\/([^/]+)\/([^/]+)\/blob\/([^/]+)\/(.+)$/;
51+
// Matches URLs that already point at raw.githubusercontent.com.
const GITHUB_RAW_PATTERN = /^https?:\/\/raw\.githubusercontent\.com\//;
52+
// Timeout, in milliseconds (30 seconds), handed to AbortSignal.timeout for outbound fetches.
const FETCH_TIMEOUT = 30_000;
53+
54+
// Technology names that detectTags searches for (as whole words, case-insensitively) in the
// extracted content; each keyword found contributes a low-weight tag.
const TECH_KEYWORDS = new Set([
55+
'react', 'vue', 'angular', 'svelte', 'nextjs', 'nuxt', 'remix',
56+
'typescript', 'javascript', 'python', 'rust', 'go', 'java', 'ruby',
57+
'node', 'deno', 'bun', 'docker', 'kubernetes', 'terraform',
58+
'aws', 'gcp', 'azure', 'vercel', 'netlify', 'cloudflare',
59+
'graphql', 'rest', 'grpc', 'websocket', 'redis', 'postgres',
60+
'mongodb', 'sqlite', 'mysql', 'prisma', 'drizzle',
61+
'tailwind', 'css', 'html', 'sass', 'webpack', 'vite', 'esbuild',
62+
'git', 'ci', 'cd', 'testing', 'security', 'authentication',
63+
'api', 'cli', 'sdk', 'mcp', 'llm', 'ai', 'ml', 'openai', 'anthropic',
64+
]);
65+
66+
// Shape of an acceptable tag: lowercase alphanumeric runs joined by single hyphens
// (e.g. "node", "ci-cd"); addTag ignores anything that does not match.
const TAG_PATTERN = /^[a-z0-9]+(-[a-z0-9]+)*$/;
67+
68+
// Shared HTML-to-Markdown converter, configured for ATX ("#") headings and fenced code blocks.
const turndown = new TurndownService({ headingStyle: 'atx', codeBlockStyle: 'fenced' });
69+
70+
/**
 * Result of fetching and converting a URL.
 *
 * - `title`: page <title> or file name, used as the default skill name.
 * - `content`: Markdown (or plain text) body of the skill.
 * - `sourceUrl`: the URL the caller asked for.
 * - `contentType`: which extraction path produced the result ('webpage', 'text' or 'github').
 * - `language`: language inferred from the file extension; only set by the GitHub path.
 */
interface ExtractedContent {
71+
title: string;
72+
content: string;
73+
sourceUrl: string;
74+
contentType: string;
75+
language?: string;
76+
}
77+
78+
/**
 * Fetches `url` and converts the response into skill content.
 *
 * GitHub blob/raw URLs are delegated to `fetchGitHubContent`. For everything else the page is
 * downloaded (at most 5 MB), HTML is converted to Markdown with Turndown, and any other
 * content type is returned verbatim as text.
 *
 * @param url - URL to fetch; the caller is expected to have validated it with `isAllowedUrl`.
 * @returns The extracted title, content and metadata.
 * @throws Error if the request fails, returns a non-2xx status, redirects to a disallowed or
 *         excessive chain of locations, or the body exceeds the size limit.
 */
async function extractFromUrl(url: string): Promise<ExtractedContent> {
  if (GITHUB_URL_PATTERN.test(url) || GITHUB_RAW_PATTERN.test(url)) {
    return fetchGitHubContent(url);
  }

  const MAX_BODY_SIZE = 5 * 1024 * 1024;
  const MAX_REDIRECTS = 5;
  // One deadline for the whole operation: every redirect hop and the body download.
  const signal = AbortSignal.timeout(FETCH_TIMEOUT);

  // Follow redirects by hand so that every hop is re-validated. With automatic redirects an
  // allowed public URL could bounce the server to a loopback or private address.
  let currentUrl = url;
  let hops = 0;
  let response = await fetch(currentUrl, { signal, redirect: 'manual' });
  while (response.status >= 300 && response.status < 400) {
    const location = response.headers.get('location');
    if (!location) break; // 3xx without a target: handled by the !ok check below
    await response.body?.cancel();
    if (++hops > MAX_REDIRECTS) {
      throw new Error('Too many redirects');
    }
    const nextUrl = new URL(location, currentUrl).toString();
    if (!isAllowedUrl(nextUrl)) {
      throw new Error('Redirect target not allowed');
    }
    currentUrl = nextUrl;
    response = await fetch(currentUrl, { signal, redirect: 'manual' });
  }

  if (!response.ok) {
    throw new Error(`Failed to fetch ${url}: ${response.status} ${response.statusText}`);
  }

  // Fast path: reject early when the server declares an oversized body.
  const contentLength = Number(response.headers.get('content-length') || '0');
  if (contentLength > MAX_BODY_SIZE) {
    await response.body?.cancel();
    throw new Error('Response too large');
  }

  const contentType = (response.headers.get('content-type') ?? '').toLowerCase();

  // Stream the body and stop as soon as the byte budget is exceeded. Calling response.text()
  // first would buffer the entire payload before any size check could run, and string length
  // counts UTF-16 units rather than bytes.
  let body = '';
  if (response.body) {
    const reader = response.body.getReader();
    const decoder = new TextDecoder();
    let received = 0;
    for (;;) {
      const { done, value } = await reader.read();
      if (done) break;
      received += value.byteLength;
      if (received > MAX_BODY_SIZE) {
        await reader.cancel();
        throw new Error('Response too large');
      }
      body += decoder.decode(value, { stream: true });
    }
    body += decoder.decode(); // flush any buffered partial code point
  }

  if (contentType.includes('text/html') || contentType.includes('application/xhtml+xml')) {
    const titleMatch = body.match(/<title[^>]*>([^<]+)<\/title>/i);
    // `||` rather than `??`: a whitespace-only <title> trims to '' and should fall back too.
    const title = titleMatch?.[1]?.trim() || new URL(url).hostname;
    const bodyMatch = body.match(/<body[^>]*>([\s\S]*)<\/body>/i);
    const content = turndown.turndown(bodyMatch?.[1] ?? body);
    return { title, content, sourceUrl: url, contentType: 'webpage' };
  }

  // pop() yields '' (not undefined) for paths ending in '/', so `||` is needed for the fallback.
  const title = new URL(url).pathname.split('/').pop() || 'Untitled';
  return { title, content: body, sourceUrl: url, contentType: 'text' };
}
111+
112+
// Maps a lowercase file extension (including the leading dot) to the language name used for
// the Markdown code-fence info string and for the language tag.
const LANG_MAP: Record<string, string> = {
113+
'.ts': 'typescript', '.tsx': 'typescript', '.js': 'javascript', '.jsx': 'javascript',
114+
'.py': 'python', '.rb': 'ruby', '.go': 'go', '.rs': 'rust', '.java': 'java',
115+
'.kt': 'kotlin', '.swift': 'swift', '.sh': 'shell', '.yml': 'yaml', '.yaml': 'yaml',
116+
'.json': 'json', '.md': 'markdown', '.html': 'html', '.css': 'css', '.sql': 'sql',
117+
};
118+
119+
/**
 * Fetches a file from GitHub and wraps it as skill content.
 *
 * github.com "blob" URLs are rewritten to their raw.githubusercontent.com equivalent; raw URLs
 * are fetched as-is. Files whose extension maps to a language other than Markdown are wrapped
 * in a fenced code block; everything else is returned verbatim.
 *
 * @param url - A URL matching `GITHUB_URL_PATTERN` or `GITHUB_RAW_PATTERN`.
 * @returns The file name as title, the (possibly fenced) content, and the detected language.
 * @throws Error if the request fails, returns a non-2xx status, or the body exceeds 5 MB.
 */
async function fetchGitHubContent(url: string): Promise<ExtractedContent> {
  let rawUrl = url;
  const match = url.match(GITHUB_URL_PATTERN);
  if (match) {
    const [, owner, repo, branch, path] = match;
    rawUrl = `https://raw.githubusercontent.com/${owner}/${repo}/${branch}/${path}`;
  }

  const MAX_BODY_SIZE = 5 * 1024 * 1024;
  const response = await fetch(rawUrl, { signal: AbortSignal.timeout(FETCH_TIMEOUT) });
  if (!response.ok) {
    throw new Error(`Failed to fetch GitHub content: ${response.status} ${response.statusText}`);
  }

  // Fast path: reject early when the server declares an oversized body.
  const contentLength = Number(response.headers.get('content-length') || '0');
  if (contentLength > MAX_BODY_SIZE) {
    await response.body?.cancel();
    throw new Error('Response too large');
  }

  // Stream the body so the byte limit is enforced while downloading, not after the whole
  // payload has already been buffered by response.text().
  let body = '';
  if (response.body) {
    const reader = response.body.getReader();
    const decoder = new TextDecoder();
    let received = 0;
    for (;;) {
      const { done, value } = await reader.read();
      if (done) break;
      received += value.byteLength;
      if (received > MAX_BODY_SIZE) {
        await reader.cancel();
        throw new Error('Response too large');
      }
      body += decoder.decode(value, { stream: true });
    }
    body += decoder.decode(); // flush any buffered partial code point
  }

  // Take the file name from the URL *path* so a query string or fragment ("?plain=1", "#L10")
  // does not leak into the title or break extension detection.
  const lastSegment = new URL(rawUrl).pathname.split('/').pop() || 'file';
  let filename = lastSegment;
  try {
    filename = decodeURIComponent(lastSegment);
  } catch {
    // Malformed percent-escape: keep the encoded segment as the name.
  }

  const ext = filename.includes('.') ? '.' + filename.split('.').pop()!.toLowerCase() : '';
  const language = LANG_MAP[ext];
  const isCode = language !== undefined && language !== 'markdown';

  // A Markdown fence is closed by any run of at least as many backticks, so use a fence one
  // longer than the longest run in the file. Files without "```" still get the usual
  // three-backtick fence.
  let fenceLength = 3;
  for (const run of body.match(/`{3,}/g) ?? []) {
    fenceLength = Math.max(fenceLength, run.length + 1);
  }
  const fence = '`'.repeat(fenceLength);
  const content = isCode ? `${fence}${language}\n${body}\n${fence}` : body;

  return { title: filename, content, sourceUrl: url, contentType: 'github', language };
}
150+
151+
/**
 * Adds `weight` to the running score of `tag`, provided the tag has the shape required by
 * `TAG_PATTERN`. Tags that do not match are silently ignored.
 *
 * @param counts - Score table to update in place.
 * @param tag - Candidate tag.
 * @param weight - Amount to add to the tag's score.
 */
function addTag(counts: Map<string, number>, tag: string, weight: number): void {
  if (!TAG_PATTERN.test(tag)) return;

  const previousScore = counts.get(tag) ?? 0;
  counts.set(tag, previousScore + weight);
}
156+
157+
/**
 * Derives up to ten tags for extracted content by scoring candidates from several signals:
 * URL path segments (weight 2), words in level-1/2 Markdown headings (weight 2), code-fence
 * languages (weight 3), known technology keywords found in the text (weight 1) and the
 * detected file language (weight 3).
 *
 * @param extracted - Content and metadata produced by the extraction step.
 * @returns The highest-scoring tags, best first; ties keep first-seen order.
 */
function detectTags(extracted: ExtractedContent): string[] {
  const scores = new Map<string, number>();
  const { content, sourceUrl, language } = extracted;

  // Signal 1: path segments of the source URL (skipped entirely if the URL does not parse).
  let pathSegments: string[] = [];
  try {
    pathSegments = new URL(sourceUrl).pathname.split('/').filter(Boolean);
  } catch { /* skip */ }
  for (const rawSegment of pathSegments) {
    const segment = rawSegment.toLowerCase().replace(/[^a-z0-9-]/g, '');
    if (segment.length >= 2 && segment.length <= 30) {
      addTag(scores, segment, 2);
    }
  }

  // Signal 2: individual words of "#" and "##" headings.
  for (const heading of content.matchAll(/^#{1,2}\s+(.+)$/gm)) {
    heading[1].toLowerCase().split(/\s+/).forEach((token) => {
      const word = token.replace(/[^a-z0-9-]/g, '');
      if (word.length >= 2) {
        addTag(scores, word, 2);
      }
    });
  }

  // Signal 3: info strings of fenced code blocks.
  for (const fence of content.matchAll(/^```(\w+)/gm)) {
    const fenceLanguage = fence[1].toLowerCase();
    if (fenceLanguage.length >= 2) {
      addTag(scores, fenceLanguage, 3);
    }
  }

  // Signal 4: whole-word occurrences of known technology keywords.
  const haystack = content.toLowerCase();
  TECH_KEYWORDS.forEach((keyword) => {
    if (new RegExp(`\\b${keyword}\\b`, 'i').test(haystack)) {
      addTag(scores, keyword, 1);
    }
  });

  // Signal 5: language inferred from the file extension, when present.
  if (language) {
    addTag(scores, language.toLowerCase(), 3);
  }

  const ranked = [...scores.entries()];
  ranked.sort((left, right) => right[1] - left[1]);
  return ranked.slice(0, 10).map((pair) => pair[0]);
}
206+
207+
/**
 * Turns arbitrary text into a skill-name slug: lowercase ASCII letters and digits separated by
 * single hyphens, at most 64 characters, with no leading or trailing hyphen.
 *
 * @param input - Text to convert (a user-supplied name or a page title).
 * @returns The slug, or 'untitled-skill' when nothing usable remains.
 */
function slugify(input: string): string {
  const hyphenated = input.toLowerCase().replace(/[^a-z0-9]+/g, '-');
  const withoutEdges = hyphenated.replace(/^-+|-+$/g, '');
  const collapsed = withoutEdges.replace(/-{2,}/g, '-');

  // Truncation can leave a hyphen at the cut point, so trim the tail again afterwards.
  const truncated = collapsed.slice(0, 64).replace(/-+$/, '');
  if (truncated === '') {
    return 'untitled-skill';
  }
  return truncated;
}
215+
216+
/**
 * Formats a string as a single-line YAML scalar for the skill front matter.
 *
 * Line breaks are flattened to spaces and the value is trimmed. The result is emitted as a
 * plain scalar when that is unambiguous, and as a double-quoted scalar (with `\` and `"`
 * escaped) whenever a YAML parser could otherwise read it as something other than the
 * intended string.
 *
 * @param value - Raw text (a description or URL).
 * @returns YAML-safe text to place after `key: `.
 */
function yamlEscape(value: string): string {
  // Also flatten a lone "\r", which YAML treats as a line break.
  const singleLine = value.replace(/\r\n|\r|\n/g, ' ').trim();

  const needsQuoting =
    // An empty plain scalar parses as null.
    singleLine === '' ||
    // Indicator characters that can start or terminate YAML constructs.
    /[:#{}[\],&*?|>!%@`]/.test(singleLine) ||
    // Leading quote would open a quoted scalar; leading "-" (e.g. a Markdown list item used as
    // the description) would be read as a sequence entry.
    /^['"-]/.test(singleLine) ||
    // Words and numbers that parsers resolve to booleans, null or numeric types.
    /^(true|false|yes|no|on|off|null|~|\.inf|\.nan)$/i.test(singleLine) ||
    !Number.isNaN(Number(singleLine));

  if (needsQuoting) {
    return `"${singleLine.replace(/\\/g, '\\\\').replace(/"/g, '\\"')}"`;
  }
  return singleLine;
}
223+
224+
/**
 * POST /api/save-skill — converts a web page into a SKILL.md document.
 *
 * Request body (JSON): `{ url: string; name?: string }`.
 * Success response: `{ name, skillMd, tags, description }` plus an `X-RateLimit-Remaining`
 * header.
 * Error responses: 400 for a malformed body or fields, 403 for a URL rejected by
 * `isAllowedUrl`, 429 when the per-client rate limit is exhausted, 502 when fetching or
 * converting the page fails.
 *
 * @param request - Incoming Next.js request.
 * @returns A JSON response as described above.
 */
export async function POST(request: NextRequest) {
  // First address in X-Forwarded-For identifies the client; requests without the header all
  // share the single 'unknown' bucket.
  const ip = request.headers.get('x-forwarded-for')?.split(',')[0]?.trim() ?? 'unknown';
  const { allowed, remaining } = checkRateLimit(ip);

  if (!allowed) {
    return NextResponse.json(
      { error: 'Too many requests. Try again in a minute.' },
      { status: 429, headers: { 'X-RateLimit-Remaining': '0', 'Retry-After': '60' } },
    );
  }

  let payload: unknown;
  try {
    payload = await request.json();
  } catch {
    return NextResponse.json({ error: 'Invalid JSON body' }, { status: 400 });
  }

  // Valid JSON is not necessarily an object: a body of `null` would make the destructuring
  // below throw outside any try/catch and surface as an unhandled 500.
  if (typeof payload !== 'object' || payload === null) {
    return NextResponse.json({ error: 'Invalid JSON body' }, { status: 400 });
  }

  const { url, name } = payload as { url?: unknown; name?: unknown };
  if (!url || typeof url !== 'string') {
    return NextResponse.json({ error: 'Missing required field: url' }, { status: 400 });
  }

  if (name !== undefined && typeof name !== 'string') {
    return NextResponse.json({ error: 'Field "name" must be a string' }, { status: 400 });
  }
  const requestedName = typeof name === 'string' ? name : undefined;

  if (!isAllowedUrl(url)) {
    return NextResponse.json({ error: 'URL not allowed' }, { status: 403 });
  }

  try {
    const extracted = await extractFromUrl(url);
    const tags = detectTags(extracted);

    // An empty name falls through to the page title, then to a fixed default.
    const skillName = slugify(requestedName || extracted.title || 'untitled');
    // Description: first non-blank line of the content, without heading markers, capped at
    // 200 characters.
    const description = extracted.content
      .split('\n')
      .find((l) => l.trim().length > 0)
      ?.replace(/^#+\s*/, '')
      .trim()
      .slice(0, 200) || 'Saved skill';
    const savedAt = new Date().toISOString();

    const yamlTags = tags.length > 0
      ? `tags:\n${tags.map((t) => `  - ${t}`).join('\n')}\n`
      : '';

    const skillMd =
      `---\n` +
      `name: ${skillName}\n` +
      `description: ${yamlEscape(description)}\n` +
      yamlTags +
      `metadata:\n` +
      `  source: ${yamlEscape(url)}\n` +
      `  savedAt: ${savedAt}\n` +
      `---\n\n` +
      extracted.content + '\n';

    return NextResponse.json(
      { name: skillName, skillMd, tags, description },
      { headers: { 'X-RateLimit-Remaining': String(remaining) } },
    );
  } catch (err) {
    // Any fetch/conversion failure is reported as an upstream error.
    const message = err instanceof Error ? err.message : 'Failed to extract content';
    return NextResponse.json({ error: message }, { status: 502 });
  }
}

docs/skillkit/public/privacy.html

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -24,21 +24,30 @@
2424
<div class="container">
2525
<a href="/" class="back">&larr; Back to SkillKit</a>
2626
<h1>Privacy Policy</h1>
27-
<p class="updated">Last updated: February 10, 2026</p>
27+
<p class="updated">Last updated: February 14, 2026</p>
2828

2929
<h2>Overview</h2>
3030
<p>SkillKit ("we", "our", "us") is an open-source CLI tool and Chrome extension for managing AI agent skills. We are committed to protecting your privacy. This policy explains what data we collect (or don't) across all SkillKit products.</p>
3131

3232
<h2>Chrome Extension — "SkillKit - Save as Skill"</h2>
33-
<p>The Chrome extension operates entirely within your browser. It does not collect, transmit, or store any personal data externally.</p>
33+
<p>The Chrome extension helps you save webpages as AI agent skill files.</p>
3434
<ul>
35-
<li><strong>No data collection:</strong> We do not collect browsing history, page content, personal information, or any other user data.</li>
36-
<li><strong>No external requests:</strong> The extension makes zero network requests. All processing (HTML to markdown conversion, SKILL.md generation) happens locally in your browser.</li>
35+
<li><strong>What data is sent:</strong> When you click "Save as Skill", the extension sends the URL of the current page to our server at <code>agenstskills.com/api/save-skill</code>. This is required to fetch and convert the webpage content into a skill file.</li>
36+
<li><strong>What data is NOT sent:</strong> We do not send page content, browsing history, personal information, cookies, authentication tokens, or any data beyond the single URL you choose to save.</li>
37+
<li><strong>No data stored on server:</strong> The URL is processed in real time to extract content, generate tags, and build the skill file. No URLs, page content, or user data are stored, logged, or retained on the server after the response is returned.</li>
38+
<li><strong>Selection saves are local:</strong> When you save selected text via the right-click menu, the skill file is built entirely in the browser with no network requests.</li>
3739
<li><strong>No analytics or tracking:</strong> We do not use any analytics, telemetry, or tracking services.</li>
38-
<li><strong>Local storage only:</strong> The Chrome storage API is used solely to store user preferences (e.g., default settings) on your device. This data never leaves your browser.</li>
40+
<li><strong>No user accounts:</strong> No login, API key, or account is required.</li>
3941
<li><strong>Downloads:</strong> Generated skill files are saved to your local filesystem via the Chrome Downloads API. No files are uploaded anywhere.</li>
4042
</ul>
4143

44+
<h2>Permissions Used</h2>
45+
<ul>
46+
<li><strong>activeTab:</strong> Read the URL and title of the tab you are viewing when you click the extension. Only accessed when you actively use the extension.</li>
47+
<li><strong>contextMenus:</strong> Add "Save page as Skill" and "Save selection as Skill" to the right-click menu.</li>
48+
<li><strong>downloads:</strong> Save the generated SKILL.md file to your Downloads folder.</li>
49+
</ul>
50+
4251
<h2>CLI Tool</h2>
4352
<p>The SkillKit CLI runs entirely on your local machine. It does not phone home, collect telemetry, or transmit any data to external servers unless you explicitly use network features (e.g., <code>skillkit install</code> fetches public GitHub repositories).</p>
4453

@@ -49,10 +58,10 @@ <h2>Website</h2>
4958
<p>The SkillKit website (<a href="https://agenstskills.com">agenstskills.com</a>) is a static site hosted on Vercel. We do not use cookies, analytics trackers, or collect personal information. Vercel may collect standard web server logs (IP address, user agent) as described in their <a href="https://vercel.com/legal/privacy-policy">privacy policy</a>.</p>
5059

5160
<h2>Third-Party Services</h2>
52-
<p>SkillKit does not integrate with any third-party data processors, advertising networks, or analytics services.</p>
61+
<p>The Chrome extension communicates with <code>agenstskills.com</code> (our own server, hosted on Vercel) to process webpage URLs. No third-party data processors, advertising networks, or analytics services are used.</p>
5362

5463
<h2>Data Retention</h2>
55-
<p>We do not retain any user data because we do not collect any user data.</p>
64+
<p>We do not retain any user data. URLs sent to the API are processed in memory and discarded immediately after the response.</p>
5665

5766
<h2>Changes to This Policy</h2>
5867
<p>We may update this privacy policy from time to time. Changes will be posted on this page with an updated revision date.</p>

packages/extension/package.json

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,9 @@
99
"dev": "tsup --watch",
1010
"typecheck": "tsc --noEmit"
1111
},
12-
"dependencies": {
13-
"turndown": "^7.2.2"
14-
},
12+
"dependencies": {},
1513
"devDependencies": {
1614
"@types/chrome": "^0.0.280",
17-
"@types/turndown": "^5.0.6",
1815
"tsup": "^8.3.5",
1916
"typescript": "^5.7.2"
2017
}

0 commit comments

Comments
 (0)