Skip to content

Commit fae2e99

Browse files
committed
add plugin for llm generation
1 parent cf4177e commit fae2e99

5 files changed

Lines changed: 385 additions & 237 deletions

File tree

.claude/settings.local.json

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,9 @@
2828
"mcp__playwright__browser_run_code",
2929
"mcp__playwright__browser_click",
3030
"mcp__playwright__browser_take_screenshot",
31-
"Bash(yarn compile:*)"
31+
"Bash(yarn compile:*)",
32+
"Bash(cat:*)",
33+
"Bash(yarn remove:*)"
3234
],
3335
"deny": [],
3436
"ask": []

homedocs/astro.config.mjs

Lines changed: 3 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ import mdx from "@astrojs/mdx";
55
import tailwindcss from "@tailwindcss/vite";
66
import pagefind from "astro-pagefind";
77
import cxjs from "./src/integrations/cxjs";
8-
import astroLlmsTxt from "@4hse/astro-llms-txt";
8+
import llmsTxt from "./src/integrations/llms-txt";
99

1010
// https://astro.build/config
1111
export default defineConfig({
@@ -15,32 +15,11 @@ export default defineConfig({
1515
react(),
1616
mdx(),
1717
pagefind(),
18-
astroLlmsTxt({
18+
llmsTxt({
1919
title: "CxJS",
2020
description:
2121
"CxJS is a feature-rich JavaScript framework for building complex web front-ends, such as BI tools, dashboards and admin apps.",
22-
details:
23-
"CxJS offers declarative data binding, comprehensive widget library, advanced charting capabilities, and flexible theming system.",
24-
notes:
25-
"- This content is auto-generated from the official CxJS documentation.",
26-
docSet: [
27-
{
28-
title: "Complete Documentation",
29-
description: "Full CxJS documentation including all guides and API references",
30-
url: "/llms-full.txt",
31-
include: ["docs/", "docs/**"],
32-
promote: ["docs/intro/what-is-cxjs", "docs/intro/installation"],
33-
},
34-
{
35-
title: "Documentation Structure",
36-
description: "Index of key documentation pages and sections",
37-
url: "/llms-small.txt",
38-
include: ["docs/", "docs/**"],
39-
onlyStructure: true,
40-
promote: ["docs/intro/what-is-cxjs"],
41-
},
42-
],
43-
pageSeparator: "\n\n---\n\n",
22+
site: "https://cxjs.io",
4423
}),
4524
],
4625
build: {

homedocs/package.json

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
"prettify": "prettier --write ."
1212
},
1313
"dependencies": {
14-
"@4hse/astro-llms-txt": "^1.0.4",
1514
"@astrojs/mdx": "^4.3.13",
1615
"@astrojs/react": "^4.4.2",
1716
"@heroicons/react": "^2.2.0",
@@ -23,12 +22,19 @@
2322
"astro-pagefind": "^1.8.5",
2423
"cx": "workspace:*",
2524
"cx-react": "workspace:*",
25+
"jsdom": "^27.4.0",
2626
"react": "^19.2.3",
2727
"react-dom": "^19.2.3",
28+
"rehype-parse": "^9.0.1",
29+
"rehype-remark": "^10.0.1",
30+
"remark-gfm": "^4.0.1",
31+
"remark-stringify": "^11.0.0",
2832
"sass": "^1.96.0",
29-
"tailwindcss": "^4.1.18"
33+
"tailwindcss": "^4.1.18",
34+
"unified": "^11.0.5"
3035
},
3136
"devDependencies": {
37+
"@types/jsdom": "^27",
3238
"prettier": "^3.7.4"
3339
}
3440
}
Lines changed: 196 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,196 @@
1+
import { readFile, writeFile, access, mkdir } from "node:fs/promises";
2+
import { join, dirname } from "node:path";
3+
import { fileURLToPath } from "node:url";
4+
import { JSDOM } from "jsdom";
5+
import { unified } from "unified";
6+
import rehypeParse from "rehype-parse";
7+
import rehypeRemark from "rehype-remark";
8+
import remarkStringify from "remark-stringify";
9+
import remarkGfm from "remark-gfm";
10+
import { navigation } from "../../data/navigation.js";
11+
12+
/**
 * Astro integration that writes `llms.txt`, `llms-full.txt` and
 * `llms-small.txt` into the build output directory once the build is done.
 *
 * Pages are listed in the order they appear in the imported `navigation`
 * data (category, then group, then item); each item is mapped to the path
 * `docs/<category.slug>/<item.slug>`.
 *
 * @param {Object} [options] - Configuration options
 * @param {string} [options.title="Documentation"] - Heading of the generated llms.txt
 * @param {string} [options.description=""] - Description used in llms.txt and as the
 *   `<SYSTEM>` preamble of llms-full.txt
 * @param {string} [options.site=""] - Site URL prefixed to the links in llms.txt;
 *   trailing slashes are stripped
 * @returns {import('astro').AstroIntegration}
 */
export default function llmsTxt(options = {}) {
  const { title = "Documentation", description = "", site = "" } = options;

  // Links are built as `${baseUrl}/llms-*.txt`. Strip trailing slashes so a
  // site configured as "https://example.com/" does not yield "//" in the URL.
  const baseUrl = String(site).replace(/\/+$/, "");

  return {
    name: "llms-txt",
    hooks: {
      "astro:build:done": async ({ dir }) => {
        const distDir = fileURLToPath(dir);

        // Flatten the navigation tree into an ordered page list.
        const orderedPages = [];
        for (const category of navigation) {
          for (const group of category.groups) {
            for (const item of group.items) {
              orderedPages.push({
                path: `docs/${category.slug}/${item.slug}`,
                title: item.title,
                category: category.title,
                group: group.title,
              });
            }
          }
        }

        // Full documentation: every page converted to Markdown.
        const fullContent = await generateDocContent(orderedPages, distDir, false);
        await writeFile(
          join(distDir, "llms-full.txt"),
          `<SYSTEM>${description}</SYSTEM>\n\n${fullContent}`,
          "utf-8",
        );
        console.log("✅ llms-full.txt generated");

        // Structure only: headings of every page.
        const smallContent = await generateDocContent(orderedPages, distDir, true);
        await writeFile(
          join(distDir, "llms-small.txt"),
          `<SYSTEM>Index of key documentation pages and sections</SYSTEM>\n\n${smallContent}`,
          "utf-8",
        );
        console.log("✅ llms-small.txt generated");

        // Main index pointing at the two generated documents. Built from a
        // line array so source indentation can never leak into the output.
        const indexContent = [
          `# ${title}`,
          "",
          `> ${description}`,
          "",
          "## Documentation Sets",
          "",
          `- [Complete Documentation](${baseUrl}/llms-full.txt): Full CxJS documentation including all guides and API references`,
          `- [Documentation Structure](${baseUrl}/llms-small.txt): Index of key documentation pages and sections`,
          "",
          "## Notes",
          "",
          "- This content is auto-generated from the official CxJS documentation.",
          "- Pages are ordered according to the documentation navigation structure.",
        ].join("\n");

        await writeFile(join(distDir, "llms.txt"), indexContent, "utf-8");
        console.log("✅ llms.txt generated");
      },
    },
  };
}
88+
89+
/**
 * Build one Markdown document from the built HTML of the given pages.
 *
 * Each page is read from `<distDir>/<page.path>/index.html` and converted
 * with `extractPageContent`; the results are joined with a `---` separator
 * in the order given.
 *
 * This function never throws for a single page. Pages whose HTML file does
 * not exist are counted and reported once; any other failure (unreadable
 * file, missing `<main>`, conversion error) is logged as a warning with its
 * cause, so real problems are not mistaken for "page not implemented yet".
 *
 * @param {Array<{path: string}>} orderedPages - Pages to include, in output order
 * @param {string} distDir - Build output directory
 * @param {boolean} onlyStructure - If true, only include headings
 * @returns {Promise<string>} Generated markdown content
 */
async function generateDocContent(orderedPages, distDir, onlyStructure) {
  const entries = [];
  let skippedCount = 0;

  for (const page of orderedPages) {
    const htmlPath = join(distDir, page.path, "index.html");

    try {
      await access(htmlPath);
      entries.push(await extractPageContent(htmlPath, onlyStructure));
    } catch (error) {
      // A missing file is expected: the page may be listed in the navigation
      // but not implemented yet.
      if (error?.code === "ENOENT" || error?.code === "ENOTDIR") {
        skippedCount++;
      } else {
        console.warn(`⚠️ Failed to extract ${page.path}: ${error?.message ?? error}`);
      }
    }
  }

  if (skippedCount > 0) {
    console.log(`ℹ️ Skipped ${skippedCount} pages from navigation that don't exist yet`);
  }

  return entries.join("\n\n---\n\n");
}
119+
120+
/**
 * Return the lines of `markdown` that are ATX headings (`#`, `##`, ...),
 * ignoring anything inside fenced code blocks so that `# comment` lines in
 * code samples are not mistaken for headings.
 *
 * @param {string} markdown - Markdown text
 * @returns {string} Heading lines joined with newlines
 */
function extractHeadingLines(markdown) {
  const headings = [];
  let openFence = null;

  for (const line of markdown.split("\n")) {
    const fence = /^\s*(`{3,}|~{3,})/.exec(line)?.[1] ?? null;

    if (openFence === null) {
      if (fence !== null) {
        openFence = fence;
      } else if (/^#+\s/.test(line)) {
        headings.push(line);
      }
    } else if (
      fence !== null &&
      fence[0] === openFence[0] &&
      fence.length >= openFence.length &&
      line.trim() === fence
    ) {
      // A closing fence uses the same character, is at least as long as the
      // opening one and carries no info string.
      openFence = null;
    }
  }

  return headings.join("\n");
}

/**
 * Extract the content of a built HTML page as Markdown.
 *
 * The page's first `<h1>` inside `<main>` becomes the top-level title
 * ("Untitled" if absent). `nav`, `footer`, `header` and `.toc` elements inside
 * `<main>` are dropped before the remaining markup is converted with
 * `htmlToMarkdown`. Outside structure mode the `<meta name="description">`
 * content, when present, is emitted as a block quote under the title.
 *
 * @param {string} htmlPath - Path to HTML file
 * @param {boolean} onlyStructure - If true, only extract headings
 * @returns {Promise<string>} Extracted markdown content
 * @throws {Error} If the file cannot be read or contains no `<main>` element
 */
async function extractPageContent(htmlPath, onlyStructure) {
  const html = await readFile(htmlPath, "utf-8");
  const dom = new JSDOM(html);

  let title;
  let metaDesc;
  let mainHtml;
  try {
    const doc = dom.window.document;

    const main = doc.querySelector("main");
    if (!main) {
      throw new Error(`No <main> element found in ${htmlPath}`);
    }

    // The h1 is emitted separately as the page title, so take it out of the body.
    const h1 = main.querySelector("h1");
    title = h1?.textContent?.trim() || "Untitled";
    h1?.remove();

    metaDesc = doc.querySelector('meta[name="description"]')?.getAttribute("content")?.trim();

    // Page chrome that carries no documentation content.
    for (const selector of ["nav", "footer", "header", ".toc"]) {
      main.querySelectorAll(selector).forEach((el) => el.remove());
    }

    mainHtml = main.innerHTML;
  } finally {
    // Release the jsdom window; one is created per page and never reused.
    dom.window.close();
  }

  let markdown = await htmlToMarkdown(mainHtml);
  if (onlyStructure) {
    markdown = extractHeadingLines(markdown);
  }

  const parts = [`# ${title}`];
  if (metaDesc && !onlyStructure) {
    parts.push(`> ${metaDesc}`);
  }
  const body = markdown.trim();
  if (body) {
    parts.push(body);
  }

  return parts.join("\n\n");
}
176+
177+
/**
 * Turn an HTML string into Markdown.
 *
 * The pipeline parses the HTML (rehype-parse), converts the tree to a
 * Markdown tree (rehype-remark), enables GFM syntax (remark-gfm) and
 * serializes it (remark-stringify) with `-` bullets, backtick fences and
 * non-incrementing ordered-list markers.
 *
 * @param {string} html - HTML content
 * @returns {Promise<string>} Markdown content
 */
async function htmlToMarkdown(html) {
  const stringifyOptions = {
    bullet: "-",
    fence: "`",
    fences: true,
    incrementListMarker: false,
  };

  const processor = unified()
    .use(rehypeParse)
    .use(rehypeRemark)
    .use(remarkGfm)
    .use(remarkStringify, stringifyOptions);

  const result = await processor.process(html);
  return String(result);
}

0 commit comments

Comments
 (0)