Skip to content

Commit ba916c3

Browse files
committed
fix
1 parent fae2e99 commit ba916c3

4 files changed

Lines changed: 308 additions & 535 deletions

File tree

.claude/settings.local.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,8 @@
3030
"mcp__playwright__browser_take_screenshot",
3131
"Bash(yarn compile:*)",
3232
"Bash(cat:*)",
33-
"Bash(yarn remove:*)"
33+
"Bash(yarn remove:*)",
34+
"Bash(yarn add:*)"
3435
],
3536
"deny": [],
3637
"ask": []

homedocs/package.json

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -22,19 +22,13 @@
2222
"astro-pagefind": "^1.8.5",
2323
"cx": "workspace:*",
2424
"cx-react": "workspace:*",
25-
"jsdom": "^27.4.0",
25+
"gray-matter": "^4.0.3",
2626
"react": "^19.2.3",
2727
"react-dom": "^19.2.3",
28-
"rehype-parse": "^9.0.1",
29-
"rehype-remark": "^10.0.1",
30-
"remark-gfm": "^4.0.1",
31-
"remark-stringify": "^11.0.0",
3228
"sass": "^1.96.0",
33-
"tailwindcss": "^4.1.18",
34-
"unified": "^11.0.5"
29+
"tailwindcss": "^4.1.18"
3530
},
3631
"devDependencies": {
37-
"@types/jsdom": "^27",
3832
"prettier": "^3.7.4"
3933
}
4034
}
Lines changed: 215 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,7 @@
1-
import { readFile, writeFile, access, mkdir } from "node:fs/promises";
2-
import { join, dirname } from "node:path";
1+
import { readFile, writeFile, access } from "node:fs/promises";
2+
import { join, dirname, resolve } from "node:path";
33
import { fileURLToPath } from "node:url";
4-
import { JSDOM } from "jsdom";
5-
import { unified } from "unified";
6-
import rehypeParse from "rehype-parse";
7-
import rehypeRemark from "rehype-remark";
8-
import remarkStringify from "remark-stringify";
9-
import remarkGfm from "remark-gfm";
4+
import matter from "gray-matter";
105
import { navigation } from "../../data/navigation.js";
116

127
/**
@@ -25,6 +20,7 @@ export default function llmsTxt(options = {}) {
2520
hooks: {
2621
"astro:build:done": async ({ dir, pages }) => {
2722
const distDir = fileURLToPath(dir);
23+
const srcDir = join(process.cwd(), "src/pages");
2824

2925
// Generate ordered page list from navigation
3026
const orderedPages = [];
@@ -45,7 +41,7 @@ export default function llmsTxt(options = {}) {
4541
// Generate full documentation file
4642
const fullContent = await generateDocContent(
4743
orderedPages,
48-
distDir,
44+
srcDir,
4945
false
5046
);
5147
await writeFile(
@@ -56,7 +52,7 @@ export default function llmsTxt(options = {}) {
5652
console.log("✅ llms-full.txt generated");
5753

5854
// Generate structure-only file
59-
const smallContent = await generateDocContent(orderedPages, distDir, true);
55+
const smallContent = await generateDocContent(orderedPages, srcDir, true);
6056
await writeFile(
6157
join(distDir, "llms-small.txt"),
6258
`<SYSTEM>Index of key documentation pages and sections</SYSTEM>\n\n${smallContent}`,
@@ -89,20 +85,20 @@ export default function llmsTxt(options = {}) {
8985
/**
9086
* Generate documentation content from ordered pages
9187
* @param {Array} orderedPages - Array of page objects with path and metadata
92-
* @param {string} distDir - Distribution directory path
88+
* @param {string} srcDir - Source pages directory path
9389
* @param {boolean} onlyStructure - If true, only include headings
9490
* @returns {Promise<string>} Generated markdown content
9591
*/
96-
async function generateDocContent(orderedPages, distDir, onlyStructure) {
92+
async function generateDocContent(orderedPages, srcDir, onlyStructure) {
9793
const entries = [];
9894
let skippedCount = 0;
9995

10096
for (const page of orderedPages) {
101-
const htmlPath = join(distDir, page.path, "index.html");
97+
const mdxPath = join(srcDir, `${page.path}.mdx`);
10298

10399
try {
104-
await access(htmlPath);
105-
const content = await extractPageContent(htmlPath, onlyStructure);
100+
await access(mdxPath);
101+
const content = await extractMdxContent(mdxPath, srcDir, onlyStructure);
106102
entries.push(content);
107103
} catch (error) {
108104
// Silently skip pages that don't exist (they may be in navigation but not implemented yet)
@@ -118,79 +114,230 @@ async function generateDocContent(orderedPages, distDir, onlyStructure) {
118114
}
119115

120116
/**
121-
* Extract content from an HTML file
122-
* @param {string} htmlPath - Path to HTML file
117+
* Parse sections from code using markers like // @section and // @section-end
118+
* @param {string} code - Source code
119+
* @returns {Object} Sections object with keys like 'model', 'controller', 'index', etc.
120+
*/
121+
function parseSections(code) {
  // A section opens with a `// @<name>` comment line (the rest of that line is
  // ignored) and closes at the next `// @<name>-end`; the \1 back-reference
  // ties the closing marker to the name that opened the section.
  const markerPattern = /\/\/\s*@(model|controller|components|index)[^\n]*\n([\s\S]*?)\/\/\s*@\1-end/g;
  const found = {};

  for (const [, name, body] of code.matchAll(markerPattern)) {
    // If a section name occurs more than once, the last occurrence wins.
    found[name] = body.trim();
  }

  return found;
}
133+
134+
/**
 * Strip an `export default () => ( ... )` wrapper from a code snippet.
 *
 * The input is trimmed first. When it starts with the wrapper opening, the
 * opening is removed; the closing `)` or `);` is removed only when it is the
 * very last thing in the snippet. Input that does not start with the wrapper
 * is returned trimmed and otherwise untouched.
 *
 * @param {string} code - Source code
 * @returns {string} Code without the export default wrapper
 */
function trimExportDefault(code) {
  const WRAPPER_OPEN = "export default () => (";
  const trimmed = code.trim();

  if (!trimmed.startsWith(WRAPPER_OPEN)) {
    return trimmed;
  }

  // Wrapper closed at the very end of the snippet: drop both ends.
  if (trimmed.endsWith(");")) {
    return trimmed.slice(WRAPPER_OPEN.length, -2).trim();
  }
  if (trimmed.endsWith(")")) {
    return trimmed.slice(WRAPPER_OPEN.length, -1).trim();
  }

  // Something follows the closing paren (or it is missing), so only the
  // opening can be removed safely. Slicing instead of dropping the whole first
  // line keeps any code that shares that line with the opening, and it cannot
  // fail on one-line input the way indexing into an emptied line array did.
  return trimmed.slice(WRAPPER_OPEN.length).trim();
}
161+
162+
/**
 * Return the leading portion of a source file that precedes its first
 * `// @name` marker comment (presumably the import block of an example file).
 *
 * @param {string} code - Source code
 * @returns {string|null} Trimmed text before the first marker, or null when
 *   there is no marker or nothing but whitespace precedes it
 */
function extractImports(code) {
  const markerAt = code.search(/\/\/\s*@\w+/);

  // No marker at all: the file is not sectioned, so there is no preamble to report.
  if (markerAt === -1) {
    return null;
  }

  const preamble = code.slice(0, markerAt).trim();
  return preamble === "" ? null : preamble;
}
184+
185+
/**
186+
* Extract content from an MDX file
187+
* @param {string} mdxPath - Path to MDX file
188+
* @param {string} srcDir - Source directory for resolving imports
123189
* @param {boolean} onlyStructure - If true, only extract headings
124190
* @returns {Promise<string>} Extracted markdown content
125191
*/
126-
async function extractPageContent(htmlPath, onlyStructure) {
127-
const html = await readFile(htmlPath, "utf-8");
128-
const dom = new JSDOM(html);
129-
const doc = dom.window.document;
130-
131-
// Get main content
132-
const main = doc.querySelector("main");
133-
if (!main) {
134-
throw new Error(`No <main> element found in ${htmlPath}`);
192+
async function extractMdxContent(mdxPath, srcDir, onlyStructure) {
  // Turns one MDX page into plain markdown:
  //   1. gray-matter splits off the frontmatter (source of the page title);
  //   2. in full mode, <CodeExample code={x}>...</CodeExample> elements whose
  //      `x` is a `?raw` import are replaced by fenced blocks with the file's code;
  //   3. MDX import statements outside fenced code are removed;
  //   4. in structure mode only markdown heading lines are kept.
  // NOTE(review): srcDir is accepted but not read here; `?raw` imports are
  // resolved relative to mdxPath. Kept so existing callers stay valid.
  const mdxContent = await readFile(mdxPath, "utf-8");
  const { data: frontmatter, content } = matter(mdxContent);

  const title = frontmatter.title || "Untitled";

  // Local binding -> module path for every `import X from "...?raw"`.
  // A Map (not a plain object) so a binding named like an Object.prototype
  // member, e.g. `constructor`, is never mistaken for a known import.
  const rawImports = new Map();
  const importRegex = /import\s+(\w+)\s+from\s+["'](.+?)\?raw["'];?/g;
  for (const [, varName, importPath] of content.matchAll(importRegex)) {
    rawImports.set(varName, importPath);
  }

  let processedContent = content;

  // Replace CodeExample components with actual code blocks
  if (!onlyStructure && rawImports.size > 0) {
    const codeExampleRegex = /<CodeExample\s+code=\{(\w+)\}[^>]*>[\s\S]*?<\/CodeExample>/g;
    const toBlock = (label, body) => `**${label}:**\n\`\`\`tsx\n${body}\n\`\`\``;

    processedContent = await replaceAsync(
      processedContent,
      codeExampleRegex,
      async (match, varName) => {
        if (!rawImports.has(varName)) {
          return match; // not a ?raw import: leave the element as written
        }

        const codeFilePath = resolveImportPath(mdxPath, rawImports.get(varName));
        try {
          const code = await readFile(codeFilePath, "utf-8");
          const sections = parseSections(code);
          const imports = extractImports(code);

          // Fixed order: imports, model, controller, components, main code.
          const codeBlocks = [];
          if (imports) {
            codeBlocks.push(toBlock("Imports", imports));
          }
          if (sections.model) {
            codeBlocks.push(toBlock("Model", sections.model));
          }
          if (sections.controller) {
            codeBlocks.push(toBlock("Controller", sections.controller));
          }
          if (sections.components) {
            codeBlocks.push(toBlock("Components", sections.components));
          }

          // Main code: the index section when present, otherwise the whole
          // file minus the @jsxImportSource pragma; the export default
          // wrapper is removed in both cases.
          const indexCode = trimExportDefault(
            sections.index || code.replace(/\/\*\*\s*@jsxImportSource\s+\w+\s*\*\/\n?/, "").trim()
          );
          codeBlocks.push(toBlock("TSX", indexCode));

          return codeBlocks.join("\n\n");
        } catch (error) {
          // Best effort: keep the original element, but report the real cause
          // since this also catches failures while formatting the file.
          console.warn(`⚠️ Could not read code file: ${codeFilePath} (${error.message})`);
          return match;
        }
      }
    );
  }

  // Remove import statements from MDX (but NOT from code blocks).
  // Splitting on a capturing regex puts prose at even indices and fenced
  // code blocks at odd indices.
  const codeBlockRegex = /(```[\s\S]*?```)/g;
  // Anchored to line start so prose such as "import it ... from 'x'" is not swallowed.
  const importStatementRegex = /^import\s+[\s\S]*?from\s+["'][^"']+["'];?\s*/gm;
  processedContent = processedContent
    .split(codeBlockRegex)
    .map((part, index) => (index % 2 === 0 ? part.replace(importStatementRegex, "") : part))
    .join("");

  // Remove a frontmatter section if one is still present at the very start.
  // No `m` flag: with it, `^---` matched any line and the text between two
  // horizontal rules in the page body was deleted.
  processedContent = processedContent.replace(/^---[\s\S]*?---\s*/, "");

  // If only structure, keep only headings. Fenced code is skipped so lines
  // like `# install deps` inside a shell snippet are not reported as headings.
  if (onlyStructure) {
    processedContent = processedContent
      .split(codeBlockRegex)
      .filter((_, index) => index % 2 === 0)
      .join("\n")
      .split("\n")
      .filter((line) => /^#+\s/.test(line))
      .join("\n");
  }

  // Build final output
  const parts = [];
  const body = processedContent.trim();

  // Only add title if it's not already in the content
  if (!body.startsWith(`# ${title}`)) {
    parts.push(`# ${title}`);
  }
  if (body) {
    parts.push(body);
  }

  return parts.join("\n\n");
}
176304

177305
/**
178-
* Convert HTML to Markdown using unified/rehype/remark
179-
* @param {string} html - HTML content
180-
* @returns {Promise<string>} Markdown content
306+
* Resolve import path relative to the MDX file
307+
* @param {string} mdxPath - Path to the MDX file
308+
* @param {string} importPath - Relative import path from the MDX file
309+
* @returns {string} Resolved absolute path
310+
*/
311+
function resolveImportPath(mdxPath, importPath) {
  // The import specifier is interpreted relative to the folder that holds the
  // MDX file, not relative to the current working directory.
  return resolve(dirname(mdxPath), importPath);
}
315+
316+
/**
317+
* Async version of String.replace() for async callbacks
318+
* @param {string} str - Input string
319+
* @param {RegExp} regex - Regular expression
320+
* @param {Function} asyncFn - Async replacement function
321+
* @returns {Promise<string>} Replaced string
181322
*/
182-
async function htmlToMarkdown(html) {
183-
const file = await unified()
184-
.use(rehypeParse)
185-
.use(rehypeRemark)
186-
.use(remarkGfm)
187-
.use(remarkStringify, {
188-
bullet: "-",
189-
fence: "`",
190-
fences: true,
191-
incrementListMarker: false,
192-
})
193-
.process(html);
194-
195-
return String(file);
323+
async function replaceAsync(str, regex, asyncFn) {
  // Async counterpart of String.prototype.replace: asyncFn receives
  // (matchedText, ...captureGroups) and resolves to the replacement.
  // All callbacks are started together and awaited with Promise.all, so a
  // single rejection rejects the whole call.

  // Private copy so the caller's regex keeps its own lastIndex.
  const re = new RegExp(regex, regex.flags);

  // matchAll steps past zero-length matches but requires the g flag; a
  // non-global pattern replaces only its first match, like String.replace.
  // (A manual exec loop never terminates in either of those two cases.)
  let matches;
  if (re.global) {
    matches = [...str.matchAll(re)];
  } else {
    const first = re.exec(str);
    matches = first ? [first] : [];
  }

  if (matches.length === 0) {
    return str;
  }

  const replacements = await Promise.all(
    matches.map((m) => asyncFn(m[0], ...m.slice(1)))
  );

  // Stitch the untouched text between matches together with the replacements.
  let result = "";
  let cursor = 0;
  matches.forEach((m, i) => {
    result += str.slice(cursor, m.index) + replacements[i];
    cursor = m.index + m[0].length;
  });

  return result + str.slice(cursor);
}

0 commit comments

Comments
 (0)