1- import { readFile , writeFile , access , mkdir } from "node:fs/promises" ;
2- import { join , dirname } from "node:path" ;
1+ import { readFile , writeFile , access } from "node:fs/promises" ;
2+ import { join , dirname , resolve } from "node:path" ;
33import { fileURLToPath } from "node:url" ;
4- import { JSDOM } from "jsdom" ;
5- import { unified } from "unified" ;
6- import rehypeParse from "rehype-parse" ;
7- import rehypeRemark from "rehype-remark" ;
8- import remarkStringify from "remark-stringify" ;
9- import remarkGfm from "remark-gfm" ;
4+ import matter from "gray-matter" ;
105import { navigation } from "../../data/navigation.js" ;
116
127/**
@@ -25,6 +20,7 @@ export default function llmsTxt(options = {}) {
2520 hooks : {
2621 "astro:build:done" : async ( { dir, pages } ) => {
2722 const distDir = fileURLToPath ( dir ) ;
23+ const srcDir = join ( process . cwd ( ) , "src/pages" ) ;
2824
2925 // Generate ordered page list from navigation
3026 const orderedPages = [ ] ;
@@ -45,7 +41,7 @@ export default function llmsTxt(options = {}) {
4541 // Generate full documentation file
4642 const fullContent = await generateDocContent (
4743 orderedPages ,
48- distDir ,
44+ srcDir ,
4945 false
5046 ) ;
5147 await writeFile (
@@ -56,7 +52,7 @@ export default function llmsTxt(options = {}) {
5652 console . log ( "✅ llms-full.txt generated" ) ;
5753
5854 // Generate structure-only file
59- const smallContent = await generateDocContent ( orderedPages , distDir , true ) ;
55+ const smallContent = await generateDocContent ( orderedPages , srcDir , true ) ;
6056 await writeFile (
6157 join ( distDir , "llms-small.txt" ) ,
6258 `<SYSTEM>Index of key documentation pages and sections</SYSTEM>\n\n${ smallContent } ` ,
@@ -89,20 +85,20 @@ export default function llmsTxt(options = {}) {
8985/**
9086 * Generate documentation content from ordered pages
9187 * @param {Array } orderedPages - Array of page objects with path and metadata
92- * @param {string } distDir - Distribution directory path
88+ * @param {string } srcDir - Source pages directory path
9389 * @param {boolean } onlyStructure - If true, only include headings
9490 * @returns {Promise<string> } Generated markdown content
9591 */
96- async function generateDocContent ( orderedPages , distDir , onlyStructure ) {
92+ async function generateDocContent ( orderedPages , srcDir , onlyStructure ) {
9793 const entries = [ ] ;
9894 let skippedCount = 0 ;
9995
10096 for ( const page of orderedPages ) {
101- const htmlPath = join ( distDir , page . path , "index.html" ) ;
97+ const mdxPath = join ( srcDir , ` ${ page . path } .mdx` ) ;
10298
10399 try {
104- await access ( htmlPath ) ;
105- const content = await extractPageContent ( htmlPath , onlyStructure ) ;
100+ await access ( mdxPath ) ;
101+ const content = await extractMdxContent ( mdxPath , srcDir , onlyStructure ) ;
106102 entries . push ( content ) ;
107103 } catch ( error ) {
108104 // Silently skip pages that don't exist (they may be in navigation but not implemented yet)
@@ -118,79 +114,230 @@ async function generateDocContent(orderedPages, distDir, onlyStructure) {
118114}
119115
/**
 * Collect the marked regions of an example source file.
 *
 * A region opens with a line comment such as `// @model` (anything else on
 * that line is ignored) and closes with the matching `// @model-end`.
 * Recognised names are `model`, `controller`, `components` and `index`.
 *
 * @param {string} code - Source code to scan
 * @returns {Object} Map of section name to its trimmed body; when a name
 *   occurs more than once the last occurrence wins
 */
function parseSections(code) {
  const markerPattern =
    /\/\/\s*@(model|controller|components|index)[^\n]*\n([\s\S]*?)\/\/\s*@\1-end/g;
  const found = {};

  for (const [, name, body] of code.matchAll(markerPattern)) {
    found[name] = body.trim();
  }

  return found;
}
133+
/**
 * Strip an `export default () => ( ... )` wrapper from a snippet.
 *
 * The wrapper is removed only when it spans the whole snippet, i.e. the
 * trimmed code starts with `export default () => (` and ends with `)` or
 * `);`. This covers both single-line and multi-line wrappers. If anything
 * follows the closing parenthesis (a trailing comment, further exports, ...)
 * the snippet is returned untouched: removing only the opening line would
 * leave a dangling `);` in the output.
 *
 * @param {string} code - Source code
 * @returns {string} Trimmed code, without the wrapper when it could be removed
 */
function trimExportDefault(code) {
  const wrapperOpen = "export default () => (";
  const trimmed = code.trim();

  if (!trimmed.startsWith(wrapperOpen)) {
    return trimmed;
  }

  // Check ");" first so the optional semicolon is removed together with ")".
  const wrapperClose = [");", ")"].find((suffix) => trimmed.endsWith(suffix));
  if (wrapperClose === undefined) {
    return trimmed;
  }

  return trimmed.slice(wrapperOpen.length, -wrapperClose.length).trim();
}
161+
/**
 * Extract the preamble of an example file: everything before the first
 * `// @name` section marker (typically the import statements).
 *
 * Two kinds of comments are deliberately not treated as content/markers:
 * - TypeScript directives such as `// @ts-ignore` or `// @ts-expect-error`
 *   are not section markers, so they do not cut the preamble short.
 * - A `@jsxImportSource` pragma comment is dropped from the preamble, since
 *   it is a compiler directive rather than an import.
 *
 * @param {string} code - Source code
 * @returns {string|null} The trimmed preamble, or null when the file has no
 *   section marker or nothing meaningful precedes the first one
 */
function extractImports(code) {
  // First `// @word` marker that is not a `// @ts-*` compiler directive.
  const firstMarker = code.match(/\/\/\s*@(?!ts-)\w+/);
  if (!firstMarker) {
    return null;
  }

  const preamble = code
    .slice(0, firstMarker.index)
    .replace(/\/\*\*?\s*@jsxImportSource\s+\S+\s*\*\/\s*/g, "")
    .trim();

  return preamble || null;
}
184+
/**
 * Extract LLM-friendly markdown from an MDX page.
 *
 * Steps:
 * 1. Split frontmatter from the body with gray-matter; the page title comes
 *    from `frontmatter.title` (falling back to "Untitled").
 * 2. Unless `onlyStructure` is set, replace every
 *    `<CodeExample code={name}>...</CodeExample>` whose `name` was imported
 *    with a `?raw` suffix by fenced code blocks built from the imported file.
 * 3. Remove MDX `import` statements that sit outside fenced code blocks.
 * 4. If `onlyStructure` is set, keep only markdown heading lines.
 * 5. Prepend `# <title>` unless the content already starts with it.
 *
 * @param {string} mdxPath - Path to MDX file
 * @param {string} srcDir - Source directory (currently unused; `?raw` imports
 *   are resolved relative to the MDX file itself)
 * @param {boolean} onlyStructure - If true, only extract headings
 * @returns {Promise<string>} Extracted markdown content
 */
async function extractMdxContent(mdxPath, srcDir, onlyStructure) {
  const mdxContent = await readFile(mdxPath, "utf-8");
  // `content` is the body with the frontmatter block already removed, so no
  // further "strip ---...---" pass is needed (such a pass would also delete
  // text between two `---` thematic breaks in the body).
  const { data: frontmatter, content } = matter(mdxContent);
  const title = frontmatter.title || "Untitled";

  let processedContent = content;

  if (!onlyStructure) {
    const rawImports = collectRawImports(content);

    if (rawImports.size > 0) {
      // NOTE(review): only the paired <CodeExample ...>...</CodeExample> form
      // is recognised; confirm no page uses a self-closing <CodeExample />.
      const codeExampleRegex =
        /<CodeExample\s+code=\{(\w+)\}[^>]*>[\s\S]*?<\/CodeExample>/g;

      processedContent = await replaceAsync(
        processedContent,
        codeExampleRegex,
        async (match, varName) => {
          const importPath = rawImports.get(varName);
          if (!importPath) {
            return match;
          }

          const codeFilePath = resolveImportPath(mdxPath, importPath);
          try {
            const code = await readFile(codeFilePath, "utf-8");
            return renderCodeExample(code);
          } catch (error) {
            // Best effort: keep the original component markup in the output.
            console.warn(
              `⚠️ Could not read code file: ${codeFilePath} (${error.message})`
            );
            return match;
          }
        }
      );
    }
  }

  processedContent = stripMdxImports(processedContent);

  if (onlyStructure) {
    processedContent = keepHeadingsOnly(processedContent);
  }

  // Build final output
  const body = processedContent.trim();
  const parts = [];

  // Only add title if it's not already in the content
  if (!body.startsWith(`# ${title}`)) {
    parts.push(`# ${title}`);
  }
  if (body) {
    parts.push(body);
  }

  return parts.join("\n\n");
}

/**
 * Map each default-import binding that uses a `?raw` suffix to its import
 * path (without the suffix). A Map is used so that identifiers such as
 * `constructor` cannot collide with inherited object properties.
 */
function collectRawImports(content) {
  const rawImports = new Map();
  const importRegex = /import\s+(\w+)\s+from\s+["']([^"'\n]+?)\?raw["'];?/g;

  for (const [, varName, importPath] of content.matchAll(importRegex)) {
    rawImports.set(varName, importPath);
  }

  return rawImports;
}

/**
 * Render an example source file as labelled ```tsx blocks: the preamble
 * (imports), then the model / controller / components sections when present,
 * and finally the main code without its `export default` wrapper.
 */
function renderCodeExample(code) {
  const labelled = (label, body) => `**${label}:**\n\`\`\`tsx\n${body}\n\`\`\``;
  const sections = parseSections(code);
  const imports = extractImports(code);
  const codeBlocks = [];

  if (imports) {
    codeBlocks.push(labelled("Imports", imports));
  }

  const optionalSections = [
    ["model", "Model"],
    ["controller", "Controller"],
    ["components", "Components"],
  ];
  for (const [key, label] of optionalSections) {
    if (sections[key]) {
      codeBlocks.push(labelled(label, sections[key]));
    }
  }

  // Without an explicit index section the whole file (minus the JSX pragma)
  // is shown as the main code.
  const indexCode =
    sections.index ||
    code.replace(/\/\*\*\s*@jsxImportSource\s+\w+\s*\*\/\n?/, "").trim();
  codeBlocks.push(labelled("TSX", trimExportDefault(indexCode)));

  return codeBlocks.join("\n\n");
}

/**
 * Remove MDX import statements, leaving fenced code blocks untouched.
 * Statements are only matched at the start of a line, so prose that merely
 * contains the word "import" is not deleted.
 */
function stripMdxImports(content) {
  const importStatement =
    /^import\s+(?:[^"';]*?\s+from\s+)?["'][^"']+["'];?\s*/gm;

  // Splitting on a capturing group keeps the fences: even indices are prose,
  // odd indices are fenced code blocks.
  return content
    .split(/(```[\s\S]*?```)/g)
    .map((part, index) =>
      index % 2 === 0 ? part.replace(importStatement, "") : part
    )
    .join("");
}

/**
 * Keep only markdown heading lines, ignoring fenced code blocks so that
 * `# comment` lines inside code are not reported as headings.
 */
function keepHeadingsOnly(content) {
  return content
    .split(/(```[\s\S]*?```)/g)
    .filter((_, index) => index % 2 === 0)
    .flatMap((part) => part.split("\n"))
    .filter((line) => /^#+\s/.test(line))
    .join("\n");
}
176304
/**
 * Turn an import specifier found in an MDX file into an absolute path,
 * resolving it against the directory that contains the MDX file.
 *
 * @param {string} mdxPath - Path to the MDX file
 * @param {string} importPath - Import path as written in the MDX file
 * @returns {string} Resolved absolute path
 */
function resolveImportPath(mdxPath, importPath) {
  return resolve(dirname(mdxPath), importPath);
}
315+
/**
 * Async version of String.replace() for async callbacks.
 *
 * All matches are located first, the callbacks run concurrently, and the
 * results are spliced back in match order. As with String.replace(), a regex
 * without the `g` flag replaces only the first match, and zero-length matches
 * are handled without looping forever.
 *
 * @param {string} str - Input string
 * @param {RegExp} regex - Regular expression (not mutated; a copy is scanned)
 * @param {Function} asyncFn - Async replacement function, called as
 *   `asyncFn(match, ...captures, index, input)`; its resolved value is
 *   converted to a string
 * @returns {Promise<string>} Replaced string
 */
async function replaceAsync(str, regex, asyncFn) {
  const replaceAll = regex.global;
  // matchAll requires a global regex; scanning a copy also leaves the
  // caller's `lastIndex` untouched.
  const scanner = new RegExp(
    regex.source,
    replaceAll ? regex.flags : `${regex.flags}g`
  );

  const found = [];
  for (const hit of str.matchAll(scanner)) {
    found.push(hit);
    if (!replaceAll) {
      break;
    }
  }

  if (found.length === 0) {
    return str;
  }

  const replacements = await Promise.all(
    found.map((hit) => asyncFn(hit[0], ...hit.slice(1), hit.index, str))
  );

  // Rebuild the string in one left-to-right pass.
  const pieces = [];
  let cursor = 0;
  found.forEach((hit, i) => {
    pieces.push(str.slice(cursor, hit.index), String(replacements[i]));
    cursor = hit.index + hit[0].length;
  });
  pieces.push(str.slice(cursor));

  return pieces.join("");
}
0 commit comments