Skip to content

Commit 15ee9a4

Browse files
committed
Add italic, code, headings, and list handling to htmlToMarkdown
- Italic: <em>, <i>, font-style:italic → *text*
- Inline code: <code> → `text`
- Code blocks: <pre> → fenced ``` blocks
- Headings: <h1>–<h6> → # through ######
- Lists: <ul>/<ol> with nested list support and proper indentation

https://claude.ai/code/session_018q9Arxy2HrpsyNB13wpwxQ
1 parent cb74a9c commit 15ee9a4

1 file changed

Lines changed: 63 additions & 4 deletions

File tree

rich-text-to-markdown.html

Lines changed: 63 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@
115115
<h1>Rich Text to Markdown</h1>
116116

117117
<div class="instructions">
118-
<strong>Instructions:</strong> Paste rich text below. Bold text will be converted to <code>**markdown bold**</code>, links will be converted to <code>[text](url)</code>, and leading spaces will be removed.
118+
<strong>Instructions:</strong> Paste rich text below. Formatting (bold, italic, links, code, headings, lists) will be converted to Markdown and leading spaces will be removed.
119119
</div>
120120

121121
<textarea class="paste-area" placeholder="Click here and paste your rich text (Cmd+V or Ctrl+V)..."></textarea>
@@ -314,7 +314,7 @@ <h1>Rich Text to Markdown</h1>
314314
const parser = new DOMParser();
315315
const doc = parser.parseFromString(html, 'text/html');
316316

317-
function processNode(node) {
317+
function processNode(node, listDepth = 0) {
318318
if (node.nodeType === Node.TEXT_NODE) {
319319
return node.textContent;
320320
}
@@ -324,9 +324,17 @@ <h1>Rich Text to Markdown</h1>
324324
}
325325

326326
const tag = node.tagName.toLowerCase();
327+
328+
// For <pre>, grab raw text content and wrap in a fenced code block
329+
if (tag === 'pre') {
330+
const code = node.querySelector('code');
331+
const text = (code || node).textContent;
332+
return '\n```\n' + text.replace(/\n$/, '') + '\n```\n';
333+
}
334+
327335
let childContent = '';
328336
for (const child of node.childNodes) {
329-
childContent += processNode(child);
337+
childContent += processNode(child, listDepth);
330338
}
331339

332340
// Check for bold - either tag or style
@@ -338,13 +346,31 @@ <h1>Rich Text to Markdown</h1>
338346
));
339347

340348
if (isBold && childContent.trim()) {
341-
// Preserve spacing around the bold content
342349
const leadingSpace = childContent.match(/^\s*/)[0];
343350
const trailingSpace = childContent.match(/\s*$/)[0];
344351
const trimmed = childContent.trim();
345352
return `${leadingSpace}**${trimmed}**${trailingSpace}`;
346353
}
347354

355+
// Check for italic - either tag or style
356+
const isItalic = tag === 'em' || tag === 'i' ||
357+
(node.style && node.style.fontStyle === 'italic');
358+
359+
if (isItalic && childContent.trim()) {
360+
const leadingSpace = childContent.match(/^\s*/)[0];
361+
const trailingSpace = childContent.match(/\s*$/)[0];
362+
const trimmed = childContent.trim();
363+
return `${leadingSpace}*${trimmed}*${trailingSpace}`;
364+
}
365+
366+
// Handle inline code
367+
if (tag === 'code') {
368+
if (childContent.trim()) {
369+
return '`' + childContent.trim() + '`';
370+
}
371+
return childContent;
372+
}
373+
348374
// Handle links
349375
if (tag === 'a') {
350376
const href = node.getAttribute('href');
@@ -354,6 +380,39 @@ <h1>Rich Text to Markdown</h1>
354380
return childContent;
355381
}
356382

383+
// Handle headings
384+
const headingMatch = tag.match(/^h([1-6])$/);
385+
if (headingMatch && childContent.trim()) {
386+
const level = parseInt(headingMatch[1]);
387+
const prefix = '#'.repeat(level);
388+
return `\n${prefix} ${childContent.trim()}\n`;
389+
}
390+
391+
// Handle lists
392+
if (tag === 'ul' || tag === 'ol') {
393+
let result = '';
394+
let index = 1;
395+
for (const child of node.children) {
396+
if (child.tagName.toLowerCase() === 'li') {
397+
const indent = ' '.repeat(listDepth);
398+
const bullet = tag === 'ul' ? '-' : `${index}.`;
399+
let liContent = '';
400+
for (const liChild of child.childNodes) {
401+
liContent += processNode(liChild, listDepth + 1);
402+
}
403+
// Trim whitespace from both ends of the li content, then end the item with a single newline
404+
result += `${indent}${bullet} ${liContent.trim()}\n`;
405+
index++;
406+
}
407+
}
408+
return (listDepth === 0 ? '\n' : '') + result + (listDepth === 0 ? '\n' : '');
409+
}
410+
411+
// A bare <li> passes its content through unchanged; bullets and indentation are added by the <ul>/<ol> handler
412+
if (tag === 'li') {
413+
return childContent;
414+
}
415+
357416
// Handle line breaks
358417
if (tag === 'br') {
359418
return '\n';

0 commit comments

Comments
 (0)