Merge pull request #1920 from lightpanda-io/markdown-renderer-refactor

markdown: refactor renderer into a struct to simplify argument passing
This commit is contained in:
Adrià Arrufat
2026-03-19 18:26:57 +09:00
committed by GitHub

View File

@@ -124,53 +124,49 @@ fn hasVisibleContent(root: *Node) bool {
return false; return false;
} }
fn ensureNewline(state: *State, writer: *std.Io.Writer) !void { const Context = struct {
if (!state.last_char_was_newline) { state: State,
try writer.writeByte('\n'); writer: *std.Io.Writer,
state.last_char_was_newline = true; page: *Page,
}
}
pub fn dump(node: *Node, opts: Opts, writer: *std.Io.Writer, page: *Page) !void { fn ensureNewline(self: *Context) !void {
_ = opts; if (!self.state.last_char_was_newline) {
var state = State{}; try self.writer.writeByte('\n');
try render(node, &state, writer, page); self.state.last_char_was_newline = true;
if (!state.last_char_was_newline) { }
try writer.writeByte('\n');
} }
}
fn render(node: *Node, state: *State, writer: *std.Io.Writer, page: *Page) error{WriteFailed}!void { fn render(self: *Context, node: *Node) error{WriteFailed}!void {
switch (node._type) { switch (node._type) {
.document, .document_fragment => { .document, .document_fragment => {
try renderChildren(node, state, writer, page); try self.renderChildren(node);
}, },
.element => |el| { .element => |el| {
try renderElement(el, state, writer, page); try self.renderElement(el);
}, },
.cdata => |cd| { .cdata => |cd| {
if (node.is(Node.CData.Text)) |_| { if (node.is(Node.CData.Text)) |_| {
var text = cd.getData().str(); var text = cd.getData().str();
if (state.pre_node) |pre| { if (self.state.pre_node) |pre| {
if (node.parentNode() == pre and node.nextSibling() == null) { if (node.parentNode() == pre and node.nextSibling() == null) {
text = std.mem.trimRight(u8, text, " \t\r\n"); text = std.mem.trimRight(u8, text, " \t\r\n");
} }
} }
try renderText(text, state, writer); try self.renderText(text);
} }
}, },
else => {}, else => {},
} }
} }
fn renderChildren(parent: *Node, state: *State, writer: *std.Io.Writer, page: *Page) !void { fn renderChildren(self: *Context, parent: *Node) !void {
var it = parent.childrenIterator(); var it = parent.childrenIterator();
while (it.next()) |child| { while (it.next()) |child| {
try render(child, state, writer, page); try self.render(child);
}
} }
}
fn renderElement(el: *Element, state: *State, writer: *std.Io.Writer, page: *Page) !void { fn renderElement(self: *Context, el: *Element) !void {
const tag = el.getTag(); const tag = el.getTag();
if (!isVisibleElement(el)) return; if (!isVisibleElement(el)) return;
@@ -178,116 +174,116 @@ fn renderElement(el: *Element, state: *State, writer: *std.Io.Writer, page: *Pag
// --- Opening Tag Logic --- // --- Opening Tag Logic ---
// Ensure block elements start on a new line (double newline for paragraphs etc) // Ensure block elements start on a new line (double newline for paragraphs etc)
if (tag.isBlock() and !state.in_table) { if (tag.isBlock() and !self.state.in_table) {
try ensureNewline(state, writer); try self.ensureNewline();
if (shouldAddSpacing(tag)) { if (shouldAddSpacing(tag)) {
try writer.writeByte('\n'); try self.writer.writeByte('\n');
} }
} else if (tag == .li or tag == .tr) { } else if (tag == .li or tag == .tr) {
try ensureNewline(state, writer); try self.ensureNewline();
} }
// Prefixes // Prefixes
switch (tag) { switch (tag) {
.h1 => try writer.writeAll("# "), .h1 => try self.writer.writeAll("# "),
.h2 => try writer.writeAll("## "), .h2 => try self.writer.writeAll("## "),
.h3 => try writer.writeAll("### "), .h3 => try self.writer.writeAll("### "),
.h4 => try writer.writeAll("#### "), .h4 => try self.writer.writeAll("#### "),
.h5 => try writer.writeAll("##### "), .h5 => try self.writer.writeAll("##### "),
.h6 => try writer.writeAll("###### "), .h6 => try self.writer.writeAll("###### "),
.ul => { .ul => {
if (state.list_depth < state.list_stack.len) { if (self.state.list_depth < self.state.list_stack.len) {
state.list_stack[state.list_depth] = .{ .type = .unordered, .index = 0 }; self.state.list_stack[self.state.list_depth] = .{ .type = .unordered, .index = 0 };
state.list_depth += 1; self.state.list_depth += 1;
} }
}, },
.ol => { .ol => {
if (state.list_depth < state.list_stack.len) { if (self.state.list_depth < self.state.list_stack.len) {
state.list_stack[state.list_depth] = .{ .type = .ordered, .index = 1 }; self.state.list_stack[self.state.list_depth] = .{ .type = .ordered, .index = 1 };
state.list_depth += 1; self.state.list_depth += 1;
} }
}, },
.li => { .li => {
const indent = if (state.list_depth > 0) state.list_depth - 1 else 0; const indent = if (self.state.list_depth > 0) self.state.list_depth - 1 else 0;
for (0..indent) |_| try writer.writeAll(" "); for (0..indent) |_| try self.writer.writeAll(" ");
if (state.list_depth > 0 and state.list_stack[state.list_depth - 1].type == .ordered) { if (self.state.list_depth > 0 and self.state.list_stack[self.state.list_depth - 1].type == .ordered) {
const current_list = &state.list_stack[state.list_depth - 1]; const current_list = &self.state.list_stack[self.state.list_depth - 1];
try writer.print("{d}. ", .{current_list.index}); try self.writer.print("{d}. ", .{current_list.index});
current_list.index += 1; current_list.index += 1;
} else { } else {
try writer.writeAll("- "); try self.writer.writeAll("- ");
} }
state.last_char_was_newline = false; self.state.last_char_was_newline = false;
}, },
.table => { .table => {
state.in_table = true; self.state.in_table = true;
state.table_row_index = 0; self.state.table_row_index = 0;
state.table_col_count = 0; self.state.table_col_count = 0;
}, },
.tr => { .tr => {
state.table_col_count = 0; self.state.table_col_count = 0;
try writer.writeByte('|'); try self.writer.writeByte('|');
}, },
.td, .th => { .td, .th => {
// Note: leading pipe handled by previous cell closing or tr opening // Note: leading pipe handled by previous cell closing or tr opening
state.last_char_was_newline = false; self.state.last_char_was_newline = false;
try writer.writeByte(' '); try self.writer.writeByte(' ');
}, },
.blockquote => { .blockquote => {
try writer.writeAll("> "); try self.writer.writeAll("> ");
state.last_char_was_newline = false; self.state.last_char_was_newline = false;
}, },
.pre => { .pre => {
try writer.writeAll("```\n"); try self.writer.writeAll("```\n");
state.pre_node = el.asNode(); self.state.pre_node = el.asNode();
state.last_char_was_newline = true; self.state.last_char_was_newline = true;
}, },
.code => { .code => {
if (state.pre_node == null) { if (self.state.pre_node == null) {
try writer.writeByte('`'); try self.writer.writeByte('`');
state.in_code = true; self.state.in_code = true;
state.last_char_was_newline = false; self.state.last_char_was_newline = false;
} }
}, },
.b, .strong => { .b, .strong => {
try writer.writeAll("**"); try self.writer.writeAll("**");
state.last_char_was_newline = false; self.state.last_char_was_newline = false;
}, },
.i, .em => { .i, .em => {
try writer.writeAll("*"); try self.writer.writeAll("*");
state.last_char_was_newline = false; self.state.last_char_was_newline = false;
}, },
.s, .del => { .s, .del => {
try writer.writeAll("~~"); try self.writer.writeAll("~~");
state.last_char_was_newline = false; self.state.last_char_was_newline = false;
}, },
.hr => { .hr => {
try writer.writeAll("---\n"); try self.writer.writeAll("---\n");
state.last_char_was_newline = true; self.state.last_char_was_newline = true;
return; return;
}, },
.br => { .br => {
if (state.in_table) { if (self.state.in_table) {
try writer.writeByte(' '); try self.writer.writeByte(' ');
} else { } else {
try writer.writeByte('\n'); try self.writer.writeByte('\n');
state.last_char_was_newline = true; self.state.last_char_was_newline = true;
} }
return; return;
}, },
.img => { .img => {
try writer.writeAll("!["); try self.writer.writeAll("![");
if (el.getAttributeSafe(comptime .wrap("alt"))) |alt| { if (el.getAttributeSafe(comptime .wrap("alt"))) |alt| {
try escapeMarkdown(writer, alt); try self.escape(alt);
} }
try writer.writeAll("]("); try self.writer.writeAll("](");
if (el.getAttributeSafe(comptime .wrap("src"))) |src| { if (el.getAttributeSafe(comptime .wrap("src"))) |src| {
const absolute_src = URL.resolve(page.call_arena, page.base(), src, .{ .encode = true }) catch src; const absolute_src = URL.resolve(self.page.call_arena, self.page.base(), src, .{ .encode = true }) catch src;
try writer.writeAll(absolute_src); try self.writer.writeAll(absolute_src);
} }
try writer.writeAll(")"); try self.writer.writeAll(")");
state.last_char_was_newline = false; self.state.last_char_was_newline = false;
return; return;
}, },
.anchor => { .anchor => {
@@ -298,57 +294,57 @@ fn renderElement(el: *Element, state: *State, writer: *std.Io.Writer, page: *Pag
if (!has_content and label == null and href_raw == null) return; if (!has_content and label == null and href_raw == null) return;
const has_block = hasBlockDescendant(el.asNode()); const has_block = hasBlockDescendant(el.asNode());
const href = if (href_raw) |h| URL.resolve(page.call_arena, page.base(), h, .{ .encode = true }) catch h else null; const href = if (href_raw) |h| URL.resolve(self.page.call_arena, self.page.base(), h, .{ .encode = true }) catch h else null;
if (has_block) { if (has_block) {
try renderChildren(el.asNode(), state, writer, page); try self.renderChildren(el.asNode());
if (href) |h| { if (href) |h| {
if (!state.last_char_was_newline) try writer.writeByte('\n'); if (!self.state.last_char_was_newline) try self.writer.writeByte('\n');
try writer.writeAll("([]("); try self.writer.writeAll("([](");
try writer.writeAll(h); try self.writer.writeAll(h);
try writer.writeAll("))\n"); try self.writer.writeAll("))\n");
state.last_char_was_newline = true; self.state.last_char_was_newline = true;
} }
return; return;
} }
if (isStandaloneAnchor(el)) { if (isStandaloneAnchor(el)) {
if (!state.last_char_was_newline) try writer.writeByte('\n'); if (!self.state.last_char_was_newline) try self.writer.writeByte('\n');
try writer.writeByte('['); try self.writer.writeByte('[');
if (has_content) { if (has_content) {
try renderChildren(el.asNode(), state, writer, page); try self.renderChildren(el.asNode());
} else { } else {
try writer.writeAll(label orelse ""); try self.writer.writeAll(label orelse "");
} }
try writer.writeAll("]("); try self.writer.writeAll("](");
if (href) |h| { if (href) |h| {
try writer.writeAll(h); try self.writer.writeAll(h);
} }
try writer.writeAll(")\n"); try self.writer.writeAll(")\n");
state.last_char_was_newline = true; self.state.last_char_was_newline = true;
return; return;
} }
try writer.writeByte('['); try self.writer.writeByte('[');
if (has_content) { if (has_content) {
try renderChildren(el.asNode(), state, writer, page); try self.renderChildren(el.asNode());
} else { } else {
try writer.writeAll(label orelse ""); try self.writer.writeAll(label orelse "");
} }
try writer.writeAll("]("); try self.writer.writeAll("](");
if (href) |h| { if (href) |h| {
try writer.writeAll(h); try self.writer.writeAll(h);
} }
try writer.writeByte(')'); try self.writer.writeByte(')');
state.last_char_was_newline = false; self.state.last_char_was_newline = false;
return; return;
}, },
.input => { .input => {
const type_attr = el.getAttributeSafe(comptime .wrap("type")) orelse return; const type_attr = el.getAttributeSafe(comptime .wrap("type")) orelse return;
if (std.ascii.eqlIgnoreCase(type_attr, "checkbox")) { if (std.ascii.eqlIgnoreCase(type_attr, "checkbox")) {
const checked = el.getAttributeSafe(comptime .wrap("checked")) != null; const checked = el.getAttributeSafe(comptime .wrap("checked")) != null;
try writer.writeAll(if (checked) "[x] " else "[ ] "); try self.writer.writeAll(if (checked) "[x] " else "[ ] ");
state.last_char_was_newline = false; self.state.last_char_was_newline = false;
} }
return; return;
}, },
@@ -356,85 +352,85 @@ fn renderElement(el: *Element, state: *State, writer: *std.Io.Writer, page: *Pag
} }
// --- Render Children --- // --- Render Children ---
try renderChildren(el.asNode(), state, writer, page); try self.renderChildren(el.asNode());
// --- Closing Tag Logic --- // --- Closing Tag Logic ---
// Suffixes // Suffixes
switch (tag) { switch (tag) {
.pre => { .pre => {
if (!state.last_char_was_newline) { if (!self.state.last_char_was_newline) {
try writer.writeByte('\n'); try self.writer.writeByte('\n');
} }
try writer.writeAll("```\n"); try self.writer.writeAll("```\n");
state.pre_node = null; self.state.pre_node = null;
state.last_char_was_newline = true; self.state.last_char_was_newline = true;
}, },
.code => { .code => {
if (state.pre_node == null) { if (self.state.pre_node == null) {
try writer.writeByte('`'); try self.writer.writeByte('`');
state.in_code = false; self.state.in_code = false;
state.last_char_was_newline = false; self.state.last_char_was_newline = false;
} }
}, },
.b, .strong => { .b, .strong => {
try writer.writeAll("**"); try self.writer.writeAll("**");
state.last_char_was_newline = false; self.state.last_char_was_newline = false;
}, },
.i, .em => { .i, .em => {
try writer.writeAll("*"); try self.writer.writeAll("*");
state.last_char_was_newline = false; self.state.last_char_was_newline = false;
}, },
.s, .del => { .s, .del => {
try writer.writeAll("~~"); try self.writer.writeAll("~~");
state.last_char_was_newline = false; self.state.last_char_was_newline = false;
}, },
.blockquote => {}, .blockquote => {},
.ul, .ol => { .ul, .ol => {
if (state.list_depth > 0) state.list_depth -= 1; if (self.state.list_depth > 0) self.state.list_depth -= 1;
}, },
.table => { .table => {
state.in_table = false; self.state.in_table = false;
}, },
.tr => { .tr => {
try writer.writeByte('\n'); try self.writer.writeByte('\n');
if (state.table_row_index == 0) { if (self.state.table_row_index == 0) {
try writer.writeByte('|'); try self.writer.writeByte('|');
for (0..state.table_col_count) |_| { for (0..self.state.table_col_count) |_| {
try writer.writeAll("---|"); try self.writer.writeAll("---|");
} }
try writer.writeByte('\n'); try self.writer.writeByte('\n');
} }
state.table_row_index += 1; self.state.table_row_index += 1;
state.last_char_was_newline = true; self.state.last_char_was_newline = true;
}, },
.td, .th => { .td, .th => {
try writer.writeAll(" |"); try self.writer.writeAll(" |");
state.table_col_count += 1; self.state.table_col_count += 1;
state.last_char_was_newline = false; self.state.last_char_was_newline = false;
}, },
else => {}, else => {},
} }
// Post-block newlines // Post-block newlines
if (tag.isBlock() and !state.in_table) { if (tag.isBlock() and !self.state.in_table) {
try ensureNewline(state, writer); try self.ensureNewline();
}
} }
}
fn renderText(text: []const u8, state: *State, writer: *std.Io.Writer) !void { fn renderText(self: *Context, text: []const u8) !void {
if (text.len == 0) return; if (text.len == 0) return;
if (state.pre_node) |_| { if (self.state.pre_node) |_| {
try writer.writeAll(text); try self.writer.writeAll(text);
state.last_char_was_newline = text[text.len - 1] == '\n'; self.state.last_char_was_newline = text[text.len - 1] == '\n';
return; return;
} }
// Check for pure whitespace // Check for pure whitespace
if (isAllWhitespace(text)) { if (isAllWhitespace(text)) {
if (!state.last_char_was_newline) { if (!self.state.last_char_was_newline) {
try writer.writeByte(' '); try self.writer.writeByte(' ');
} }
return; return;
} }
@@ -443,31 +439,45 @@ fn renderText(text: []const u8, state: *State, writer: *std.Io.Writer) !void {
var it = std.mem.tokenizeAny(u8, text, " \t\n\r"); var it = std.mem.tokenizeAny(u8, text, " \t\n\r");
var first = true; var first = true;
while (it.next()) |word| { while (it.next()) |word| {
if (!first or (!state.last_char_was_newline and std.ascii.isWhitespace(text[0]))) { if (!first or (!self.state.last_char_was_newline and std.ascii.isWhitespace(text[0]))) {
try writer.writeByte(' '); try self.writer.writeByte(' ');
} }
try escapeMarkdown(writer, word); try self.escape(word);
state.last_char_was_newline = false; self.state.last_char_was_newline = false;
first = false; first = false;
} }
// Handle trailing whitespace from the original text // Handle trailing whitespace from the original text
if (!first and !state.last_char_was_newline and std.ascii.isWhitespace(text[text.len - 1])) { if (!first and !self.state.last_char_was_newline and std.ascii.isWhitespace(text[text.len - 1])) {
try writer.writeByte(' '); try self.writer.writeByte(' ');
}
} }
}
fn escapeMarkdown(writer: *std.Io.Writer, text: []const u8) !void { fn escape(self: *Context, text: []const u8) !void {
for (text) |c| { for (text) |c| {
switch (c) { switch (c) {
'\\', '`', '*', '_', '{', '}', '[', ']', '(', ')', '#', '+', '-', '!', '|' => { '\\', '`', '*', '_', '{', '}', '[', ']', '(', ')', '#', '+', '-', '!', '|' => {
try writer.writeByte('\\'); try self.writer.writeByte('\\');
try writer.writeByte(c); try self.writer.writeByte(c);
}, },
else => try writer.writeByte(c), else => try self.writer.writeByte(c),
} }
} }
}
};
pub fn dump(node: *Node, opts: Opts, writer: *std.Io.Writer, page: *Page) !void {
_ = opts;
var ctx: Context = .{
.state = .{},
.writer = writer,
.page = page,
};
try ctx.render(node);
if (!ctx.state.last_char_was_newline) {
try writer.writeByte('\n');
}
} }
fn testMarkdownHTML(html: []const u8, expected: []const u8) !void { fn testMarkdownHTML(html: []const u8, expected: []const u8) !void {