mirror of
https://github.com/lightpanda-io/browser.git
synced 2026-03-21 20:24:42 +00:00
Merge pull request #1920 from lightpanda-io/markdown-renderer-refactor
markdown: refactor renderer into a struct to simplify argument passing
This commit is contained in:
@@ -124,53 +124,49 @@ fn hasVisibleContent(root: *Node) bool {
|
||||
return false;
|
||||
}
|
||||
|
||||
fn ensureNewline(state: *State, writer: *std.Io.Writer) !void {
|
||||
if (!state.last_char_was_newline) {
|
||||
try writer.writeByte('\n');
|
||||
state.last_char_was_newline = true;
|
||||
}
|
||||
}
|
||||
const Context = struct {
|
||||
state: State,
|
||||
writer: *std.Io.Writer,
|
||||
page: *Page,
|
||||
|
||||
pub fn dump(node: *Node, opts: Opts, writer: *std.Io.Writer, page: *Page) !void {
|
||||
_ = opts;
|
||||
var state = State{};
|
||||
try render(node, &state, writer, page);
|
||||
if (!state.last_char_was_newline) {
|
||||
try writer.writeByte('\n');
|
||||
fn ensureNewline(self: *Context) !void {
|
||||
if (!self.state.last_char_was_newline) {
|
||||
try self.writer.writeByte('\n');
|
||||
self.state.last_char_was_newline = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn render(node: *Node, state: *State, writer: *std.Io.Writer, page: *Page) error{WriteFailed}!void {
|
||||
fn render(self: *Context, node: *Node) error{WriteFailed}!void {
|
||||
switch (node._type) {
|
||||
.document, .document_fragment => {
|
||||
try renderChildren(node, state, writer, page);
|
||||
try self.renderChildren(node);
|
||||
},
|
||||
.element => |el| {
|
||||
try renderElement(el, state, writer, page);
|
||||
try self.renderElement(el);
|
||||
},
|
||||
.cdata => |cd| {
|
||||
if (node.is(Node.CData.Text)) |_| {
|
||||
var text = cd.getData().str();
|
||||
if (state.pre_node) |pre| {
|
||||
if (self.state.pre_node) |pre| {
|
||||
if (node.parentNode() == pre and node.nextSibling() == null) {
|
||||
text = std.mem.trimRight(u8, text, " \t\r\n");
|
||||
}
|
||||
}
|
||||
try renderText(text, state, writer);
|
||||
try self.renderText(text);
|
||||
}
|
||||
},
|
||||
else => {},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn renderChildren(parent: *Node, state: *State, writer: *std.Io.Writer, page: *Page) !void {
|
||||
fn renderChildren(self: *Context, parent: *Node) !void {
|
||||
var it = parent.childrenIterator();
|
||||
while (it.next()) |child| {
|
||||
try render(child, state, writer, page);
|
||||
try self.render(child);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn renderElement(el: *Element, state: *State, writer: *std.Io.Writer, page: *Page) !void {
|
||||
fn renderElement(self: *Context, el: *Element) !void {
|
||||
const tag = el.getTag();
|
||||
|
||||
if (!isVisibleElement(el)) return;
|
||||
@@ -178,116 +174,116 @@ fn renderElement(el: *Element, state: *State, writer: *std.Io.Writer, page: *Pag
|
||||
// --- Opening Tag Logic ---
|
||||
|
||||
// Ensure block elements start on a new line (double newline for paragraphs etc)
|
||||
if (tag.isBlock() and !state.in_table) {
|
||||
try ensureNewline(state, writer);
|
||||
if (tag.isBlock() and !self.state.in_table) {
|
||||
try self.ensureNewline();
|
||||
if (shouldAddSpacing(tag)) {
|
||||
try writer.writeByte('\n');
|
||||
try self.writer.writeByte('\n');
|
||||
}
|
||||
} else if (tag == .li or tag == .tr) {
|
||||
try ensureNewline(state, writer);
|
||||
try self.ensureNewline();
|
||||
}
|
||||
|
||||
// Prefixes
|
||||
switch (tag) {
|
||||
.h1 => try writer.writeAll("# "),
|
||||
.h2 => try writer.writeAll("## "),
|
||||
.h3 => try writer.writeAll("### "),
|
||||
.h4 => try writer.writeAll("#### "),
|
||||
.h5 => try writer.writeAll("##### "),
|
||||
.h6 => try writer.writeAll("###### "),
|
||||
.h1 => try self.writer.writeAll("# "),
|
||||
.h2 => try self.writer.writeAll("## "),
|
||||
.h3 => try self.writer.writeAll("### "),
|
||||
.h4 => try self.writer.writeAll("#### "),
|
||||
.h5 => try self.writer.writeAll("##### "),
|
||||
.h6 => try self.writer.writeAll("###### "),
|
||||
.ul => {
|
||||
if (state.list_depth < state.list_stack.len) {
|
||||
state.list_stack[state.list_depth] = .{ .type = .unordered, .index = 0 };
|
||||
state.list_depth += 1;
|
||||
if (self.state.list_depth < self.state.list_stack.len) {
|
||||
self.state.list_stack[self.state.list_depth] = .{ .type = .unordered, .index = 0 };
|
||||
self.state.list_depth += 1;
|
||||
}
|
||||
},
|
||||
.ol => {
|
||||
if (state.list_depth < state.list_stack.len) {
|
||||
state.list_stack[state.list_depth] = .{ .type = .ordered, .index = 1 };
|
||||
state.list_depth += 1;
|
||||
if (self.state.list_depth < self.state.list_stack.len) {
|
||||
self.state.list_stack[self.state.list_depth] = .{ .type = .ordered, .index = 1 };
|
||||
self.state.list_depth += 1;
|
||||
}
|
||||
},
|
||||
.li => {
|
||||
const indent = if (state.list_depth > 0) state.list_depth - 1 else 0;
|
||||
for (0..indent) |_| try writer.writeAll(" ");
|
||||
const indent = if (self.state.list_depth > 0) self.state.list_depth - 1 else 0;
|
||||
for (0..indent) |_| try self.writer.writeAll(" ");
|
||||
|
||||
if (state.list_depth > 0 and state.list_stack[state.list_depth - 1].type == .ordered) {
|
||||
const current_list = &state.list_stack[state.list_depth - 1];
|
||||
try writer.print("{d}. ", .{current_list.index});
|
||||
if (self.state.list_depth > 0 and self.state.list_stack[self.state.list_depth - 1].type == .ordered) {
|
||||
const current_list = &self.state.list_stack[self.state.list_depth - 1];
|
||||
try self.writer.print("{d}. ", .{current_list.index});
|
||||
current_list.index += 1;
|
||||
} else {
|
||||
try writer.writeAll("- ");
|
||||
try self.writer.writeAll("- ");
|
||||
}
|
||||
state.last_char_was_newline = false;
|
||||
self.state.last_char_was_newline = false;
|
||||
},
|
||||
.table => {
|
||||
state.in_table = true;
|
||||
state.table_row_index = 0;
|
||||
state.table_col_count = 0;
|
||||
self.state.in_table = true;
|
||||
self.state.table_row_index = 0;
|
||||
self.state.table_col_count = 0;
|
||||
},
|
||||
.tr => {
|
||||
state.table_col_count = 0;
|
||||
try writer.writeByte('|');
|
||||
self.state.table_col_count = 0;
|
||||
try self.writer.writeByte('|');
|
||||
},
|
||||
.td, .th => {
|
||||
// Note: leading pipe handled by previous cell closing or tr opening
|
||||
state.last_char_was_newline = false;
|
||||
try writer.writeByte(' ');
|
||||
self.state.last_char_was_newline = false;
|
||||
try self.writer.writeByte(' ');
|
||||
},
|
||||
.blockquote => {
|
||||
try writer.writeAll("> ");
|
||||
state.last_char_was_newline = false;
|
||||
try self.writer.writeAll("> ");
|
||||
self.state.last_char_was_newline = false;
|
||||
},
|
||||
.pre => {
|
||||
try writer.writeAll("```\n");
|
||||
state.pre_node = el.asNode();
|
||||
state.last_char_was_newline = true;
|
||||
try self.writer.writeAll("```\n");
|
||||
self.state.pre_node = el.asNode();
|
||||
self.state.last_char_was_newline = true;
|
||||
},
|
||||
.code => {
|
||||
if (state.pre_node == null) {
|
||||
try writer.writeByte('`');
|
||||
state.in_code = true;
|
||||
state.last_char_was_newline = false;
|
||||
if (self.state.pre_node == null) {
|
||||
try self.writer.writeByte('`');
|
||||
self.state.in_code = true;
|
||||
self.state.last_char_was_newline = false;
|
||||
}
|
||||
},
|
||||
.b, .strong => {
|
||||
try writer.writeAll("**");
|
||||
state.last_char_was_newline = false;
|
||||
try self.writer.writeAll("**");
|
||||
self.state.last_char_was_newline = false;
|
||||
},
|
||||
.i, .em => {
|
||||
try writer.writeAll("*");
|
||||
state.last_char_was_newline = false;
|
||||
try self.writer.writeAll("*");
|
||||
self.state.last_char_was_newline = false;
|
||||
},
|
||||
.s, .del => {
|
||||
try writer.writeAll("~~");
|
||||
state.last_char_was_newline = false;
|
||||
try self.writer.writeAll("~~");
|
||||
self.state.last_char_was_newline = false;
|
||||
},
|
||||
.hr => {
|
||||
try writer.writeAll("---\n");
|
||||
state.last_char_was_newline = true;
|
||||
try self.writer.writeAll("---\n");
|
||||
self.state.last_char_was_newline = true;
|
||||
return;
|
||||
},
|
||||
.br => {
|
||||
if (state.in_table) {
|
||||
try writer.writeByte(' ');
|
||||
if (self.state.in_table) {
|
||||
try self.writer.writeByte(' ');
|
||||
} else {
|
||||
try writer.writeByte('\n');
|
||||
state.last_char_was_newline = true;
|
||||
try self.writer.writeByte('\n');
|
||||
self.state.last_char_was_newline = true;
|
||||
}
|
||||
return;
|
||||
},
|
||||
.img => {
|
||||
try writer.writeAll(";
|
||||
try self.writer.writeAll("](");
|
||||
if (el.getAttributeSafe(comptime .wrap("src"))) |src| {
|
||||
const absolute_src = URL.resolve(page.call_arena, page.base(), src, .{ .encode = true }) catch src;
|
||||
try writer.writeAll(absolute_src);
|
||||
const absolute_src = URL.resolve(self.page.call_arena, self.page.base(), src, .{ .encode = true }) catch src;
|
||||
try self.writer.writeAll(absolute_src);
|
||||
}
|
||||
try writer.writeAll(")");
|
||||
state.last_char_was_newline = false;
|
||||
try self.writer.writeAll(")");
|
||||
self.state.last_char_was_newline = false;
|
||||
return;
|
||||
},
|
||||
.anchor => {
|
||||
@@ -298,57 +294,57 @@ fn renderElement(el: *Element, state: *State, writer: *std.Io.Writer, page: *Pag
|
||||
if (!has_content and label == null and href_raw == null) return;
|
||||
|
||||
const has_block = hasBlockDescendant(el.asNode());
|
||||
const href = if (href_raw) |h| URL.resolve(page.call_arena, page.base(), h, .{ .encode = true }) catch h else null;
|
||||
const href = if (href_raw) |h| URL.resolve(self.page.call_arena, self.page.base(), h, .{ .encode = true }) catch h else null;
|
||||
|
||||
if (has_block) {
|
||||
try renderChildren(el.asNode(), state, writer, page);
|
||||
try self.renderChildren(el.asNode());
|
||||
if (href) |h| {
|
||||
if (!state.last_char_was_newline) try writer.writeByte('\n');
|
||||
try writer.writeAll("([](");
|
||||
try writer.writeAll(h);
|
||||
try writer.writeAll("))\n");
|
||||
state.last_char_was_newline = true;
|
||||
if (!self.state.last_char_was_newline) try self.writer.writeByte('\n');
|
||||
try self.writer.writeAll("([](");
|
||||
try self.writer.writeAll(h);
|
||||
try self.writer.writeAll("))\n");
|
||||
self.state.last_char_was_newline = true;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (isStandaloneAnchor(el)) {
|
||||
if (!state.last_char_was_newline) try writer.writeByte('\n');
|
||||
try writer.writeByte('[');
|
||||
if (!self.state.last_char_was_newline) try self.writer.writeByte('\n');
|
||||
try self.writer.writeByte('[');
|
||||
if (has_content) {
|
||||
try renderChildren(el.asNode(), state, writer, page);
|
||||
try self.renderChildren(el.asNode());
|
||||
} else {
|
||||
try writer.writeAll(label orelse "");
|
||||
try self.writer.writeAll(label orelse "");
|
||||
}
|
||||
try writer.writeAll("](");
|
||||
try self.writer.writeAll("](");
|
||||
if (href) |h| {
|
||||
try writer.writeAll(h);
|
||||
try self.writer.writeAll(h);
|
||||
}
|
||||
try writer.writeAll(")\n");
|
||||
state.last_char_was_newline = true;
|
||||
try self.writer.writeAll(")\n");
|
||||
self.state.last_char_was_newline = true;
|
||||
return;
|
||||
}
|
||||
|
||||
try writer.writeByte('[');
|
||||
try self.writer.writeByte('[');
|
||||
if (has_content) {
|
||||
try renderChildren(el.asNode(), state, writer, page);
|
||||
try self.renderChildren(el.asNode());
|
||||
} else {
|
||||
try writer.writeAll(label orelse "");
|
||||
try self.writer.writeAll(label orelse "");
|
||||
}
|
||||
try writer.writeAll("](");
|
||||
try self.writer.writeAll("](");
|
||||
if (href) |h| {
|
||||
try writer.writeAll(h);
|
||||
try self.writer.writeAll(h);
|
||||
}
|
||||
try writer.writeByte(')');
|
||||
state.last_char_was_newline = false;
|
||||
try self.writer.writeByte(')');
|
||||
self.state.last_char_was_newline = false;
|
||||
return;
|
||||
},
|
||||
.input => {
|
||||
const type_attr = el.getAttributeSafe(comptime .wrap("type")) orelse return;
|
||||
if (std.ascii.eqlIgnoreCase(type_attr, "checkbox")) {
|
||||
const checked = el.getAttributeSafe(comptime .wrap("checked")) != null;
|
||||
try writer.writeAll(if (checked) "[x] " else "[ ] ");
|
||||
state.last_char_was_newline = false;
|
||||
try self.writer.writeAll(if (checked) "[x] " else "[ ] ");
|
||||
self.state.last_char_was_newline = false;
|
||||
}
|
||||
return;
|
||||
},
|
||||
@@ -356,85 +352,85 @@ fn renderElement(el: *Element, state: *State, writer: *std.Io.Writer, page: *Pag
|
||||
}
|
||||
|
||||
// --- Render Children ---
|
||||
try renderChildren(el.asNode(), state, writer, page);
|
||||
try self.renderChildren(el.asNode());
|
||||
|
||||
// --- Closing Tag Logic ---
|
||||
|
||||
// Suffixes
|
||||
switch (tag) {
|
||||
.pre => {
|
||||
if (!state.last_char_was_newline) {
|
||||
try writer.writeByte('\n');
|
||||
if (!self.state.last_char_was_newline) {
|
||||
try self.writer.writeByte('\n');
|
||||
}
|
||||
try writer.writeAll("```\n");
|
||||
state.pre_node = null;
|
||||
state.last_char_was_newline = true;
|
||||
try self.writer.writeAll("```\n");
|
||||
self.state.pre_node = null;
|
||||
self.state.last_char_was_newline = true;
|
||||
},
|
||||
.code => {
|
||||
if (state.pre_node == null) {
|
||||
try writer.writeByte('`');
|
||||
state.in_code = false;
|
||||
state.last_char_was_newline = false;
|
||||
if (self.state.pre_node == null) {
|
||||
try self.writer.writeByte('`');
|
||||
self.state.in_code = false;
|
||||
self.state.last_char_was_newline = false;
|
||||
}
|
||||
},
|
||||
.b, .strong => {
|
||||
try writer.writeAll("**");
|
||||
state.last_char_was_newline = false;
|
||||
try self.writer.writeAll("**");
|
||||
self.state.last_char_was_newline = false;
|
||||
},
|
||||
.i, .em => {
|
||||
try writer.writeAll("*");
|
||||
state.last_char_was_newline = false;
|
||||
try self.writer.writeAll("*");
|
||||
self.state.last_char_was_newline = false;
|
||||
},
|
||||
.s, .del => {
|
||||
try writer.writeAll("~~");
|
||||
state.last_char_was_newline = false;
|
||||
try self.writer.writeAll("~~");
|
||||
self.state.last_char_was_newline = false;
|
||||
},
|
||||
.blockquote => {},
|
||||
.ul, .ol => {
|
||||
if (state.list_depth > 0) state.list_depth -= 1;
|
||||
if (self.state.list_depth > 0) self.state.list_depth -= 1;
|
||||
},
|
||||
.table => {
|
||||
state.in_table = false;
|
||||
self.state.in_table = false;
|
||||
},
|
||||
.tr => {
|
||||
try writer.writeByte('\n');
|
||||
if (state.table_row_index == 0) {
|
||||
try writer.writeByte('|');
|
||||
for (0..state.table_col_count) |_| {
|
||||
try writer.writeAll("---|");
|
||||
try self.writer.writeByte('\n');
|
||||
if (self.state.table_row_index == 0) {
|
||||
try self.writer.writeByte('|');
|
||||
for (0..self.state.table_col_count) |_| {
|
||||
try self.writer.writeAll("---|");
|
||||
}
|
||||
try writer.writeByte('\n');
|
||||
try self.writer.writeByte('\n');
|
||||
}
|
||||
state.table_row_index += 1;
|
||||
state.last_char_was_newline = true;
|
||||
self.state.table_row_index += 1;
|
||||
self.state.last_char_was_newline = true;
|
||||
},
|
||||
.td, .th => {
|
||||
try writer.writeAll(" |");
|
||||
state.table_col_count += 1;
|
||||
state.last_char_was_newline = false;
|
||||
try self.writer.writeAll(" |");
|
||||
self.state.table_col_count += 1;
|
||||
self.state.last_char_was_newline = false;
|
||||
},
|
||||
else => {},
|
||||
}
|
||||
|
||||
// Post-block newlines
|
||||
if (tag.isBlock() and !state.in_table) {
|
||||
try ensureNewline(state, writer);
|
||||
if (tag.isBlock() and !self.state.in_table) {
|
||||
try self.ensureNewline();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn renderText(text: []const u8, state: *State, writer: *std.Io.Writer) !void {
|
||||
fn renderText(self: *Context, text: []const u8) !void {
|
||||
if (text.len == 0) return;
|
||||
|
||||
if (state.pre_node) |_| {
|
||||
try writer.writeAll(text);
|
||||
state.last_char_was_newline = text[text.len - 1] == '\n';
|
||||
if (self.state.pre_node) |_| {
|
||||
try self.writer.writeAll(text);
|
||||
self.state.last_char_was_newline = text[text.len - 1] == '\n';
|
||||
return;
|
||||
}
|
||||
|
||||
// Check for pure whitespace
|
||||
if (isAllWhitespace(text)) {
|
||||
if (!state.last_char_was_newline) {
|
||||
try writer.writeByte(' ');
|
||||
if (!self.state.last_char_was_newline) {
|
||||
try self.writer.writeByte(' ');
|
||||
}
|
||||
return;
|
||||
}
|
||||
@@ -443,31 +439,45 @@ fn renderText(text: []const u8, state: *State, writer: *std.Io.Writer) !void {
|
||||
var it = std.mem.tokenizeAny(u8, text, " \t\n\r");
|
||||
var first = true;
|
||||
while (it.next()) |word| {
|
||||
if (!first or (!state.last_char_was_newline and std.ascii.isWhitespace(text[0]))) {
|
||||
try writer.writeByte(' ');
|
||||
if (!first or (!self.state.last_char_was_newline and std.ascii.isWhitespace(text[0]))) {
|
||||
try self.writer.writeByte(' ');
|
||||
}
|
||||
|
||||
try escapeMarkdown(writer, word);
|
||||
state.last_char_was_newline = false;
|
||||
try self.escape(word);
|
||||
self.state.last_char_was_newline = false;
|
||||
first = false;
|
||||
}
|
||||
|
||||
// Handle trailing whitespace from the original text
|
||||
if (!first and !state.last_char_was_newline and std.ascii.isWhitespace(text[text.len - 1])) {
|
||||
try writer.writeByte(' ');
|
||||
if (!first and !self.state.last_char_was_newline and std.ascii.isWhitespace(text[text.len - 1])) {
|
||||
try self.writer.writeByte(' ');
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn escapeMarkdown(writer: *std.Io.Writer, text: []const u8) !void {
|
||||
fn escape(self: *Context, text: []const u8) !void {
|
||||
for (text) |c| {
|
||||
switch (c) {
|
||||
'\\', '`', '*', '_', '{', '}', '[', ']', '(', ')', '#', '+', '-', '!', '|' => {
|
||||
try writer.writeByte('\\');
|
||||
try writer.writeByte(c);
|
||||
try self.writer.writeByte('\\');
|
||||
try self.writer.writeByte(c);
|
||||
},
|
||||
else => try writer.writeByte(c),
|
||||
else => try self.writer.writeByte(c),
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
pub fn dump(node: *Node, opts: Opts, writer: *std.Io.Writer, page: *Page) !void {
|
||||
_ = opts;
|
||||
var ctx: Context = .{
|
||||
.state = .{},
|
||||
.writer = writer,
|
||||
.page = page,
|
||||
};
|
||||
try ctx.render(node);
|
||||
if (!ctx.state.last_char_was_newline) {
|
||||
try writer.writeByte('\n');
|
||||
}
|
||||
}
|
||||
|
||||
fn testMarkdownHTML(html: []const u8, expected: []const u8) !void {
|
||||
|
||||
Reference in New Issue
Block a user