Trim trailing whitespace in pre blocks in Markdown

This commit is contained in:
Adrià Arrufat
2026-02-16 21:34:46 +09:00
parent b49b2af11f
commit 3c14dbe382

View File

@@ -33,143 +33,154 @@ const State = struct {
index: usize, index: usize,
}; };
list_depth: usize = 0, list_depth: usize = 0,
list_stack: [32]ListState = undefined, list_stack: [32]ListState = undefined,
in_pre: bool = false, in_pre: bool = false,
in_code: bool = false, pre_node: ?*Node = null,
in_blockquote: bool = false, in_code: bool = false,
in_table: bool = false, in_blockquote: bool = false,
table_row_index: usize = 0, in_table: bool = false,
table_col_count: usize = 0, table_row_index: usize = 0,
last_char_was_newline: bool = true, table_col_count: usize = 0,
}; last_char_was_newline: bool = true,
};
pub fn dump(node: *Node, opts: Opts, writer: *std.Io.Writer, page: *Page) !void { pub fn dump(node: *Node, opts: Opts, writer: *std.Io.Writer, page: *Page) !void {
_ = opts; _ = opts;
var state = State{}; var state = State{};
try render(node, &state, writer, page); try render(node, &state, writer, page);
if (!state.last_char_was_newline) { if (!state.last_char_was_newline) {
try writer.writeByte('\n'); try writer.writeByte('\n');
}
} }
}
fn render(node: *Node, state: *State, writer: *std.Io.Writer, page: *Page) anyerror!void { fn render(node: *Node, state: *State, writer: *std.Io.Writer, page: *Page) anyerror!void {
switch (node._type) { switch (node._type) {
.document, .document_fragment => { .document, .document_fragment => {
try renderChildren(node, state, writer, page); try renderChildren(node, state, writer, page);
}, },
.element => |el| { .element => |el| {
try renderElement(el, state, writer, page); try renderElement(el, state, writer, page);
}, },
.cdata => |cd| { .cdata => |cd| {
if (node.is(Node.CData.Text)) |_| { if (node.is(Node.CData.Text)) |_| {
try renderText(cd.getData(), state, writer); var text = cd.getData();
} if (state.in_pre) {
}, if (state.pre_node) |pre| {
else => {}, // Ignore other node types if (node.parentNode() == pre and node.nextSibling() == null) {
} text = std.mem.trimRight(u8, text, " \t\r\n");
} }
fn renderChildren(parent: *Node, state: *State, writer: *std.Io.Writer, page: *Page) anyerror!void {
var it = parent.childrenIterator();
while (it.next()) |child| {
try render(child, state, writer, page);
}
}
fn renderElement(el: *Element, state: *State, writer: *std.Io.Writer, page: *Page) anyerror!void {
const tag = el.getTag();
// Skip hidden/metadata elements
switch (tag) {
.script, .style, .noscript, .template, .head, .meta, .link, .title, .svg => return,
else => {},
}
// --- Opening Tag Logic ---
// Ensure block elements start on a new line (double newline for paragraphs etc)
switch (tag) {
.p, .div, .section, .article, .header, .footer, .nav, .aside, .h1, .h2, .h3, .h4, .h5, .h6, .ul, .ol, .blockquote, .pre, .table, .hr => {
if (!state.in_table) {
if (!state.last_char_was_newline) {
try writer.writeByte('\n');
state.last_char_was_newline = true;
}
if (tag == .p or tag == .h1 or tag == .h2 or tag == .h3 or tag == .h4 or tag == .h5 or tag == .h6 or tag == .blockquote or tag == .pre or tag == .table) {
// Add an extra newline for spacing between blocks
try writer.writeByte('\n');
} }
} }
}, try renderText(text, state, writer);
.li, .tr => { }
},
else => {}, // Ignore other node types
}
}
fn renderChildren(parent: *Node, state: *State, writer: *std.Io.Writer, page: *Page) anyerror!void {
var it = parent.childrenIterator();
while (it.next()) |child| {
try render(child, state, writer, page);
}
}
fn renderElement(el: *Element, state: *State, writer: *std.Io.Writer, page: *Page) anyerror!void {
const tag = el.getTag();
// Skip hidden/metadata elements
switch (tag) {
.script, .style, .noscript, .template, .head, .meta, .link, .title, .svg => return,
else => {},
}
// --- Opening Tag Logic ---
// Ensure block elements start on a new line (double newline for paragraphs etc)
switch (tag) {
.p, .div, .section, .article, .header, .footer, .nav, .aside, .h1, .h2, .h3, .h4, .h5, .h6, .ul, .ol, .blockquote, .pre, .table, .hr => {
if (!state.in_table) {
if (!state.last_char_was_newline) { if (!state.last_char_was_newline) {
try writer.writeByte('\n'); try writer.writeByte('\n');
state.last_char_was_newline = true; state.last_char_was_newline = true;
} }
}, if (tag == .p or tag == .h1 or tag == .h2 or tag == .h3 or tag == .h4 or tag == .h5 or tag == .h6 or tag == .blockquote or tag == .pre or tag == .table) {
else => {}, // Add an extra newline for spacing between blocks
} try writer.writeByte('\n');
// Prefixes
switch (tag) {
.h1 => try writer.writeAll("# "),
.h2 => try writer.writeAll("## "),
.h3 => try writer.writeAll("### "),
.h4 => try writer.writeAll("#### "),
.h5 => try writer.writeAll("##### "),
.h6 => try writer.writeAll("###### "),
.ul => {
if (state.list_depth < state.list_stack.len) {
state.list_stack[state.list_depth] = .{ .type = .unordered, .index = 0 };
state.list_depth += 1;
} }
}, }
.ol => { },
if (state.list_depth < state.list_stack.len) { .li, .tr => {
state.list_stack[state.list_depth] = .{ .type = .ordered, .index = 1 }; if (!state.last_char_was_newline) {
state.list_depth += 1; try writer.writeByte('\n');
} state.last_char_was_newline = true;
}, }
.li => { },
const indent = if (state.list_depth > 0) state.list_depth - 1 else 0; else => {},
try writeIndentation(indent, writer); }
if (state.list_depth > 0) { // Prefixes
const current_list = &state.list_stack[state.list_depth - 1]; switch (tag) {
if (current_list.type == .ordered) { .h1 => try writer.writeAll("# "),
try writer.print("{d}. ", .{current_list.index}); .h2 => try writer.writeAll("## "),
current_list.index += 1; .h3 => try writer.writeAll("### "),
} else { .h4 => try writer.writeAll("#### "),
try writer.writeAll("- "); .h5 => try writer.writeAll("##### "),
} .h6 => try writer.writeAll("###### "),
.ul => {
if (state.list_depth < state.list_stack.len) {
state.list_stack[state.list_depth] = .{ .type = .unordered, .index = 0 };
state.list_depth += 1;
}
},
.ol => {
if (state.list_depth < state.list_stack.len) {
state.list_stack[state.list_depth] = .{ .type = .ordered, .index = 1 };
state.list_depth += 1;
}
},
.li => {
const indent = if (state.list_depth > 0) state.list_depth - 1 else 0;
try writeIndentation(indent, writer);
if (state.list_depth > 0) {
const current_list = &state.list_stack[state.list_depth - 1];
if (current_list.type == .ordered) {
try writer.print("{d}. ", .{current_list.index});
current_list.index += 1;
} else { } else {
try writer.writeAll("- "); try writer.writeAll("- ");
} }
state.last_char_was_newline = false; } else {
}, try writer.writeAll("- ");
.table => { }
state.in_table = true; state.last_char_was_newline = false;
state.table_row_index = 0; },
state.table_col_count = 0; .table => {
}, state.in_table = true;
.tr => { state.table_row_index = 0;
state.table_col_count = 0; state.table_col_count = 0;
try writer.writeByte('|'); },
}, .tr => {
.td, .th => { state.table_col_count = 0;
// Note: leading pipe handled by previous cell closing or tr opening try writer.writeByte('|');
state.last_char_was_newline = false; },
// Add spacing .td, .th => {
try writer.writeByte(' '); // Note: leading pipe handled by previous cell closing or tr opening
}, state.last_char_was_newline = false;
.blockquote => { try writer.writeAll("> "); // Add spacing
try writer.writeByte(' ');
},
.blockquote => {
try writer.writeAll("> ");
state.in_blockquote = true; state.in_blockquote = true;
state.last_char_was_newline = false; state.last_char_was_newline = false;
}, },
.pre => { .pre => {
try writer.writeAll("```\n"); try writer.writeAll("```\n");
state.in_pre = true; state.in_pre = true;
state.pre_node = el.asNode();
state.last_char_was_newline = true; state.last_char_was_newline = true;
}, },
.code => { .code => {
@@ -259,6 +270,7 @@ const State = struct {
} }
try writer.writeAll("```\n"); try writer.writeAll("```\n");
state.in_pre = false; state.in_pre = false;
state.pre_node = null;
state.last_char_was_newline = true; state.last_char_was_newline = true;
}, },
.code => { .code => {