markdown: handle block-level and standalone anchors

Adds logic to detect if an anchor contains block descendants or is a
standalone element within a layout block. These are now rendered with
appropriate spacing and link formatting. Also adds `.main` to the list
of block elements.
This commit is contained in:
Adrià Arrufat
2026-02-20 00:11:38 +09:00
parent 282b64278e
commit 423034d5c4

View File

@@ -46,7 +46,7 @@ const State = struct {
/// Reports whether `tag` is one of the element tags this renderer treats as
/// block-level. Returns `true` for the tags listed in the switch and `false`
/// for every other tag.
fn isBlock(tag: Element.Tag) bool {
return switch (tag) {
// NOTE(review): this is a diff view, so the next two prongs look like the
// removed/added pair of a single line. The second one differs only by the
// extra `.main` entry that the commit message describes — confirm that only
// that prong exists in the real file.
.p, .div, .section, .article, .header, .footer, .nav, .aside, .h1, .h2, .h3, .h4, .h5, .h6, .ul, .ol, .blockquote, .pre, .table, .hr => true,
.p, .div, .section, .article, .main, .header, .footer, .nav, .aside, .h1, .h2, .h3, .h4, .h5, .h6, .ul, .ol, .blockquote, .pre, .table, .hr => true,
else => false,
};
}
@@ -58,6 +58,74 @@ fn shouldAddSpacing(tag: Element.Tag) bool {
};
}
/// Reports whether `tag` is a "layout" container: one of the structural
/// wrappers (`main`, `section`, `article`, `nav`, `aside`, `header`,
/// `footer`, `div`) or a list (`ul`, `ol`). Used to decide whether a direct
/// anchor child may be treated as standalone.
fn isLayoutBlock(tag: Element.Tag) bool {
    switch (tag) {
        .main,
        .section,
        .article,
        .nav,
        .aside,
        .header,
        .footer,
        .div,
        .ul,
        .ol,
        => return true,
        else => return false,
    }
}
/// Reports whether `el` sits on its own inside a layout block.
///
/// The answer is `true` only when:
///   * the parent node is an element whose tag satisfies `isLayoutBlock`, and
///   * in both sibling directions, no non-whitespace text node is found
///     before the nearest visible element sibling (or before the sibling
///     list runs out).
fn isStandaloneAnchor(el: *Element) bool {
    const self_node = el.asNode();

    const parent_node = self_node.parentNode() orelse return false;
    if (parent_node._type != .element) return false;
    if (!isLayoutBlock(parent_node.as(Element).getTag())) return false;

    // Scan backwards: real text next to the anchor makes it inline; the first
    // visible element ends the scan.
    var before = self_node.previousSibling();
    while (before) |sibling| {
        if (isSignificantText(sibling)) return false;
        if (sibling._type == .element and isVisibleElement(sibling.as(Element))) break;
        before = sibling.previousSibling();
    }

    // Same scan in the forward direction.
    var after = self_node.nextSibling();
    while (after) |sibling| {
        if (isSignificantText(sibling)) return false;
        if (sibling._type == .element and isVisibleElement(sibling.as(Element))) break;
        after = sibling.nextSibling();
    }

    return true;
}
/// Reports whether `node` is a text node carrying at least one
/// non-whitespace byte. Non-cdata nodes, and cdata nodes that are not
/// `Node.CData.Text`, are never significant.
fn isSignificantText(node: *Node) bool {
    if (node._type != .cdata) return false;
    if (node.is(Node.CData.Text) == null) return false;

    const data = node.as(Node.CData).getData();
    return !isAllWhitespace(data);
}
/// Reports whether `el` is an element the renderer should emit. The
/// hidden/metadata tags listed below yield `false`; every other tag yields
/// `true`.
fn isVisibleElement(el: *Element) bool {
    const tag = el.getTag();
    switch (tag) {
        .script,
        .style,
        .noscript,
        .template,
        .head,
        .meta,
        .link,
        .title,
        .svg,
        => return false,
        else => return true,
    }
}
/// Reports whether every byte of `text` is ASCII whitespace as defined by
/// `std.ascii.isWhitespace`. An empty slice counts as all-whitespace.
fn isAllWhitespace(text: []const u8) bool {
    for (text) |byte| {
        // The first non-whitespace byte settles the answer.
        if (!std.ascii.isWhitespace(byte)) return false;
    }
    return true;
}
/// Reports whether any element below `node` (at any depth) has a tag for
/// which `isBlock` returns true. `node` itself is not tested. The search is
/// depth-first and stops at the first match.
fn hasBlockDescendant(node: *Node) bool {
    var children = node.childrenIterator();
    while (children.next()) |child| {
        // Only elements can be block-level or contain further elements.
        if (child._type != .element) continue;

        const child_tag = child.as(Element).getTag();
        if (isBlock(child_tag) or hasBlockDescendant(child)) return true;
    }
    return false;
}
fn ensureNewline(state: *State, writer: *std.Io.Writer) !void {
if (!state.last_char_was_newline) {
try writer.writeByte('\n');
@@ -110,10 +178,7 @@ fn renderElement(el: *Element, state: *State, writer: *std.Io.Writer, page: *Pag
const tag = el.getTag();
// Skip hidden/metadata elements
switch (tag) {
.script, .style, .noscript, .template, .head, .meta, .link, .title, .svg => return,
else => {},
}
if (!isVisibleElement(el)) return;
// --- Opening Tag Logic ---
@@ -238,6 +303,32 @@ fn renderElement(el: *Element, state: *State, writer: *std.Io.Writer, page: *Pag
return; // Treat as void
},
.anchor => {
const has_block = hasBlockDescendant(el.asNode());
if (has_block) {
try renderChildren(el.asNode(), state, writer, page);
if (el.getAttributeSafe(comptime .wrap("href"))) |href| {
if (!state.last_char_was_newline) try writer.writeByte('\n');
try writer.writeAll("([Link](");
try writer.writeAll(href);
try writer.writeAll("))\n");
state.last_char_was_newline = true;
}
return;
}
if (isStandaloneAnchor(el)) {
if (!state.last_char_was_newline) try writer.writeByte('\n');
try writer.writeByte('[');
try renderChildren(el.asNode(), state, writer, page);
try writer.writeAll("](");
if (el.getAttributeSafe(comptime .wrap("href"))) |href| {
try writer.writeAll(href);
}
try writer.writeAll(")\n");
state.last_char_was_newline = true;
return;
}
try writer.writeByte('[');
try renderChildren(el.asNode(), state, writer, page);
try writer.writeAll("](");
@@ -504,3 +595,55 @@ test "markdown: code" {
\\
);
}
test "markdown: block link" {
    // An anchor wrapping block content: the children are rendered as blocks
    // and the href follows as a trailing "([Link](...))" line.
    const html =
        \\<a href="https://example.com">
        \\ <h3>Title</h3>
        \\ <p>Description</p>
        \\</a>
    ;
    const expected =
        \\
        \\### Title
        \\
        \\Description
        \\([Link](https://example.com))
        \\
    ;
    try testMarkdownHTML(html, expected);
}
test "markdown: inline link" {
    // An anchor surrounded by text inside a paragraph stays an inline link.
    const html =
        \\<p>Visit <a href="https://example.com">Example</a>.</p>
    ;
    const expected =
        \\
        \\Visit [Example](https://example.com).
        \\
    ;
    try testMarkdownHTML(html, expected);
}
test "markdown: standalone anchors" {
    // Anchors directly inside <main>, separated only by whitespace, are each
    // emitted on their own line.
    const html =
        \\<main>
        \\ <a href="1">Link 1</a>
        \\ <a href="2">Link 2</a>
        \\</main>
    ;
    const expected =
        \\[Link 1](1)
        \\[Link 2](2)
        \\
    ;
    try testMarkdownHTML(html, expected);
}
test "markdown: mixed anchors in main" {
    // An anchor with real text on either side must stay inline, even though
    // its parent is a layout block.
    const html =
        \\<main>
        \\ Welcome <a href="1">Link 1</a>.
        \\</main>
    ;
    const expected =
        \\Welcome [Link 1](1).
        \\
    ;
    try testMarkdownHTML(html, expected);
}