diff --git a/src/browser/Page.zig b/src/browser/Page.zig
index b5bc29af..2061fdcb 100644
--- a/src/browser/Page.zig
+++ b/src/browser/Page.zig
@@ -1292,7 +1292,7 @@ pub fn appendNew(self: *Page, parent: *Node, child: Node.NodeOrText) !void {
                     return;
                 }
             }
-            break :blk try self.createTextNode(txt);
+            break :blk try self.createTextNode(txt, parent);
         },
     };
 
@@ -2112,10 +2112,52 @@ fn populateElementAttributes(self: *Page, element: *Element, list: anytype) !voi
     }
 }
 
-pub fn createTextNode(self: *Page, text: []const u8) !*Node {
+/// Creates a text node for `text`. When `parent` is an HTML element whose
+/// whitespace may be collapsed, the leading and trailing whitespace runs are
+/// each replaced by a single space; interior whitespace is left untouched.
+/// Text that is empty or longer than `self.buf` is stored unchanged.
+pub fn createTextNode(self: *Page, text: []const u8, parent: ?*Node) !*Node {
+    var normalized = text;
+    if (parent) |p| {
+        if (text.len > 0 and text.len <= self.buf.len and canCollapseWhiteSpace(p)) {
+            const has_leading_ws = switch (text[0]) {
+                ' ', '\t', '\r', '\n' => true,
+                else => false,
+            };
+            const has_trailing_ws = switch (text[text.len - 1]) {
+                ' ', '\t', '\r', '\n' => true,
+                else => false,
+            };
+
+            if (has_leading_ws or has_trailing_ws) {
+                const trimmed = std.mem.trim(u8, text, " \t\r\n");
+                var idx: usize = 0;
+
+                const buf = &self.buf;
+                if (has_leading_ws) {
+                    buf[idx] = ' ';
+                    idx += 1;
+                }
+
+                @memcpy(buf[idx..][0..trimmed.len], trimmed);
+                idx += trimmed.len;
+
+                // Whitespace-only text sets both flags; emit the trailing
+                // space only when content precedes it so the run collapses
+                // to a single space instead of two.
+                if (has_trailing_ws and trimmed.len > 0) {
+                    buf[idx] = ' ';
+                    idx += 1;
+                }
+
+                normalized = buf[0..idx];
+            }
+        }
+    }
+
     // might seem unlikely that we get an intern hit, but we'll get some nodes
     // with just '\n'
-    const owned_text = try self.dupeString(text);
+    const owned_text = try self.dupeString(normalized);
     const cd = try self._factory.node(CData{
         ._proto = undefined,
         ._type = .{ .text = .{
@@ -2127,6 +2169,19 @@ pub fn createTextNode(self: *Page, text: []const u8) !*Node {
     return cd.asNode();
 }
 
+/// Reports whether text inserted under `node` may have its surrounding
+/// whitespace collapsed: only HTML elements other than script, style,
+/// textarea and pre qualify.
+fn canCollapseWhiteSpace(node: *Node) bool {
+    // it's possible that some of these are safe to collapse, but it isn't worth
+    // the risk/complexity.
+    const el = node.is(Element.Html) orelse return false;
+    return switch (el._type) {
+        .script, .style, .textarea, .pre => false,
+        else => true,
+    };
+}
+
 pub fn createComment(self: *Page, text: []const u8) !*Node {
     const owned_text = try self.dupeString(text);
     const cd = try self._factory.node(CData{
diff --git a/src/browser/parser/Parser.zig b/src/browser/parser/Parser.zig
index d2d952f4..5655be6f 100644
--- a/src/browser/parser/Parser.zig
+++ b/src/browser/parser/Parser.zig
@@ -410,7 +410,7 @@ fn _appendBeforeSiblingCallback(self: *Parser, sibling: *Node, node_or_text: h5e
     const parent = sibling.parentNode() orelse return error.NoParent;
     const node: *Node = switch (node_or_text.toUnion()) {
         .node => |cpn| getNode(cpn),
-        .text => |txt| try self.page.createTextNode(txt),
+        .text => |txt| try self.page.createTextNode(txt, parent),
     };
     try self.page.insertNodeRelative(parent, node, .{ .before = sibling }, .{});
 }
diff --git a/src/browser/webapi/DOMImplementation.zig b/src/browser/webapi/DOMImplementation.zig
index 82405cdd..00e8dc5a 100644
--- a/src/browser/webapi/DOMImplementation.zig
+++ b/src/browser/webapi/DOMImplementation.zig
@@ -54,7 +54,7 @@ pub fn createHTMLDocument(_: *const DOMImplementation, title: ?[]const u8, page:
     if (title) |t| {
         const title_node = try page.createElementNS(.html, "title", null);
         _ = try head_node.appendChild(title_node, page);
-        const text_node = try page.createTextNode(t);
+        const text_node = try page.createTextNode(t, title_node);
         _ = try title_node.appendChild(text_node, page);
     }
 
diff --git a/src/browser/webapi/Document.zig b/src/browser/webapi/Document.zig
index 34a96976..d8838a0a 100644
--- a/src/browser/webapi/Document.zig
+++ b/src/browser/webapi/Document.zig
@@ -313,7 +313,7 @@ pub fn createComment(self: *Document, data: []const u8, page: *Page) !*Node {
 }
 
 pub fn createTextNode(self: *Document, data: []const u8, page: *Page) !*Node {
-    const node = try page.createTextNode(data);
+    const node = try page.createTextNode(data, null);
     // Track owner document if it's not the main document
     if (self != page.document) {
         try page.setNodeOwnerDocument(node, self);
diff --git a/src/browser/webapi/Element.zig b/src/browser/webapi/Element.zig
index d73b5ab5..54112cb7 100644
--- a/src/browser/webapi/Element.zig
+++ b/src/browser/webapi/Element.zig
@@ -589,8 +589,10 @@ pub fn insertAdjacentText(
     data: []const u8,
     page: *Page,
 ) !void {
-    const text_node = try page.createTextNode(data);
     const target_node, const prev_node = try self.asNode().findAdjacentNodes(where);
+    // Normalize against the node the text is actually inserted into, which
+    // may differ from `self` depending on `where`.
+    const text_node = try page.createTextNode(data, target_node);
     _ = try target_node.insertBefore(text_node, prev_node, page);
 }
 
diff --git a/src/browser/webapi/Node.zig b/src/browser/webapi/Node.zig
index 295d957b..11b8d71a 100644
--- a/src/browser/webapi/Node.zig
+++ b/src/browser/webapi/Node.zig
@@ -676,7 +676,7 @@ pub fn cloneNode(self: *Node, deep_: ?bool, page: *Page) error{ OutOfMemory, Str
         .cdata => |cd| {
             const data = cd.getData();
             return switch (cd._type) {
-                .text => page.createTextNode(data),
+                .text => page.createTextNode(data, null),
                 .cdata_section => page.createCDATASection(data),
                 .comment => page.createComment(data),
                 .processing_instruction => |pi| page.createProcessingInstruction(pi._target, data),
@@ -994,7 +994,7 @@ pub const NodeOrText = union(enum) {
     pub fn toNode(self: *const NodeOrText, page: *Page) !*Node {
         return switch (self.*) {
             .node => |n| n,
-            .text => |txt| page.createTextNode(txt),
+            .text => |txt| page.createTextNode(txt, null),
         };
     }
 };
diff --git a/src/browser/webapi/Range.zig b/src/browser/webapi/Range.zig
index e8d08b84..a3c32c9d 100644
--- a/src/browser/webapi/Range.zig
+++ b/src/browser/webapi/Range.zig
@@ -322,8 +322,8 @@ pub fn insertNode(self: *Range, node: *Node, page: *Page) !void {
         const before_text = text_data[0..offset];
         const after_text = text_data[offset..];
 
-        const before = try page.createTextNode(before_text);
-        const after = try page.createTextNode(after_text);
+        const before = try page.createTextNode(before_text, parent);
+        const after = try page.createTextNode(after_text, parent);
 
         _ = try parent.replaceChild(before, container, page);
         _ = try parent.insertBefore(node, before.nextSibling(), page);
@@ -423,8 +423,9 @@ pub fn cloneContents(self: *const Range, page: *Page) !*DocumentFragment {
             const text_data = self._proto._start_container.getData();
             if (self._proto._start_offset < text_data.len and self._proto._end_offset <= text_data.len) {
                 const cloned_text = text_data[self._proto._start_offset..self._proto._end_offset];
-                const text_node = try page.createTextNode(cloned_text);
-                _ = try fragment.asNode().appendChild(text_node, page);
+                const parent = fragment.asNode();
+                const text_node = try page.createTextNode(cloned_text, parent);
+                _ = try parent.appendChild(text_node, page);
             }
         } else {
             // Clone child nodes in range
@@ -444,7 +445,7 @@ pub fn cloneContents(self: *const Range, page: *Page) !*DocumentFragment {
                 if (self._proto._start_offset < text_data.len) {
                     // Clone from start_offset to end of text
                     const cloned_text = text_data[self._proto._start_offset..];
-                    const text_node = try page.createTextNode(cloned_text);
+                    const text_node = try page.createTextNode(cloned_text, null);
                     _ = try fragment.asNode().appendChild(text_node, page);
                 }
             }
@@ -465,8 +466,9 @@ pub fn cloneContents(self: *const Range, page: *Page) !*DocumentFragment {
                 if (self._proto._end_offset > 0 and self._proto._end_offset <= text_data.len) {
                     // Clone from start to end_offset
                     const cloned_text = text_data[0..self._proto._end_offset];
-                    const text_node = try page.createTextNode(cloned_text);
-                    _ = try fragment.asNode().appendChild(text_node, page);
+                    const parent = fragment.asNode();
+                    const text_node = try page.createTextNode(cloned_text, parent);
+                    _ = try parent.appendChild(text_node, page);
                 }
             }
         }
diff --git a/src/browser/webapi/cdata/Text.zig b/src/browser/webapi/cdata/Text.zig
index 8037db2e..6e103d28 100644
--- a/src/browser/webapi/cdata/Text.zig
+++ b/src/browser/webapi/cdata/Text.zig
@@ -24,7 +24,7 @@ const Text = @This();
 _proto: *CData,
 
 pub fn init(str: ?[]const u8, page: *Page) !*Text {
-    const node = try page.createTextNode(str orelse "");
+    const node = try page.createTextNode(str orelse "", null);
     return node.as(Text);
 }
 
@@ -40,7 +40,7 @@ pub fn splitText(self: *Text, offset: usize, page: *Page) !*Text {
     }
 
     const new_data = data[offset..];
-    const new_node = try page.createTextNode(new_data);
+    const new_node = try page.createTextNode(new_data, null);
     const new_text = new_node.as(Text);
 
     const old_data = data[0..offset];
diff --git a/src/browser/webapi/element/Html.zig b/src/browser/webapi/element/Html.zig
index f6354355..8c04943c 100644
--- a/src/browser/webapi/element/Html.zig
+++ b/src/browser/webapi/element/Html.zig
@@ -264,7 +264,7 @@ pub fn setInnerText(self: *HtmlElement, text: []const u8, page: *Page) !void {
     }
 
     // Create and append text node
-    const text_node = try page.createTextNode(text);
+    const text_node = try page.createTextNode(text, parent);
     try page.appendNode(parent, text_node, .{ .child_already_connected = false });
 }
 
diff --git a/src/browser/webapi/element/html/TextArea.zig b/src/browser/webapi/element/html/TextArea.zig
index bcac5cb2..37403074 100644
--- a/src/browser/webapi/element/html/TextArea.zig
+++ b/src/browser/webapi/element/html/TextArea.zig
@@ -79,7 +79,7 @@ pub fn setDefaultValue(self: *TextArea, value: []const u8, page: *Page) !void {
     }
 
     // No text child exists, create one
-    const text_node = try page.createTextNode(owned);
+    const text_node = try page.createTextNode(owned, node);
     _ = try node.appendChild(text_node, page);
 }
 
diff --git a/src/string.zig b/src/string.zig
index 90966d88..d2d009db 100644
--- a/src/string.zig
+++ b/src/string.zig
@@ -134,6 +134,7 @@ pub const String = packed struct {
     // This can be used outside of the small string optimization
     pub fn intern(input: []const u8) ?[]const u8 {
         switch (input.len) {
+            0 => return "",
             1 => switch (input[0]) {
                 '\n' => return "\n",
                 '\r' => return "\r",