mirror of
https://github.com/lightpanda-io/browser.git
synced 2026-02-03 22:13:46 +00:00
Experiment with collapsing whitespace
This might break things and, in the scale of things, probably doesn't save enough memory, BUT... if you look at the most common text nodes on a page, you'll likely see hundreds or thousands of nodes containing just spaces and newlines. On an Amazon product page, for example, there are 80 text nodes containing just a newline + 80 spaces. In fact, there are over 4000 text nodes containing just whitespace. Ideally, we could simply ignore them and not even generate the Node. But that's likely to break some JavaScript (e.g. it would mess up the child count, the firstChild, etc...). So this simply attempts to collapse the leading and trailing whitespace (it doesn't trim them, it merely replaces \s+ with " "). This could _still_ break some JavaScript, but seems safer. The real win here is that " " gets interned, so it doesn't require a dupe/allocation.
This commit is contained in:
@@ -1292,7 +1292,7 @@ pub fn appendNew(self: *Page, parent: *Node, child: Node.NodeOrText) !void {
|
||||
return;
|
||||
}
|
||||
}
|
||||
break :blk try self.createTextNode(txt);
|
||||
break :blk try self.createTextNode(txt, parent);
|
||||
},
|
||||
};
|
||||
|
||||
@@ -2112,10 +2112,45 @@ fn populateElementAttributes(self: *Page, element: *Element, list: anytype) !voi
|
||||
}
|
||||
}
|
||||
|
||||
pub fn createTextNode(self: *Page, text: []const u8) !*Node {
|
||||
pub fn createTextNode(self: *Page, text: []const u8, parent: ?*Node) !*Node {
|
||||
var normalized = text;
|
||||
if (parent) |p| {
|
||||
if (text.len > 0 and text.len <= self.buf.len and canCollapseWhiteSpace(p)) {
|
||||
const has_leading_ws = switch (text[0]) {
|
||||
' ', '\t', '\r', '\n' => true,
|
||||
else => false,
|
||||
};
|
||||
const has_trailing_ws = switch (text[text.len - 1]) {
|
||||
' ', '\t', '\r', '\n' => true,
|
||||
else => false,
|
||||
};
|
||||
|
||||
if (has_leading_ws or has_trailing_ws) {
|
||||
const trimmed = std.mem.trim(u8, text, " \t\r\n");
|
||||
var idx: usize = 0;
|
||||
|
||||
var buf = &self.buf;
|
||||
if (has_leading_ws) {
|
||||
buf[idx] = ' ';
|
||||
idx += 1;
|
||||
}
|
||||
|
||||
@memcpy(buf[idx..][0..trimmed.len], trimmed);
|
||||
idx += trimmed.len;
|
||||
|
||||
if (has_trailing_ws) {
|
||||
buf[idx] = ' ';
|
||||
idx += 1;
|
||||
}
|
||||
|
||||
normalized = buf[0..idx];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// might seem unlikely that we get an intern hit, but we'll get some nodes
|
||||
// with just '\n'
|
||||
const owned_text = try self.dupeString(text);
|
||||
const owned_text = try self.dupeString(normalized);
|
||||
const cd = try self._factory.node(CData{
|
||||
._proto = undefined,
|
||||
._type = .{ .text = .{
|
||||
@@ -2127,6 +2162,16 @@ pub fn createTextNode(self: *Page, text: []const u8) !*Node {
|
||||
return cd.asNode();
|
||||
}
|
||||
|
||||
/// Reports whether text created under `node` may have its leading/trailing
/// whitespace collapsed. Returns false when `node.is(Element.Html)` yields
/// null, and false for script, style, textarea and pre elements; every other
/// HTML element type returns true.
fn canCollapseWhiteSpace(node: *Node) bool {
|
||||
// it's possible that some of these are safe to collapse, but it isn't worth
|
||||
// the risk/complexity.
|
||||
const el = node.is(Element.Html) orelse return false;
|
||||
return switch (el._type) {
|
||||
.script, .style, .textarea, .pre => false,
|
||||
else => true,
|
||||
};
|
||||
}
|
||||
|
||||
pub fn createComment(self: *Page, text: []const u8) !*Node {
|
||||
const owned_text = try self.dupeString(text);
|
||||
const cd = try self._factory.node(CData{
|
||||
|
||||
@@ -410,7 +410,7 @@ fn _appendBeforeSiblingCallback(self: *Parser, sibling: *Node, node_or_text: h5e
|
||||
const parent = sibling.parentNode() orelse return error.NoParent;
|
||||
const node: *Node = switch (node_or_text.toUnion()) {
|
||||
.node => |cpn| getNode(cpn),
|
||||
.text => |txt| try self.page.createTextNode(txt),
|
||||
.text => |txt| try self.page.createTextNode(txt, parent),
|
||||
};
|
||||
try self.page.insertNodeRelative(parent, node, .{ .before = sibling }, .{});
|
||||
}
|
||||
|
||||
@@ -54,7 +54,7 @@ pub fn createHTMLDocument(_: *const DOMImplementation, title: ?[]const u8, page:
|
||||
if (title) |t| {
|
||||
const title_node = try page.createElementNS(.html, "title", null);
|
||||
_ = try head_node.appendChild(title_node, page);
|
||||
const text_node = try page.createTextNode(t);
|
||||
const text_node = try page.createTextNode(t, title_node);
|
||||
_ = try title_node.appendChild(text_node, page);
|
||||
}
|
||||
|
||||
|
||||
@@ -313,7 +313,7 @@ pub fn createComment(self: *Document, data: []const u8, page: *Page) !*Node {
|
||||
}
|
||||
|
||||
pub fn createTextNode(self: *Document, data: []const u8, page: *Page) !*Node {
|
||||
const node = try page.createTextNode(data);
|
||||
const node = try page.createTextNode(data, null);
|
||||
// Track owner document if it's not the main document
|
||||
if (self != page.document) {
|
||||
try page.setNodeOwnerDocument(node, self);
|
||||
|
||||
@@ -589,8 +589,9 @@ pub fn insertAdjacentText(
|
||||
data: []const u8,
|
||||
page: *Page,
|
||||
) !void {
|
||||
const text_node = try page.createTextNode(data);
|
||||
const target_node, const prev_node = try self.asNode().findAdjacentNodes(where);
|
||||
const node = self.asNode();
|
||||
const text_node = try page.createTextNode(data, node);
|
||||
const target_node, const prev_node = try node.findAdjacentNodes(where);
|
||||
_ = try target_node.insertBefore(text_node, prev_node, page);
|
||||
}
|
||||
|
||||
|
||||
@@ -676,7 +676,7 @@ pub fn cloneNode(self: *Node, deep_: ?bool, page: *Page) error{ OutOfMemory, Str
|
||||
.cdata => |cd| {
|
||||
const data = cd.getData();
|
||||
return switch (cd._type) {
|
||||
.text => page.createTextNode(data),
|
||||
.text => page.createTextNode(data, null),
|
||||
.cdata_section => page.createCDATASection(data),
|
||||
.comment => page.createComment(data),
|
||||
.processing_instruction => |pi| page.createProcessingInstruction(pi._target, data),
|
||||
@@ -994,7 +994,7 @@ pub const NodeOrText = union(enum) {
|
||||
pub fn toNode(self: *const NodeOrText, page: *Page) !*Node {
|
||||
return switch (self.*) {
|
||||
.node => |n| n,
|
||||
.text => |txt| page.createTextNode(txt),
|
||||
.text => |txt| page.createTextNode(txt, null),
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
@@ -322,8 +322,8 @@ pub fn insertNode(self: *Range, node: *Node, page: *Page) !void {
|
||||
const before_text = text_data[0..offset];
|
||||
const after_text = text_data[offset..];
|
||||
|
||||
const before = try page.createTextNode(before_text);
|
||||
const after = try page.createTextNode(after_text);
|
||||
const before = try page.createTextNode(before_text, parent);
|
||||
const after = try page.createTextNode(after_text, parent);
|
||||
|
||||
_ = try parent.replaceChild(before, container, page);
|
||||
_ = try parent.insertBefore(node, before.nextSibling(), page);
|
||||
@@ -423,8 +423,9 @@ pub fn cloneContents(self: *const Range, page: *Page) !*DocumentFragment {
|
||||
const text_data = self._proto._start_container.getData();
|
||||
if (self._proto._start_offset < text_data.len and self._proto._end_offset <= text_data.len) {
|
||||
const cloned_text = text_data[self._proto._start_offset..self._proto._end_offset];
|
||||
const text_node = try page.createTextNode(cloned_text);
|
||||
_ = try fragment.asNode().appendChild(text_node, page);
|
||||
const parent = fragment.asNode();
|
||||
const text_node = try page.createTextNode(cloned_text, parent);
|
||||
_ = try parent.appendChild(text_node, page);
|
||||
}
|
||||
} else {
|
||||
// Clone child nodes in range
|
||||
@@ -444,7 +445,7 @@ pub fn cloneContents(self: *const Range, page: *Page) !*DocumentFragment {
|
||||
if (self._proto._start_offset < text_data.len) {
|
||||
// Clone from start_offset to end of text
|
||||
const cloned_text = text_data[self._proto._start_offset..];
|
||||
const text_node = try page.createTextNode(cloned_text);
|
||||
const text_node = try page.createTextNode(cloned_text, null);
|
||||
_ = try fragment.asNode().appendChild(text_node, page);
|
||||
}
|
||||
}
|
||||
@@ -465,8 +466,9 @@ pub fn cloneContents(self: *const Range, page: *Page) !*DocumentFragment {
|
||||
if (self._proto._end_offset > 0 and self._proto._end_offset <= text_data.len) {
|
||||
// Clone from start to end_offset
|
||||
const cloned_text = text_data[0..self._proto._end_offset];
|
||||
const text_node = try page.createTextNode(cloned_text);
|
||||
_ = try fragment.asNode().appendChild(text_node, page);
|
||||
const parent = fragment.asNode();
|
||||
const text_node = try page.createTextNode(cloned_text, parent);
|
||||
_ = try parent.appendChild(text_node, page);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -24,7 +24,7 @@ const Text = @This();
|
||||
_proto: *CData,
|
||||
|
||||
pub fn init(str: ?[]const u8, page: *Page) !*Text {
|
||||
const node = try page.createTextNode(str orelse "");
|
||||
const node = try page.createTextNode(str orelse "", null);
|
||||
return node.as(Text);
|
||||
}
|
||||
|
||||
@@ -40,7 +40,7 @@ pub fn splitText(self: *Text, offset: usize, page: *Page) !*Text {
|
||||
}
|
||||
|
||||
const new_data = data[offset..];
|
||||
const new_node = try page.createTextNode(new_data);
|
||||
const new_node = try page.createTextNode(new_data, null);
|
||||
const new_text = new_node.as(Text);
|
||||
|
||||
const old_data = data[0..offset];
|
||||
|
||||
@@ -264,7 +264,7 @@ pub fn setInnerText(self: *HtmlElement, text: []const u8, page: *Page) !void {
|
||||
}
|
||||
|
||||
// Create and append text node
|
||||
const text_node = try page.createTextNode(text);
|
||||
const text_node = try page.createTextNode(text, parent);
|
||||
try page.appendNode(parent, text_node, .{ .child_already_connected = false });
|
||||
}
|
||||
|
||||
|
||||
@@ -79,7 +79,7 @@ pub fn setDefaultValue(self: *TextArea, value: []const u8, page: *Page) !void {
|
||||
}
|
||||
|
||||
// No text child exists, create one
|
||||
const text_node = try page.createTextNode(owned);
|
||||
const text_node = try page.createTextNode(owned, node);
|
||||
_ = try node.appendChild(text_node, page);
|
||||
}
|
||||
|
||||
|
||||
@@ -134,6 +134,7 @@ pub const String = packed struct {
|
||||
// This can be used outside of the small string optimization
|
||||
pub fn intern(input: []const u8) ?[]const u8 {
|
||||
switch (input.len) {
|
||||
0 => return "",
|
||||
1 => switch (input[0]) {
|
||||
'\n' => return "\n",
|
||||
'\r' => return "\r",
|
||||
|
||||
Reference in New Issue
Block a user