Change CData._data from []const u8 to String (SSO)

After looking at a handful of websites, the number of Text and Comment nodes
that are small (<= 12 bytes) is _really_ high, ranging from 85% to 98%. I
thought that was high, but a lot of it is indentation or a sentence that's
broken down into multiple nodes, e.g.:

<div><b>sale!</b> <span class=price>$1.99</span> buy now</div>

So what looks like 1 sentence to us, is actually 3 text nodes.

On a typical website, we should see thousands fewer allocations in the
page arena for the text in text nodes.
This commit is contained in:
Karl Seguin
2026-02-27 12:53:54 +08:00
parent a14ad6f700
commit 870fd1654d
12 changed files with 252 additions and 98 deletions

View File

@@ -1360,10 +1360,8 @@ pub fn appendNew(self: *Page, parent: *Node, child: Node.NodeOrText) !void {
if (parent.lastChild()) |sibling| { if (parent.lastChild()) |sibling| {
if (sibling.is(CData.Text)) |tn| { if (sibling.is(CData.Text)) |tn| {
const cdata = tn._proto; const cdata = tn._proto;
const existing = cdata.getData(); const existing = cdata.getData().str();
// @metric cdata._data = try String.concat(self.arena, &.{ existing, txt });
// Inefficient, but we don't expect this to happen often.
cdata._data = try std.mem.concat(self.arena, u8, &.{ existing, txt });
return; return;
} }
} }
@@ -2193,28 +2191,24 @@ fn populateElementAttributes(self: *Page, element: *Element, list: anytype) !voi
} }
pub fn createTextNode(self: *Page, text: []const u8) !*Node { pub fn createTextNode(self: *Page, text: []const u8) !*Node {
// might seem unlikely that we get an intern hit, but we'll get some nodes
// with just '\n'
const owned_text = try self.dupeString(text);
const cd = try self._factory.node(CData{ const cd = try self._factory.node(CData{
._proto = undefined, ._proto = undefined,
._type = .{ .text = .{ ._type = .{ .text = .{
._proto = undefined, ._proto = undefined,
} }, } },
._data = owned_text, ._data = try self.dupeSSO(text),
}); });
cd._type.text._proto = cd; cd._type.text._proto = cd;
return cd.asNode(); return cd.asNode();
} }
pub fn createComment(self: *Page, text: []const u8) !*Node { pub fn createComment(self: *Page, text: []const u8) !*Node {
const owned_text = try self.dupeString(text);
const cd = try self._factory.node(CData{ const cd = try self._factory.node(CData{
._proto = undefined, ._proto = undefined,
._type = .{ .comment = .{ ._type = .{ .comment = .{
._proto = undefined, ._proto = undefined,
} }, } },
._data = owned_text, ._data = try self.dupeSSO(text),
}); });
cd._type.comment._proto = cd; cd._type.comment._proto = cd;
return cd.asNode(); return cd.asNode();
@@ -2226,8 +2220,6 @@ pub fn createCDATASection(self: *Page, data: []const u8) !*Node {
return error.InvalidCharacterError; return error.InvalidCharacterError;
} }
const owned_data = try self.dupeString(data);
// First allocate the Text node separately // First allocate the Text node separately
const text_node = try self._factory.create(CData.Text{ const text_node = try self._factory.create(CData.Text{
._proto = undefined, ._proto = undefined,
@@ -2239,7 +2231,7 @@ pub fn createCDATASection(self: *Page, data: []const u8) !*Node {
._type = .{ .cdata_section = .{ ._type = .{ .cdata_section = .{
._proto = text_node, ._proto = text_node,
} }, } },
._data = owned_data, ._data = try self.dupeSSO(data),
}); });
// Set up the back pointer from Text to CData // Set up the back pointer from Text to CData
@@ -2261,7 +2253,6 @@ pub fn createProcessingInstruction(self: *Page, target: []const u8, data: []cons
try validateXmlName(target); try validateXmlName(target);
const owned_target = try self.dupeString(target); const owned_target = try self.dupeString(target);
const owned_data = try self.dupeString(data);
const pi = try self._factory.create(CData.ProcessingInstruction{ const pi = try self._factory.create(CData.ProcessingInstruction{
._proto = undefined, ._proto = undefined,
@@ -2271,7 +2262,7 @@ pub fn createProcessingInstruction(self: *Page, target: []const u8, data: []cons
const cd = try self._factory.node(CData{ const cd = try self._factory.node(CData{
._proto = undefined, ._proto = undefined,
._type = .{ .processing_instruction = pi }, ._type = .{ .processing_instruction = pi },
._data = owned_data, ._data = try self.dupeSSO(data),
}); });
// Set up the back pointer from ProcessingInstruction to CData // Set up the back pointer from ProcessingInstruction to CData
@@ -2344,6 +2335,10 @@ pub fn dupeString(self: *Page, value: []const u8) ![]const u8 {
return self.arena.dupe(u8, value); return self.arena.dupe(u8, value);
} }
/// Builds a `String` from `value` via `String.init`, passing the page arena
/// as the allocator and requesting that the bytes be duplicated
/// (`.dupe = true`).
pub fn dupeSSO(self: *Page, value: []const u8) !String {
    const arena = self.arena;
    return String.init(arena, value, .{ .dupe = true });
}
const RemoveNodeOpts = struct { const RemoveNodeOpts = struct {
will_be_reconnected: bool, will_be_reconnected: bool,
}; };
@@ -2747,7 +2742,7 @@ pub fn setCustomizedBuiltInDefinition(self: *Page, element: *Element, definition
pub fn characterDataChange( pub fn characterDataChange(
self: *Page, self: *Page,
target: *Node, target: *Node,
old_value: []const u8, old_value: String,
) void { ) void {
var it: ?*std.DoublyLinkedList.Node = self._mutation_observers.first; var it: ?*std.DoublyLinkedList.Node = self._mutation_observers.first;
while (it) |node| : (it = node.next) { while (it) |node| : (it = node.next) {

View File

@@ -82,19 +82,19 @@ fn _deep(node: *Node, opts: Opts, comptime force_slot: bool, writer: *std.Io.Wri
.cdata => |cd| { .cdata => |cd| {
if (node.is(Node.CData.Comment)) |_| { if (node.is(Node.CData.Comment)) |_| {
try writer.writeAll("<!--"); try writer.writeAll("<!--");
try writer.writeAll(cd.getData()); try writer.writeAll(cd.getData().str());
try writer.writeAll("-->"); try writer.writeAll("-->");
} else if (node.is(Node.CData.ProcessingInstruction)) |pi| { } else if (node.is(Node.CData.ProcessingInstruction)) |pi| {
try writer.writeAll("<?"); try writer.writeAll("<?");
try writer.writeAll(pi._target); try writer.writeAll(pi._target);
try writer.writeAll(" "); try writer.writeAll(" ");
try writer.writeAll(cd.getData()); try writer.writeAll(cd.getData().str());
try writer.writeAll("?>"); try writer.writeAll("?>");
} else { } else {
if (shouldEscapeText(node._parent)) { if (shouldEscapeText(node._parent)) {
try writeEscapedText(cd.getData(), writer); try writeEscapedText(cd.getData().str(), writer);
} else { } else {
try writer.writeAll(cd.getData()); try writer.writeAll(cd.getData().str());
} }
} }
}, },

View File

@@ -145,7 +145,7 @@ fn render(node: *Node, state: *State, writer: *std.Io.Writer, page: *Page) error
}, },
.cdata => |cd| { .cdata => |cd| {
if (node.is(Node.CData.Text)) |_| { if (node.is(Node.CData.Text)) |_| {
var text = cd.getData(); var text = cd.getData().str();
if (state.pre_node) |pre| { if (state.pre_node) |pre| {
if (node.parentNode() == pre and node.nextSibling() == null) { if (node.parentNode() == pre and node.nextSibling() == null) {
text = std.mem.trimRight(u8, text, " \t\r\n"); text = std.mem.trimRight(u8, text, " \t\r\n");

View File

@@ -17,6 +17,7 @@
// along with this program. If not, see <https://www.gnu.org/licenses/>. // along with this program. If not, see <https://www.gnu.org/licenses/>.
const std = @import("std"); const std = @import("std");
const String = @import("../../string.zig").String;
const js = @import("../js/js.zig"); const js = @import("../js/js.zig");
const Page = @import("../Page.zig"); const Page = @import("../Page.zig");
@@ -31,7 +32,7 @@ const CData = @This();
_type: Type, _type: Type,
_proto: *Node, _proto: *Node,
_data: []const u8 = "", _data: String = .empty,
/// Count UTF-16 code units in a UTF-8 string. /// Count UTF-16 code units in a UTF-8 string.
/// 4-byte UTF-8 sequences (codepoints >= U+10000) produce 2 UTF-16 code units (surrogate pair), /// 4-byte UTF-8 sequences (codepoints >= U+10000) produce 2 UTF-16 code units (surrogate pair),
@@ -157,7 +158,7 @@ pub fn is(self: *CData, comptime T: type) ?*T {
return null; return null;
} }
pub fn getData(self: *const CData) []const u8 { pub fn getData(self: *const CData) String {
return self._data; return self._data;
} }
@@ -172,7 +173,7 @@ pub fn render(self: *const CData, writer: *std.io.Writer, opts: RenderOpts) !boo
var start: usize = 0; var start: usize = 0;
var prev_w: ?bool = null; var prev_w: ?bool = null;
var is_w: bool = undefined; var is_w: bool = undefined;
const s = self._data; const s = self._data.str();
for (s, 0..) |c, i| { for (s, 0..) |c, i| {
is_w = std.ascii.isWhitespace(c); is_w = std.ascii.isWhitespace(c);
@@ -222,9 +223,9 @@ pub fn setData(self: *CData, value: ?[]const u8, page: *Page) !void {
const old_value = self._data; const old_value = self._data;
if (value) |v| { if (value) |v| {
self._data = try page.dupeString(v); self._data = try page.dupeSSO(v);
} else { } else {
self._data = ""; self._data = .empty;
} }
page.characterDataChange(self.asNode(), old_value); page.characterDataChange(self.asNode(), old_value);
@@ -243,15 +244,15 @@ pub fn _setData(self: *CData, value: js.Value, page: *Page) !void {
pub fn format(self: *const CData, writer: *std.io.Writer) !void { pub fn format(self: *const CData, writer: *std.io.Writer) !void {
return switch (self._type) { return switch (self._type) {
.text => writer.print("<text>{s}</text>", .{self._data}), .text => writer.print("<text>{f}</text>", .{self._data}),
.comment => writer.print("<!-- {s} -->", .{self._data}), .comment => writer.print("<!-- {f} -->", .{self._data}),
.cdata_section => writer.print("<![CDATA[{s}]]>", .{self._data}), .cdata_section => writer.print("<![CDATA[{f}]]>", .{self._data}),
.processing_instruction => |pi| writer.print("<?{s} {s}?>", .{ pi._target, self._data }), .processing_instruction => |pi| writer.print("<?{s} {f}?>", .{ pi._target, self._data }),
}; };
} }
pub fn getLength(self: *const CData) usize { pub fn getLength(self: *const CData) usize {
return utf16Len(self._data); return utf16Len(self._data.str());
} }
pub fn isEqualNode(self: *const CData, other: *const CData) bool { pub fn isEqualNode(self: *const CData, other: *const CData) bool {
@@ -267,58 +268,64 @@ pub fn isEqualNode(self: *const CData, other: *const CData) bool {
// if the _targets are equal, we still want to compare the data // if the _targets are equal, we still want to compare the data
} }
return std.mem.eql(u8, self.getData(), other.getData()); return self._data.eql(other._data);
} }
pub fn appendData(self: *CData, data: []const u8, page: *Page) !void { pub fn appendData(self: *CData, data: []const u8, page: *Page) !void {
const new_data = try std.mem.concat(page.arena, u8, &.{ self._data, data }); const old_value = self._data;
try self.setData(new_data, page); self._data = try String.concat(page.arena, &.{ self._data.str(), data });
page.characterDataChange(self.asNode(), old_value);
} }
pub fn deleteData(self: *CData, offset: usize, count: usize, page: *Page) !void { pub fn deleteData(self: *CData, offset: usize, count: usize, page: *Page) !void {
const end_utf16 = std.math.add(usize, offset, count) catch std.math.maxInt(usize); const end_utf16 = std.math.add(usize, offset, count) catch std.math.maxInt(usize);
const range = try utf16RangeToUtf8(self._data, offset, end_utf16); const range = try utf16RangeToUtf8(self._data.str(), offset, end_utf16);
// Just slice - original data stays in arena const old_data = self._data;
const old_value = self._data; const old_value = old_data.str();
if (range.start == 0) { if (range.start == 0) {
self._data = self._data[range.end..]; self._data = try page.dupeSSO(old_value[range.end..]);
} else if (range.end >= self._data.len) { } else if (range.end >= old_value.len) {
self._data = self._data[0..range.start]; self._data = try page.dupeSSO(old_value[0..range.start]);
} else { } else {
self._data = try std.mem.concat(page.arena, u8, &.{ // Deleting from middle - concat prefix and suffix
self._data[0..range.start], self._data = try String.concat(page.arena, &.{
self._data[range.end..], old_value[0..range.start],
old_value[range.end..],
}); });
} }
page.characterDataChange(self.asNode(), old_value); page.characterDataChange(self.asNode(), old_data);
} }
pub fn insertData(self: *CData, offset: usize, data: []const u8, page: *Page) !void { pub fn insertData(self: *CData, offset: usize, data: []const u8, page: *Page) !void {
const byte_offset = try utf16OffsetToUtf8(self._data, offset); const byte_offset = try utf16OffsetToUtf8(self._data.str(), offset);
const new_data = try std.mem.concat(page.arena, u8, &.{ const old_value = self._data;
self._data[0..byte_offset], const existing = old_value.str();
self._data = try String.concat(page.arena, &.{
existing[0..byte_offset],
data, data,
self._data[byte_offset..], existing[byte_offset..],
}); });
try self.setData(new_data, page); page.characterDataChange(self.asNode(), old_value);
} }
pub fn replaceData(self: *CData, offset: usize, count: usize, data: []const u8, page: *Page) !void { pub fn replaceData(self: *CData, offset: usize, count: usize, data: []const u8, page: *Page) !void {
const end_utf16 = std.math.add(usize, offset, count) catch std.math.maxInt(usize); const end_utf16 = std.math.add(usize, offset, count) catch std.math.maxInt(usize);
const range = try utf16RangeToUtf8(self._data, offset, end_utf16); const range = try utf16RangeToUtf8(self._data.str(), offset, end_utf16);
const new_data = try std.mem.concat(page.arena, u8, &.{ const old_value = self._data;
self._data[0..range.start], const existing = old_value.str();
self._data = try String.concat(page.arena, &.{
existing[0..range.start],
data, data,
self._data[range.end..], existing[range.end..],
}); });
try self.setData(new_data, page); page.characterDataChange(self.asNode(), old_value);
} }
pub fn substringData(self: *const CData, offset: usize, count: usize) ![]const u8 { pub fn substringData(self: *const CData, offset: usize, count: usize) ![]const u8 {
const end_utf16 = std.math.add(usize, offset, count) catch std.math.maxInt(usize); const end_utf16 = std.math.add(usize, offset, count) catch std.math.maxInt(usize);
const range = try utf16RangeToUtf8(self._data, offset, end_utf16); const range = try utf16RangeToUtf8(self._data.str(), offset, end_utf16);
return self._data[range.start..range.end]; return self._data.str()[range.start..range.end];
} }
pub fn remove(self: *CData, page: *Page) !void { pub fn remove(self: *CData, page: *Page) !void {
@@ -451,7 +458,7 @@ test "WebApi: CData.render" {
const cdata = CData{ const cdata = CData{
._type = .{ .text = undefined }, ._type = .{ .text = undefined },
._proto = undefined, ._proto = undefined,
._data = test_case.value, ._data = .wrap(test_case.value),
}; };
const result = try cdata.render(&buffer.writer, test_case.opts); const result = try cdata.render(&buffer.writer, test_case.opts);

View File

@@ -243,7 +243,7 @@ pub fn notifyAttributeChange(
pub fn notifyCharacterDataChange( pub fn notifyCharacterDataChange(
self: *MutationObserver, self: *MutationObserver,
target: *Node, target: *Node,
old_value: ?[]const u8, old_value: ?String,
page: *Page, page: *Page,
) !void { ) !void {
for (self._observing.items) |obs| { for (self._observing.items) |obs| {
@@ -267,7 +267,7 @@ pub fn notifyCharacterDataChange(
._target = target, ._target = target,
._attribute_name = null, ._attribute_name = null,
._old_value = if (obs.options.characterDataOldValue and old_value != null) ._old_value = if (obs.options.characterDataOldValue and old_value != null)
try arena.dupe(u8, old_value.?) try arena.dupe(u8, old_value.?.str())
else else
null, null,
._added_nodes = &.{}, ._added_nodes = &.{},

View File

@@ -270,7 +270,7 @@ pub fn getTextContent(self: *Node, writer: *std.Io.Writer) error{WriteFailed}!vo
try child.getTextContent(writer); try child.getTextContent(writer);
} }
}, },
.cdata => |c| try writer.writeAll(c.getData()), .cdata => |c| try writer.writeAll(c.getData().str()),
.document => {}, .document => {},
.document_type => {}, .document_type => {},
.attribute => |attr| try writer.writeAll(attr._value.str()), .attribute => |attr| try writer.writeAll(attr._value.str()),
@@ -293,7 +293,7 @@ pub fn setTextContent(self: *Node, data: []const u8, page: *Page) !void {
} }
return el.replaceChildren(&.{.{ .text = data }}, page); return el.replaceChildren(&.{.{ .text = data }}, page);
}, },
.cdata => |c| c._data = try page.arena.dupe(u8, data), .cdata => |c| c._data = try page.dupeSSO(data),
.document => {}, .document => {},
.document_type => {}, .document_type => {},
.document_fragment => |frag| { .document_fragment => |frag| {
@@ -599,10 +599,10 @@ pub fn replaceChild(self: *Node, new_child: *Node, old_child: *Node, page: *Page
return old_child; return old_child;
} }
pub fn getNodeValue(self: *const Node) ?[]const u8 { pub fn getNodeValue(self: *const Node) ?String {
return switch (self._type) { return switch (self._type) {
.cdata => |c| c.getData(), .cdata => |c| c.getData(),
.attribute => |attr| attr._value.str(), .attribute => |attr| attr._value,
.element => null, .element => null,
.document => null, .document => null,
.document_type => null, .document_type => null,
@@ -694,10 +694,10 @@ pub fn getChildAt(self: *Node, index: u32) ?*Node {
return null; return null;
} }
pub fn getData(self: *const Node) []const u8 { pub fn getData(self: *const Node) String {
return switch (self._type) { return switch (self._type) {
.cdata => |c| c.getData(), .cdata => |c| c.getData(),
else => "", else => .empty,
}; };
} }
@@ -729,7 +729,7 @@ pub fn cloneNode(self: *Node, deep_: ?bool, page: *Page) CloneError!*Node {
const deep = deep_ orelse false; const deep = deep_ orelse false;
switch (self._type) { switch (self._type) {
.cdata => |cd| { .cdata => |cd| {
const data = cd.getData(); const data = cd.getData().str();
return switch (cd._type) { return switch (cd._type) {
.text => page.createTextNode(data), .text => page.createTextNode(data),
.cdata_section => page.createCDATASection(data), .cdata_section => page.createCDATASection(data),
@@ -884,7 +884,7 @@ fn _normalize(self: *Node, allocator: Allocator, buffer: *std.ArrayList(u8), pag
next_node = node_to_merge.nextSibling(); next_node = node_to_merge.nextSibling();
page.removeNode(self, to_remove, .{ .will_be_reconnected = false }); page.removeNode(self, to_remove, .{ .will_be_reconnected = false });
} }
text_node._proto._data = try page.dupeString(buffer.items); text_node._proto._data = try page.dupeSSO(buffer.items);
buffer.clearRetainingCapacity(); buffer.clearRetainingCapacity();
} }
} }
@@ -1028,7 +1028,7 @@ pub const JsApi = struct {
try self.getTextContent(&buf.writer); try self.getTextContent(&buf.writer);
return buf.written(); return buf.written();
}, },
.cdata => |cdata| return cdata.getData(), .cdata => |cdata| return cdata.getData().str(),
.attribute => |attr| return attr._value.str(), .attribute => |attr| return attr._value.str(),
.document => return null, .document => return null,
.document_type => return null, .document_type => return null,

View File

@@ -17,9 +17,11 @@
// along with this program. If not, see <https://www.gnu.org/licenses/>. // along with this program. If not, see <https://www.gnu.org/licenses/>.
const std = @import("std"); const std = @import("std");
const js = @import("../js/js.zig"); const String = @import("../../string.zig").String;
const js = @import("../js/js.zig");
const Page = @import("../Page.zig"); const Page = @import("../Page.zig");
const Node = @import("Node.zig"); const Node = @import("Node.zig");
const DocumentFragment = @import("DocumentFragment.zig"); const DocumentFragment = @import("DocumentFragment.zig");
const AbstractRange = @import("AbstractRange.zig"); const AbstractRange = @import("AbstractRange.zig");
@@ -326,7 +328,7 @@ pub fn insertNode(self: *Range, node: *Node, page: *Page) !void {
if (offset == 0) { if (offset == 0) {
_ = try parent.insertBefore(node, container, page); _ = try parent.insertBefore(node, container, page);
} else { } else {
const text_data = container.getData(); const text_data = container.getData().str();
if (offset >= text_data.len) { if (offset >= text_data.len) {
_ = try parent.insertBefore(node, container.nextSibling(), page); _ = try parent.insertBefore(node, container.nextSibling(), page);
} else { } else {
@@ -362,15 +364,15 @@ pub fn deleteContents(self: *Range, page: *Page) !void {
// Simple case: same container // Simple case: same container
if (self._proto._start_container == self._proto._end_container) { if (self._proto._start_container == self._proto._end_container) {
if (self._proto._start_container.is(Node.CData)) |_| { if (self._proto._start_container.is(Node.CData)) |cdata| {
// Delete part of text node // Delete part of text node
const text_data = self._proto._start_container.getData(); const old_value = cdata.getData();
const new_text = try std.mem.concat( const text_data = old_value.str();
cdata._data = try String.concat(
page.arena, page.arena,
u8,
&.{ text_data[0..self._proto._start_offset], text_data[self._proto._end_offset..] }, &.{ text_data[0..self._proto._start_offset], text_data[self._proto._end_offset..] },
); );
try self._proto._start_container.setData(new_text, page); page.characterDataChange(self._proto._start_container, old_value);
} else { } else {
// Delete child nodes in range // Delete child nodes in range
var offset = self._proto._start_offset; var offset = self._proto._start_offset;
@@ -387,7 +389,7 @@ pub fn deleteContents(self: *Range, page: *Page) !void {
// Complex case: different containers // Complex case: different containers
// Handle start container - if it's a text node, truncate it // Handle start container - if it's a text node, truncate it
if (self._proto._start_container.is(Node.CData)) |_| { if (self._proto._start_container.is(Node.CData)) |_| {
const text_data = self._proto._start_container.getData(); const text_data = self._proto._start_container.getData().str();
if (self._proto._start_offset < text_data.len) { if (self._proto._start_offset < text_data.len) {
// Keep only the part before start_offset // Keep only the part before start_offset
const new_text = text_data[0..self._proto._start_offset]; const new_text = text_data[0..self._proto._start_offset];
@@ -397,7 +399,7 @@ pub fn deleteContents(self: *Range, page: *Page) !void {
// Handle end container - if it's a text node, truncate it // Handle end container - if it's a text node, truncate it
if (self._proto._end_container.is(Node.CData)) |_| { if (self._proto._end_container.is(Node.CData)) |_| {
const text_data = self._proto._end_container.getData(); const text_data = self._proto._end_container.getData().str();
if (self._proto._end_offset < text_data.len) { if (self._proto._end_offset < text_data.len) {
// Keep only the part from end_offset onwards // Keep only the part from end_offset onwards
const new_text = text_data[self._proto._end_offset..]; const new_text = text_data[self._proto._end_offset..];
@@ -433,7 +435,7 @@ pub fn cloneContents(self: *const Range, page: *Page) !*DocumentFragment {
if (self._proto._start_container == self._proto._end_container) { if (self._proto._start_container == self._proto._end_container) {
if (self._proto._start_container.is(Node.CData)) |_| { if (self._proto._start_container.is(Node.CData)) |_| {
// Clone part of text node // Clone part of text node
const text_data = self._proto._start_container.getData(); const text_data = self._proto._start_container.getData().str();
if (self._proto._start_offset < text_data.len and self._proto._end_offset <= text_data.len) { if (self._proto._start_offset < text_data.len and self._proto._end_offset <= text_data.len) {
const cloned_text = text_data[self._proto._start_offset..self._proto._end_offset]; const cloned_text = text_data[self._proto._start_offset..self._proto._end_offset];
const text_node = try page.createTextNode(cloned_text); const text_node = try page.createTextNode(cloned_text);
@@ -453,7 +455,7 @@ pub fn cloneContents(self: *const Range, page: *Page) !*DocumentFragment {
// Complex case: different containers // Complex case: different containers
// Clone partial start container // Clone partial start container
if (self._proto._start_container.is(Node.CData)) |_| { if (self._proto._start_container.is(Node.CData)) |_| {
const text_data = self._proto._start_container.getData(); const text_data = self._proto._start_container.getData().str();
if (self._proto._start_offset < text_data.len) { if (self._proto._start_offset < text_data.len) {
// Clone from start_offset to end of text // Clone from start_offset to end of text
const cloned_text = text_data[self._proto._start_offset..]; const cloned_text = text_data[self._proto._start_offset..];
@@ -474,7 +476,7 @@ pub fn cloneContents(self: *const Range, page: *Page) !*DocumentFragment {
// Clone partial end container // Clone partial end container
if (self._proto._end_container.is(Node.CData)) |_| { if (self._proto._end_container.is(Node.CData)) |_| {
const text_data = self._proto._end_container.getData(); const text_data = self._proto._end_container.getData().str();
if (self._proto._end_offset > 0 and self._proto._end_offset <= text_data.len) { if (self._proto._end_offset > 0 and self._proto._end_offset <= text_data.len) {
// Clone from start to end_offset // Clone from start to end_offset
const cloned_text = text_data[0..self._proto._end_offset]; const cloned_text = text_data[0..self._proto._end_offset];
@@ -560,7 +562,7 @@ fn writeTextContent(self: *const Range, writer: *std.Io.Writer) !void {
if (start_node == end_node) { if (start_node == end_node) {
if (start_node.is(Node.CData)) |cdata| { if (start_node.is(Node.CData)) |cdata| {
if (!isCommentOrPI(cdata)) { if (!isCommentOrPI(cdata)) {
const data = cdata.getData(); const data = cdata.getData().str();
const s = @min(start_offset, data.len); const s = @min(start_offset, data.len);
const e = @min(end_offset, data.len); const e = @min(end_offset, data.len);
try writer.writeAll(data[s..e]); try writer.writeAll(data[s..e]);
@@ -574,7 +576,7 @@ fn writeTextContent(self: *const Range, writer: *std.Io.Writer) !void {
// Partial start: if start container is a text node, write from offset to end // Partial start: if start container is a text node, write from offset to end
if (start_node.is(Node.CData)) |cdata| { if (start_node.is(Node.CData)) |cdata| {
if (!isCommentOrPI(cdata)) { if (!isCommentOrPI(cdata)) {
const data = cdata.getData(); const data = cdata.getData().str();
const s = @min(start_offset, data.len); const s = @min(start_offset, data.len);
try writer.writeAll(data[s..]); try writer.writeAll(data[s..]);
} }
@@ -601,7 +603,7 @@ fn writeTextContent(self: *const Range, writer: *std.Io.Writer) !void {
} }
if (n.is(Node.CData)) |cdata| { if (n.is(Node.CData)) |cdata| {
if (!isCommentOrPI(cdata)) { if (!isCommentOrPI(cdata)) {
try writer.writeAll(cdata.getData()); try writer.writeAll(cdata.getData().str());
} }
} }
current = nextInTreeOrder(n, root); current = nextInTreeOrder(n, root);
@@ -612,7 +614,7 @@ fn writeTextContent(self: *const Range, writer: *std.Io.Writer) !void {
if (start_node != end_node) { if (start_node != end_node) {
if (end_node.is(Node.CData)) |cdata| { if (end_node.is(Node.CData)) |cdata| {
if (!isCommentOrPI(cdata)) { if (!isCommentOrPI(cdata)) {
const data = cdata.getData(); const data = cdata.getData().str();
const e = @min(end_offset, data.len); const e = @min(end_offset, data.len);
try writer.writeAll(data[0..e]); try writer.writeAll(data[0..e]);
} }

View File

@@ -500,20 +500,20 @@ fn modifyByWord(self: *Selection, alter: ModifyAlter, forward: bool, range: *Ran
if (isTextNode(focus_node)) { if (isTextNode(focus_node)) {
if (forward) { if (forward) {
const i = nextWordEnd(new_node.getData(), new_offset); const i = nextWordEnd(new_node.getData().str(), new_offset);
if (i > new_offset) { if (i > new_offset) {
new_offset = i; new_offset = i;
} else if (nextTextNode(focus_node)) |next| { } else if (nextTextNode(focus_node)) |next| {
new_node = next; new_node = next;
new_offset = nextWordEnd(next.getData(), 0); new_offset = nextWordEnd(next.getData().str(), 0);
} }
} else { } else {
const i = prevWordStart(new_node.getData(), new_offset); const i = prevWordStart(new_node.getData().str(), new_offset);
if (i < new_offset) { if (i < new_offset) {
new_offset = i; new_offset = i;
} else if (prevTextNode(focus_node)) |prev| { } else if (prevTextNode(focus_node)) |prev| {
new_node = prev; new_node = prev;
new_offset = prevWordStart(prev.getData(), @intCast(prev.getData().len)); new_offset = prevWordStart(prev.getData().str(), @intCast(prev.getData().len));
} }
} }
} else { } else {
@@ -524,7 +524,7 @@ fn modifyByWord(self: *Selection, alter: ModifyAlter, forward: bool, range: *Ran
const child = focus_node.getChildAt(focus_offset) orelse { const child = focus_node.getChildAt(focus_offset) orelse {
if (nextTextNodeAfter(focus_node)) |next| { if (nextTextNodeAfter(focus_node)) |next| {
new_node = next; new_node = next;
new_offset = nextWordEnd(next.getData(), 0); new_offset = nextWordEnd(next.getData().str(), 0);
} }
return self.applyModify(alter, new_node, new_offset, page); return self.applyModify(alter, new_node, new_offset, page);
}; };
@@ -534,7 +534,7 @@ fn modifyByWord(self: *Selection, alter: ModifyAlter, forward: bool, range: *Ran
}; };
new_node = t; new_node = t;
new_offset = nextWordEnd(t.getData(), 0); new_offset = nextWordEnd(t.getData().str(), 0);
} else { } else {
var idx = focus_offset; var idx = focus_offset;
while (idx > 0) { while (idx > 0) {
@@ -544,7 +544,7 @@ fn modifyByWord(self: *Selection, alter: ModifyAlter, forward: bool, range: *Ran
while (bottom.lastChild()) |c| bottom = c; while (bottom.lastChild()) |c| bottom = c;
if (isTextNode(bottom)) { if (isTextNode(bottom)) {
new_node = bottom; new_node = bottom;
new_offset = prevWordStart(bottom.getData(), bottom.getLength()); new_offset = prevWordStart(bottom.getData().str(), bottom.getLength());
break; break;
} }
} }

View File

@@ -16,6 +16,7 @@
// You should have received a copy of the GNU Affero General Public License // You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>. // along with this program. If not, see <https://www.gnu.org/licenses/>.
const String = @import("../../../string.zig").String;
const js = @import("../../js/js.zig"); const js = @import("../../js/js.zig");
const Page = @import("../../Page.zig"); const Page = @import("../../Page.zig");
const CData = @import("../CData.zig"); const CData = @import("../CData.zig");
@@ -30,11 +31,11 @@ pub fn init(str: ?js.NullableString, page: *Page) !*Text {
} }
pub fn getWholeText(self: *Text) []const u8 { pub fn getWholeText(self: *Text) []const u8 {
return self._proto._data; return self._proto._data.str();
} }
pub fn splitText(self: *Text, offset: usize, page: *Page) !*Text { pub fn splitText(self: *Text, offset: usize, page: *Page) !*Text {
const data = self._proto._data; const data = self._proto._data.str();
const byte_offset = CData.utf16OffsetToUtf8(data, offset) catch return error.IndexSizeError; const byte_offset = CData.utf16OffsetToUtf8(data, offset) catch return error.IndexSizeError;

View File

@@ -88,18 +88,16 @@ pub fn getDefaultValue(self: *const TextArea) []const u8 {
} }
pub fn setDefaultValue(self: *TextArea, value: []const u8, page: *Page) !void { pub fn setDefaultValue(self: *TextArea, value: []const u8, page: *Page) !void {
const owned = try page.dupeString(value);
const node = self.asNode(); const node = self.asNode();
if (node.firstChild()) |child| { if (node.firstChild()) |child| {
if (child.is(Node.CData.Text)) |txt| { if (child.is(Node.CData.Text)) |txt| {
txt._proto._data = owned; txt._proto._data = try page.dupeSSO(value);
return; return;
} }
} }
// No text child exists, create one // No text child exists, create one
const text_node = try page.createTextNode(owned); const text_node = try page.createTextNode(value);
_ = try node.appendChild(text_node, page); _ = try node.appendChild(text_node, page);
} }

View File

@@ -307,7 +307,11 @@ pub const Writer = struct {
try w.write(dom_node.getNodeName(&name_buf)); try w.write(dom_node.getNodeName(&name_buf));
try w.objectField("nodeValue"); try w.objectField("nodeValue");
try w.write(dom_node.getNodeValue() orelse ""); if (dom_node.getNodeValue()) |nv| {
try w.write(nv.str());
} else {
try w.write("");
}
if (include_child_count) { if (include_child_count) {
try w.objectField("childNodeCount"); try w.objectField("childNodeCount");

View File

@@ -111,6 +111,38 @@ pub const String = packed struct {
return .init(allocator, self.str(), .{ .dupe = true }); return .init(allocator, self.str(), .{ .dupe = true });
} }
/// Concatenates `parts`, in order, into a single String.
///
/// A result of 12 bytes or fewer is stored inline and `allocator` is not
/// used. A longer result is assembled in a buffer obtained from `allocator`;
/// when `intern` recognizes the joined bytes, the String points at the
/// interned copy and the temporary buffer is released.
///
/// Errors: `error.StringTooLarge` when the combined length does not fit in
/// the `i32` `len` field; any error returned by `allocator.alloc`.
pub fn concat(allocator: Allocator, parts: []const []const u8) !String {
    var total_len: usize = 0;
    for (parts) |part| {
        total_len += part.len;
    }

    // `len` is an i32: an unchecked @intCast of a larger value would be
    // illegal behavior, so reject oversized results up front.
    const max_len: usize = 0x7FFF_FFFF;
    if (total_len > max_len) {
        return error.StringTooLarge;
    }

    if (total_len <= 12) {
        // Small string: the bytes live inline, zero-padded to 12.
        var content: [12]u8 = @splat(0);
        var pos: usize = 0;
        for (parts) |part| {
            @memcpy(content[pos..][0..part.len], part);
            pos += part.len;
        }
        return .{ .len = @intCast(total_len), .payload = .{ .content = content } };
    }

    const result = try allocator.alloc(u8, total_len);
    var pos: usize = 0;
    for (parts) |part| {
        @memcpy(result[pos..][0..part.len], part);
        pos += part.len;
    }

    // Read the prefix before the buffer can be released below.
    const prefix = result[0..4].*;

    const ptr: [*]const u8 = if (intern(result)) |interned| blk: {
        // The interned copy replaces our buffer, so give the buffer back
        // instead of leaking it. The pointer comparison is purely defensive
        // in case `intern` ever hands back its own argument.
        if (interned.ptr != result.ptr) {
            allocator.free(result);
        }
        break :blk interned.ptr;
    } else result.ptr;

    return .{
        .len = @intCast(total_len),
        .payload = .{ .heap = .{
            .prefix = prefix,
            .ptr = ptr,
        } },
    };
}
pub fn str(self: *const String) []const u8 { pub fn str(self: *const String) []const u8 {
const l = self.len; const l = self.len;
if (l < 0) { if (l < 0) {
@@ -272,3 +304,118 @@ test "String" {
try testing.expectEqual(false, str.eqlSlice("other_long" ** 100)); try testing.expectEqual(false, str.eqlSlice("other_long" ** 100));
} }
} }
// Exercises String.concat across the inline (<= 12 bytes) and heap (> 12
// bytes) representations, with empty, multi-byte and sliced inputs.
test "String.concat" {
    // no parts at all
    {
        const result = try String.concat(testing.allocator, &.{});
        defer result.deinit(testing.allocator);
        try testing.expectEqual(@as(usize, 0), result.str().len);
        try testing.expectEqual("", result.str());
    }

    // a single part
    {
        const result = try String.concat(testing.allocator, &.{"hello"});
        defer result.deinit(testing.allocator);
        try testing.expectEqual("hello", result.str());
    }

    // two parts, inline
    {
        const result = try String.concat(testing.allocator, &.{ "foo", "bar" });
        defer result.deinit(testing.allocator);
        try testing.expectEqual("foobar", result.str());
        try testing.expectEqual(@as(i32, 6), result.len);
    }

    // one byte below the inline limit
    {
        const result = try String.concat(testing.allocator, &.{ "test", "ing", "1234" });
        defer result.deinit(testing.allocator);
        try testing.expectEqual("testing1234", result.str());
        try testing.expectEqual(@as(i32, 11), result.len);
    }

    // exactly at the inline limit (12 bytes), four parts
    {
        const result = try String.concat(testing.allocator, &.{ "foo", "bar", "baz", "qux" });
        defer result.deinit(testing.allocator);
        try testing.expectEqual("foobarbazqux", result.str());
        try testing.expectEqual(@as(i32, 12), result.len);
    }

    // exactly at the inline limit (12 bytes), two parts
    {
        const result = try String.concat(testing.allocator, &.{ "hello", " world!" });
        defer result.deinit(testing.allocator);
        try testing.expectEqual("hello world!", result.str());
        try testing.expectEqual(@as(i32, 12), result.len);
    }

    // many single-byte parts
    {
        const result = try String.concat(testing.allocator, &.{ "a", "b", "c", "d", "e" });
        defer result.deinit(testing.allocator);
        try testing.expectEqual("abcde", result.str());
        try testing.expectEqual(@as(i32, 5), result.len);
    }

    // longer than 12 bytes: heap representation
    {
        const result = try String.concat(testing.allocator, &.{ "one", " ", "two", " ", "three", " ", "four" });
        defer result.deinit(testing.allocator);
        try testing.expectEqual("one two three four", result.str());
        try testing.expectEqual(@as(i32, 18), result.len);
    }

    // an empty part in the middle contributes nothing
    {
        const result = try String.concat(testing.allocator, &.{ "hello", "", "world" });
        defer result.deinit(testing.allocator);
        try testing.expectEqual("helloworld", result.str());
    }

    // only empty parts
    {
        const result = try String.concat(testing.allocator, &.{ "", "", "" });
        defer result.deinit(testing.allocator);
        try testing.expectEqual("", result.str());
        try testing.expectEqual(@as(i32, 0), result.len);
    }

    // multi-byte UTF-8, short
    {
        const result = try String.concat(testing.allocator, &.{ "café", " ☕" });
        defer result.deinit(testing.allocator);
        try testing.expectEqual("café ☕", result.str());
    }

    // multi-byte UTF-8, long
    {
        const result = try String.concat(testing.allocator, &.{ "Hello ", "世界", " and ", "مرحبا" });
        defer result.deinit(testing.allocator);
        try testing.expectEqual("Hello 世界 and مرحبا", result.str());
    }

    // leading and trailing whitespace parts are preserved
    {
        const result = try String.concat(testing.allocator, &.{ " ", "test", " " });
        defer result.deinit(testing.allocator);
        try testing.expectEqual(" test ", result.str());
    }

    // whitespace-only parts: two spaces + two spaces
    {
        const result = try String.concat(testing.allocator, &.{ "  ", "  " });
        defer result.deinit(testing.allocator);
        try testing.expectEqual("    ", result.str());
        try testing.expectEqual(@as(i32, 4), result.len);
    }

    // digits appended one at a time
    {
        const result = try String.concat(testing.allocator, &.{ "Item ", "1", "2", "3" });
        defer result.deinit(testing.allocator);
        try testing.expectEqual("Item 123", result.str());
    }

    // parts that are sub-slices of one backing string
    {
        const original = "Hello, world!";
        const result = try String.concat(testing.allocator, &.{ original[0..5], original[7..] });
        defer result.deinit(testing.allocator);
        try testing.expectEqual("Helloworld!", result.str());
    }

    // sub-slices interleaved with a literal
    {
        const original = "Hello!";
        const result = try String.concat(testing.allocator, &.{ original[0..5], " world", original[5..] });
        defer result.deinit(testing.allocator);
        try testing.expectEqual("Hello world!", result.str());
    }
}