implement html5ever append_based_on_parent_node and append_before_sibling

This commit is contained in:
Karl Seguin
2025-12-24 07:37:21 +08:00
parent 0ca97d01ac
commit 9969ff7165
6 changed files with 121 additions and 12 deletions

View File

@@ -70,6 +70,8 @@ const Error = struct {
get_template_content,
remove_from_parent,
reparent_children,
append_before_sibling,
append_based_on_parent_node,
};
};
@@ -91,6 +93,8 @@ pub fn parse(self: *Parser, html: []const u8) void {
getTemplateContentsCallback,
removeFromParentCallback,
reparentChildrenCallback,
appendBeforeSiblingCallback,
appendBasedOnParentNodeCallback,
);
}
@@ -112,6 +116,8 @@ pub fn parseFragment(self: *Parser, html: []const u8) void {
getTemplateContentsCallback,
removeFromParentCallback,
reparentChildrenCallback,
appendBeforeSiblingCallback,
appendBasedOnParentNodeCallback,
);
}
@@ -150,6 +156,8 @@ pub const Streaming = struct {
getTemplateContentsCallback,
removeFromParentCallback,
reparentChildrenCallback,
appendBeforeSiblingCallback,
appendBasedOnParentNodeCallback,
) orelse return error.ParserCreationFailed;
}
@@ -318,15 +326,13 @@ fn appendCallback(ctx: *anyopaque, parent_ref: *anyopaque, node_or_text: h5e.Nod
};
}
/// Appends `node_or_text` as a new child of `parent`.
///
/// The payload is decoded with `toUnion()`: a `.node` reference is resolved
/// to its `*Node` via `getNode`, while a `.text` slice is forwarded as-is.
/// Both cases go through `self.page.appendNew`; any error it returns is
/// propagated to the caller.
fn _appendCallback(self: *Parser, parent: *Node, node_or_text: h5e.NodeOrText) !void {
    switch (node_or_text.toUnion()) {
        .node => |cpn| {
            const child = getNode(cpn);
            // child node is guaranteed not to belong to another parent
            try self.page.appendNew(parent, .{ .node = child });
        },
        // Exactly one `.text` prong: a second prong for the same tag is a
        // duplicate-case compile error.
        .text => |txt| try self.page.appendNew(parent, .{ .text = txt }),
    }
}
@@ -351,6 +357,35 @@ fn _reparentChildrenCallback(self: *Parser, node: *Node, new_parent: *Node) !voi
try self.page.appendAllChildren(node, new_parent);
}
/// C-ABI entry point for the "append before sibling" tree operation.
///
/// `ctx` is cast back to the owning `*Parser` and `sibling_ref` is resolved
/// to a `*Node` through `getNode`. The work is done by
/// `_appendBeforeSiblingCallback`. Because this function returns `void`
/// across the C boundary, a failure is not propagated: it is stored in
/// `self.err` (overwriting any previous value) tagged with
/// `.append_before_sibling`.
fn appendBeforeSiblingCallback(ctx: *anyopaque, sibling_ref: *anyopaque, node_or_text: h5e.NodeOrText) callconv(.c) void {
    const self: *Parser = @ptrCast(@alignCast(ctx));
    const sibling = getNode(sibling_ref);
    self._appendBeforeSiblingCallback(sibling, node_or_text) catch |failure| {
        self.err = .{
            .err = failure,
            .source = .append_before_sibling,
        };
    };
}
/// Inserts `node_or_text` immediately before `sibling` under `sibling`'s
/// parent.
///
/// Returns `error.NoParent` when `sibling` has no parent node. A `.text`
/// payload is first turned into a node with `self.page.createTextNode`; a
/// `.node` payload is resolved with `getNode`. Errors from node creation or
/// from `self.page.insertNodeRelative` are propagated.
fn _appendBeforeSiblingCallback(self: *Parser, sibling: *Node, node_or_text: h5e.NodeOrText) !void {
    // The parent lookup comes first so that no text node is created when the
    // insertion cannot happen.
    const parent = sibling.parentNode() orelse return error.NoParent;

    const payload = node_or_text.toUnion();
    const new_node: *Node = switch (payload) {
        .node => |ref| getNode(ref),
        .text => |bytes| try self.page.createTextNode(bytes),
    };

    try self.page.insertNodeRelative(parent, new_node, .{ .before = sibling }, .{});
}
/// C-ABI entry point for the "append based on parent node" tree operation.
///
/// `ctx` is cast back to the owning `*Parser`; `element_ref` and
/// `prev_element_ref` are each resolved to a `*Node` through `getNode`. The
/// decision logic lives in `_appendBasedOnParentNodeCallback`. A failure is
/// not propagated across the C boundary: it is stored in `self.err`
/// (overwriting any previous value) tagged with
/// `.append_based_on_parent_node`.
fn appendBasedOnParentNodeCallback(ctx: *anyopaque, element_ref: *anyopaque, prev_element_ref: *anyopaque, node_or_text: h5e.NodeOrText) callconv(.c) void {
    const self: *Parser = @ptrCast(@alignCast(ctx));
    const element = getNode(element_ref);
    const prev_element = getNode(prev_element_ref);
    self._appendBasedOnParentNodeCallback(element, prev_element, node_or_text) catch |failure| {
        self.err = .{
            .err = failure,
            .source = .append_based_on_parent_node,
        };
    };
}
/// Chooses the insertion strategy from whether `element` currently has a
/// parent.
///
/// - `element` has a parent: insert `node_or_text` before `element`
///   (delegates to `_appendBeforeSiblingCallback`).
/// - `element` has no parent: append `node_or_text` as a child of
///   `prev_element` (delegates to `_appendCallback`).
///
/// Errors from either delegate are returned unchanged.
fn _appendBasedOnParentNodeCallback(self: *Parser, element: *Node, prev_element: *Node, node_or_text: h5e.NodeOrText) !void {
    if (element.parentNode() == null) {
        return self._appendCallback(prev_element, node_or_text);
    }
    return self._appendBeforeSiblingCallback(element, node_or_text);
}
fn getNode(ref: *anyopaque) *Node {
const pn: *ParsedNode = @ptrCast(@alignCast(ref));
return pn.node;

View File

@@ -35,6 +35,8 @@ pub extern "c" fn html5ever_parse_document(
getTemplateContentsCallback: *const fn (ctx: *anyopaque, target_ref: *anyopaque) callconv(.c) ?*anyopaque,
removeFromParentCallback: *const fn (ctx: *anyopaque, target_ref: *anyopaque) callconv(.c) void,
reparentChildrenCallback: *const fn (ctx: *anyopaque, node_ref: *anyopaque, new_parent_ref: *anyopaque) callconv(.c) void,
appendBeforeSiblingCallback: *const fn (ctx: *anyopaque, sibling_ref: *anyopaque, NodeOrText) callconv(.c) void,
appendBasedOnParentNodeCallback: *const fn (ctx: *anyopaque, element_ref: *anyopaque, prev_element_ref: *anyopaque, NodeOrText) callconv(.c) void,
) void;
pub extern "c" fn html5ever_parse_fragment(
@@ -54,6 +56,8 @@ pub extern "c" fn html5ever_parse_fragment(
getTemplateContentsCallback: *const fn (ctx: *anyopaque, target_ref: *anyopaque) callconv(.c) ?*anyopaque,
removeFromParentCallback: *const fn (ctx: *anyopaque, target_ref: *anyopaque) callconv(.c) void,
reparentChildrenCallback: *const fn (ctx: *anyopaque, node_ref: *anyopaque, new_parent_ref: *anyopaque) callconv(.c) void,
appendBeforeSiblingCallback: *const fn (ctx: *anyopaque, sibling_ref: *anyopaque, NodeOrText) callconv(.c) void,
appendBasedOnParentNodeCallback: *const fn (ctx: *anyopaque, element_ref: *anyopaque, prev_element_ref: *anyopaque, NodeOrText) callconv(.c) void,
) void;
pub extern "c" fn html5ever_attribute_iterator_next(ctx: *anyopaque) Nullable(Attribute);
@@ -82,6 +86,8 @@ pub extern "c" fn html5ever_streaming_parser_create(
getTemplateContentsCallback: *const fn (ctx: *anyopaque, target_ref: *anyopaque) callconv(.c) ?*anyopaque,
removeFromParentCallback: *const fn (ctx: *anyopaque, target_ref: *anyopaque) callconv(.c) void,
reparentChildrenCallback: *const fn (ctx: *anyopaque, node_ref: *anyopaque, new_parent_ref: *anyopaque) callconv(.c) void,
appendBeforeSiblingCallback: *const fn (ctx: *anyopaque, sibling_ref: *anyopaque, NodeOrText) callconv(.c) void,
appendBasedOnParentNodeCallback: *const fn (ctx: *anyopaque, element_ref: *anyopaque, prev_element_ref: *anyopaque, NodeOrText) callconv(.c) void,
) ?*anyopaque;
pub extern "c" fn html5ever_streaming_parser_feed(

View File

@@ -49,6 +49,8 @@ pub extern "C" fn html5ever_parse_document(
get_template_contents_callback: GetTemplateContentsCallback,
remove_from_parent_callback: RemoveFromParentCallback,
reparent_children_callback: ReparentChildrenCallback,
append_before_sibling_callback: AppendBeforeSiblingCallback,
append_based_on_parent_node_callback: AppendBasedOnParentNodeCallback,
) -> () {
if html.is_null() || len == 0 {
return ();
@@ -73,6 +75,8 @@ pub extern "C" fn html5ever_parse_document(
get_template_contents_callback: get_template_contents_callback,
remove_from_parent_callback: remove_from_parent_callback,
reparent_children_callback: reparent_children_callback,
append_before_sibling_callback: append_before_sibling_callback,
append_based_on_parent_node_callback: append_based_on_parent_node_callback,
};
let bytes = unsafe { std::slice::from_raw_parts(html, len) };
@@ -99,6 +103,8 @@ pub extern "C" fn html5ever_parse_fragment(
get_template_contents_callback: GetTemplateContentsCallback,
remove_from_parent_callback: RemoveFromParentCallback,
reparent_children_callback: ReparentChildrenCallback,
append_before_sibling_callback: AppendBeforeSiblingCallback,
append_based_on_parent_node_callback: AppendBasedOnParentNodeCallback,
) -> () {
if html.is_null() || len == 0 {
return ();
@@ -123,6 +129,8 @@ pub extern "C" fn html5ever_parse_fragment(
get_template_contents_callback: get_template_contents_callback,
remove_from_parent_callback: remove_from_parent_callback,
reparent_children_callback: reparent_children_callback,
append_before_sibling_callback: append_before_sibling_callback,
append_based_on_parent_node_callback: append_based_on_parent_node_callback,
};
let bytes = unsafe { std::slice::from_raw_parts(html, len) };
@@ -209,6 +217,8 @@ pub extern "C" fn html5ever_streaming_parser_create(
get_template_contents_callback: GetTemplateContentsCallback,
remove_from_parent_callback: RemoveFromParentCallback,
reparent_children_callback: ReparentChildrenCallback,
append_before_sibling_callback: AppendBeforeSiblingCallback,
append_based_on_parent_node_callback: AppendBasedOnParentNodeCallback,
) -> *mut c_void {
let arena = Box::new(typed_arena::Arena::new());
@@ -236,6 +246,8 @@ pub extern "C" fn html5ever_streaming_parser_create(
get_template_contents_callback: get_template_contents_callback,
remove_from_parent_callback: remove_from_parent_callback,
reparent_children_callback: reparent_children_callback,
append_before_sibling_callback: append_before_sibling_callback,
append_based_on_parent_node_callback: append_based_on_parent_node_callback,
};
// Create a parser which implements TendrilSink for streaming parsing

View File

@@ -60,6 +60,8 @@ pub struct Sink<'arena> {
pub get_template_contents_callback: GetTemplateContentsCallback,
pub remove_from_parent_callback: RemoveFromParentCallback,
pub reparent_children_callback: ReparentChildrenCallback,
pub append_before_sibling_callback: AppendBeforeSiblingCallback,
pub append_based_on_parent_node_callback: AppendBasedOnParentNodeCallback,
}
impl<'arena> TreeSink for Sink<'arena> {
@@ -189,9 +191,30 @@ impl<'arena> TreeSink for Sink<'arena> {
}
fn append_before_sibling(&self, sibling: &Ref, child: NodeOrText<Ref>) {
_ = sibling;
_ = child;
panic!("append_before_sibling");
match child {
NodeOrText::AppendText(ref t) => {
let byte_slice = t.as_ref().as_bytes();
let static_slice: &'static [u8] = unsafe {
std::mem::transmute(byte_slice)
};
unsafe {
(self.append_before_sibling_callback)(self.ctx, *sibling, CNodeOrText{
tag: 1,
node: ptr::null(),
text: StringSlice { ptr: static_slice.as_ptr(), len: static_slice.len()},
});
};
},
NodeOrText::AppendNode(node) => {
unsafe {
(self.append_before_sibling_callback)(self.ctx, *sibling, CNodeOrText{
tag: 0,
node: node,
text: StringSlice::default()
});
};
}
}
}
fn append_based_on_parent_node(
@@ -200,10 +223,30 @@ impl<'arena> TreeSink for Sink<'arena> {
prev_element: &Ref,
child: NodeOrText<Ref>,
) {
_ = element;
_ = prev_element;
_ = child;
panic!("append_based_on_parent_node");
match child {
NodeOrText::AppendText(ref t) => {
let byte_slice = t.as_ref().as_bytes();
let static_slice: &'static [u8] = unsafe {
std::mem::transmute(byte_slice)
};
unsafe {
(self.append_based_on_parent_node_callback)(self.ctx, *element, *prev_element, CNodeOrText{
tag: 1,
node: ptr::null(),
text: StringSlice { ptr: static_slice.as_ptr(), len: static_slice.len()},
});
};
},
NodeOrText::AppendNode(node) => {
unsafe {
(self.append_based_on_parent_node_callback)(self.ctx, *element, *prev_element, CNodeOrText{
tag: 0,
node: node,
text: StringSlice::default()
});
};
}
}
}
fn append_doctype_to_document(

View File

@@ -69,6 +69,19 @@ pub type RemoveFromParentCallback = unsafe extern "C" fn(ctx: Ref, target: Ref)
pub type ReparentChildrenCallback = unsafe extern "C" fn(ctx: Ref, node: Ref, new_parent: Ref) -> ();
/// C callback invoked by `Sink::append_before_sibling`.
///
/// * `ctx` - the sink's opaque `ctx` pointer, passed back unchanged.
/// * `sibling` - reference to the node the payload is inserted before.
/// * `node_or_text` - the payload; the sink sets `tag` to 0 for a node and
///   to 1 for text.
pub type AppendBeforeSiblingCallback = unsafe extern "C" fn(
ctx: Ref,
sibling: Ref,
node_or_text: CNodeOrText
) -> ();
/// C callback invoked by `Sink::append_based_on_parent_node`.
///
/// * `ctx` - the sink's opaque `ctx` pointer, passed back unchanged.
/// * `element` - reference to the first candidate node.
/// * `prev_element` - reference to the second candidate node.
/// * `node_or_text` - the payload; the sink sets `tag` to 0 for a node and
///   to 1 for text.
///
/// NOTE(review): which candidate is used is decided by the callback
/// implementation, not by this signature — confirm against the host side.
pub type AppendBasedOnParentNodeCallback = unsafe extern "C" fn(
ctx: Ref,
element: Ref,
prev_element: Ref,
node_or_text: CNodeOrText
) -> ();
pub type Ref = *const c_void;
#[repr(C)]