implement html5ever append_based_on_parent_node and append_before_sibling

This commit is contained in:
Karl Seguin
2025-12-24 07:37:21 +08:00
parent 0ca97d01ac
commit 9969ff7165
6 changed files with 121 additions and 12 deletions

View File

@@ -70,6 +70,8 @@ const Error = struct {
get_template_content, get_template_content,
remove_from_parent, remove_from_parent,
reparent_children, reparent_children,
append_before_sibling,
append_based_on_parent_node,
}; };
}; };
@@ -91,6 +93,8 @@ pub fn parse(self: *Parser, html: []const u8) void {
getTemplateContentsCallback, getTemplateContentsCallback,
removeFromParentCallback, removeFromParentCallback,
reparentChildrenCallback, reparentChildrenCallback,
appendBeforeSiblingCallback,
appendBasedOnParentNodeCallback,
); );
} }
@@ -112,6 +116,8 @@ pub fn parseFragment(self: *Parser, html: []const u8) void {
getTemplateContentsCallback, getTemplateContentsCallback,
removeFromParentCallback, removeFromParentCallback,
reparentChildrenCallback, reparentChildrenCallback,
appendBeforeSiblingCallback,
appendBasedOnParentNodeCallback,
); );
} }
@@ -150,6 +156,8 @@ pub const Streaming = struct {
getTemplateContentsCallback, getTemplateContentsCallback,
removeFromParentCallback, removeFromParentCallback,
reparentChildrenCallback, reparentChildrenCallback,
appendBeforeSiblingCallback,
appendBasedOnParentNodeCallback,
) orelse return error.ParserCreationFailed; ) orelse return error.ParserCreationFailed;
} }
@@ -318,15 +326,13 @@ fn appendCallback(ctx: *anyopaque, parent_ref: *anyopaque, node_or_text: h5e.Nod
}; };
} }
/// Forwards an html5ever "append" request to the page: the payload is unpacked
/// into either a node or a text slice and passed to `self.page.appendNew`
/// together with `parent`. Any error from the page is propagated.
fn _appendCallback(self: *Parser, parent: *Node, node_or_text: h5e.NodeOrText) !void {
    // child node is guaranteed not to belong to another parent
    const payload = node_or_text.toUnion();
    switch (payload) {
        .node => |parsed_ref| try self.page.appendNew(parent, .{ .node = getNode(parsed_ref) }),
        .text => |bytes| try self.page.appendNew(parent, .{ .text = bytes }),
    }
}
@@ -351,6 +357,35 @@ fn _reparentChildrenCallback(self: *Parser, node: *Node, new_parent: *Node) !voi
try self.page.appendAllChildren(node, new_parent); try self.page.appendAllChildren(node, new_parent);
} }
/// C-calling-convention entry point passed to the html5ever bindings for
/// "append before sibling".
///
/// `ctx` is cast back to the `Parser` and `sibling_ref` is resolved with
/// `getNode`. The function returns `void`, so a failure cannot be returned to
/// the caller; it is stored in `err` on the parser instead, tagged with
/// `.append_before_sibling`.
fn appendBeforeSiblingCallback(ctx: *anyopaque, sibling_ref: *anyopaque, node_or_text: h5e.NodeOrText) callconv(.c) void {
    const parser: *Parser = @ptrCast(@alignCast(ctx));
    const sibling = getNode(sibling_ref);
    parser._appendBeforeSiblingCallback(sibling, node_or_text) catch |failure| {
        parser.err = .{ .source = .append_before_sibling, .err = failure };
    };
}
/// Inserts `node_or_text` immediately before `sibling` under `sibling`'s parent.
///
/// Returns `error.NoParent` when `sibling` has no parent. A text payload is
/// first turned into a node via `self.page.createTextNode`; a node payload is
/// resolved with `getNode`. Errors from the page are propagated.
///
/// NOTE(review): html5ever's `TreeSink::append_before_sibling` documentation
/// appears to expect text to be merged with a preceding text node and allows
/// the new node to still have an old parent. Confirm whether
/// `insertNodeRelative` covers those cases.
fn _appendBeforeSiblingCallback(self: *Parser, sibling: *Node, node_or_text: h5e.NodeOrText) !void {
    // Resolve the parent first so a detached sibling fails before any text node is created.
    const parent = sibling.parentNode() orelse {
        return error.NoParent;
    };

    const to_insert: *Node = switch (node_or_text.toUnion()) {
        .text => |bytes| try self.page.createTextNode(bytes),
        .node => |parsed_ref| getNode(parsed_ref),
    };

    try self.page.insertNodeRelative(parent, to_insert, .{ .before = sibling }, .{});
}
/// C-calling-convention entry point passed to the html5ever bindings for
/// "append based on parent node".
///
/// `ctx` is cast back to the `Parser`; both node references are resolved with
/// `getNode`. The function returns `void`, so a failure is stored in `err` on
/// the parser, tagged with `.append_based_on_parent_node`.
fn appendBasedOnParentNodeCallback(ctx: *anyopaque, element_ref: *anyopaque, prev_element_ref: *anyopaque, node_or_text: h5e.NodeOrText) callconv(.c) void {
    const parser: *Parser = @ptrCast(@alignCast(ctx));
    const element = getNode(element_ref);
    const prev_element = getNode(prev_element_ref);
    parser._appendBasedOnParentNodeCallback(element, prev_element, node_or_text) catch |failure| {
        parser.err = .{ .source = .append_based_on_parent_node, .err = failure };
    };
}
/// Chooses the insertion strategy from whether `element` is attached:
/// - `element` has a parent: insert `node_or_text` before `element`.
/// - `element` has no parent: append `node_or_text` to `prev_element`.
/// Errors from either path are propagated.
fn _appendBasedOnParentNodeCallback(self: *Parser, element: *Node, prev_element: *Node, node_or_text: h5e.NodeOrText) !void {
    if (element.parentNode() == null) {
        return self._appendCallback(prev_element, node_or_text);
    }
    return self._appendBeforeSiblingCallback(element, node_or_text);
}
fn getNode(ref: *anyopaque) *Node { fn getNode(ref: *anyopaque) *Node {
const pn: *ParsedNode = @ptrCast(@alignCast(ref)); const pn: *ParsedNode = @ptrCast(@alignCast(ref));
return pn.node; return pn.node;

View File

@@ -35,6 +35,8 @@ pub extern "c" fn html5ever_parse_document(
getTemplateContentsCallback: *const fn (ctx: *anyopaque, target_ref: *anyopaque) callconv(.c) ?*anyopaque, getTemplateContentsCallback: *const fn (ctx: *anyopaque, target_ref: *anyopaque) callconv(.c) ?*anyopaque,
removeFromParentCallback: *const fn (ctx: *anyopaque, target_ref: *anyopaque) callconv(.c) void, removeFromParentCallback: *const fn (ctx: *anyopaque, target_ref: *anyopaque) callconv(.c) void,
reparentChildrenCallback: *const fn (ctx: *anyopaque, node_ref: *anyopaque, new_parent_ref: *anyopaque) callconv(.c) void, reparentChildrenCallback: *const fn (ctx: *anyopaque, node_ref: *anyopaque, new_parent_ref: *anyopaque) callconv(.c) void,
appendBeforeSiblingCallback: *const fn (ctx: *anyopaque, sibling_ref: *anyopaque, NodeOrText) callconv(.c) void,
appendBasedOnParentNodeCallback: *const fn (ctx: *anyopaque, element_ref: *anyopaque, prev_element_ref: *anyopaque, NodeOrText) callconv(.c) void,
) void; ) void;
pub extern "c" fn html5ever_parse_fragment( pub extern "c" fn html5ever_parse_fragment(
@@ -54,6 +56,8 @@ pub extern "c" fn html5ever_parse_fragment(
getTemplateContentsCallback: *const fn (ctx: *anyopaque, target_ref: *anyopaque) callconv(.c) ?*anyopaque, getTemplateContentsCallback: *const fn (ctx: *anyopaque, target_ref: *anyopaque) callconv(.c) ?*anyopaque,
removeFromParentCallback: *const fn (ctx: *anyopaque, target_ref: *anyopaque) callconv(.c) void, removeFromParentCallback: *const fn (ctx: *anyopaque, target_ref: *anyopaque) callconv(.c) void,
reparentChildrenCallback: *const fn (ctx: *anyopaque, node_ref: *anyopaque, new_parent_ref: *anyopaque) callconv(.c) void, reparentChildrenCallback: *const fn (ctx: *anyopaque, node_ref: *anyopaque, new_parent_ref: *anyopaque) callconv(.c) void,
appendBeforeSiblingCallback: *const fn (ctx: *anyopaque, sibling_ref: *anyopaque, NodeOrText) callconv(.c) void,
appendBasedOnParentNodeCallback: *const fn (ctx: *anyopaque, element_ref: *anyopaque, prev_element_ref: *anyopaque, NodeOrText) callconv(.c) void,
) void; ) void;
pub extern "c" fn html5ever_attribute_iterator_next(ctx: *anyopaque) Nullable(Attribute); pub extern "c" fn html5ever_attribute_iterator_next(ctx: *anyopaque) Nullable(Attribute);
@@ -82,6 +86,8 @@ pub extern "c" fn html5ever_streaming_parser_create(
getTemplateContentsCallback: *const fn (ctx: *anyopaque, target_ref: *anyopaque) callconv(.c) ?*anyopaque, getTemplateContentsCallback: *const fn (ctx: *anyopaque, target_ref: *anyopaque) callconv(.c) ?*anyopaque,
removeFromParentCallback: *const fn (ctx: *anyopaque, target_ref: *anyopaque) callconv(.c) void, removeFromParentCallback: *const fn (ctx: *anyopaque, target_ref: *anyopaque) callconv(.c) void,
reparentChildrenCallback: *const fn (ctx: *anyopaque, node_ref: *anyopaque, new_parent_ref: *anyopaque) callconv(.c) void, reparentChildrenCallback: *const fn (ctx: *anyopaque, node_ref: *anyopaque, new_parent_ref: *anyopaque) callconv(.c) void,
appendBeforeSiblingCallback: *const fn (ctx: *anyopaque, sibling_ref: *anyopaque, NodeOrText) callconv(.c) void,
appendBasedOnParentNodeCallback: *const fn (ctx: *anyopaque, element_ref: *anyopaque, prev_element_ref: *anyopaque, NodeOrText) callconv(.c) void,
) ?*anyopaque; ) ?*anyopaque;
pub extern "c" fn html5ever_streaming_parser_feed( pub extern "c" fn html5ever_streaming_parser_feed(

View File

@@ -49,6 +49,8 @@ pub extern "C" fn html5ever_parse_document(
get_template_contents_callback: GetTemplateContentsCallback, get_template_contents_callback: GetTemplateContentsCallback,
remove_from_parent_callback: RemoveFromParentCallback, remove_from_parent_callback: RemoveFromParentCallback,
reparent_children_callback: ReparentChildrenCallback, reparent_children_callback: ReparentChildrenCallback,
append_before_sibling_callback: AppendBeforeSiblingCallback,
append_based_on_parent_node_callback: AppendBasedOnParentNodeCallback,
) -> () { ) -> () {
if html.is_null() || len == 0 { if html.is_null() || len == 0 {
return (); return ();
@@ -73,6 +75,8 @@ pub extern "C" fn html5ever_parse_document(
get_template_contents_callback: get_template_contents_callback, get_template_contents_callback: get_template_contents_callback,
remove_from_parent_callback: remove_from_parent_callback, remove_from_parent_callback: remove_from_parent_callback,
reparent_children_callback: reparent_children_callback, reparent_children_callback: reparent_children_callback,
append_before_sibling_callback: append_before_sibling_callback,
append_based_on_parent_node_callback: append_based_on_parent_node_callback,
}; };
let bytes = unsafe { std::slice::from_raw_parts(html, len) }; let bytes = unsafe { std::slice::from_raw_parts(html, len) };
@@ -99,6 +103,8 @@ pub extern "C" fn html5ever_parse_fragment(
get_template_contents_callback: GetTemplateContentsCallback, get_template_contents_callback: GetTemplateContentsCallback,
remove_from_parent_callback: RemoveFromParentCallback, remove_from_parent_callback: RemoveFromParentCallback,
reparent_children_callback: ReparentChildrenCallback, reparent_children_callback: ReparentChildrenCallback,
append_before_sibling_callback: AppendBeforeSiblingCallback,
append_based_on_parent_node_callback: AppendBasedOnParentNodeCallback,
) -> () { ) -> () {
if html.is_null() || len == 0 { if html.is_null() || len == 0 {
return (); return ();
@@ -123,6 +129,8 @@ pub extern "C" fn html5ever_parse_fragment(
get_template_contents_callback: get_template_contents_callback, get_template_contents_callback: get_template_contents_callback,
remove_from_parent_callback: remove_from_parent_callback, remove_from_parent_callback: remove_from_parent_callback,
reparent_children_callback: reparent_children_callback, reparent_children_callback: reparent_children_callback,
append_before_sibling_callback: append_before_sibling_callback,
append_based_on_parent_node_callback: append_based_on_parent_node_callback,
}; };
let bytes = unsafe { std::slice::from_raw_parts(html, len) }; let bytes = unsafe { std::slice::from_raw_parts(html, len) };
@@ -209,6 +217,8 @@ pub extern "C" fn html5ever_streaming_parser_create(
get_template_contents_callback: GetTemplateContentsCallback, get_template_contents_callback: GetTemplateContentsCallback,
remove_from_parent_callback: RemoveFromParentCallback, remove_from_parent_callback: RemoveFromParentCallback,
reparent_children_callback: ReparentChildrenCallback, reparent_children_callback: ReparentChildrenCallback,
append_before_sibling_callback: AppendBeforeSiblingCallback,
append_based_on_parent_node_callback: AppendBasedOnParentNodeCallback,
) -> *mut c_void { ) -> *mut c_void {
let arena = Box::new(typed_arena::Arena::new()); let arena = Box::new(typed_arena::Arena::new());
@@ -236,6 +246,8 @@ pub extern "C" fn html5ever_streaming_parser_create(
get_template_contents_callback: get_template_contents_callback, get_template_contents_callback: get_template_contents_callback,
remove_from_parent_callback: remove_from_parent_callback, remove_from_parent_callback: remove_from_parent_callback,
reparent_children_callback: reparent_children_callback, reparent_children_callback: reparent_children_callback,
append_before_sibling_callback: append_before_sibling_callback,
append_based_on_parent_node_callback: append_based_on_parent_node_callback,
}; };
// Create a parser which implements TendrilSink for streaming parsing // Create a parser which implements TendrilSink for streaming parsing

View File

@@ -60,6 +60,8 @@ pub struct Sink<'arena> {
pub get_template_contents_callback: GetTemplateContentsCallback, pub get_template_contents_callback: GetTemplateContentsCallback,
pub remove_from_parent_callback: RemoveFromParentCallback, pub remove_from_parent_callback: RemoveFromParentCallback,
pub reparent_children_callback: ReparentChildrenCallback, pub reparent_children_callback: ReparentChildrenCallback,
pub append_before_sibling_callback: AppendBeforeSiblingCallback,
pub append_based_on_parent_node_callback: AppendBasedOnParentNodeCallback,
} }
impl<'arena> TreeSink for Sink<'arena> { impl<'arena> TreeSink for Sink<'arena> {
@@ -189,9 +191,30 @@ impl<'arena> TreeSink for Sink<'arena> {
} }
fn append_before_sibling(&self, sibling: &Ref, child: NodeOrText<Ref>) { fn append_before_sibling(&self, sibling: &Ref, child: NodeOrText<Ref>) {
_ = sibling; match child {
_ = child; NodeOrText::AppendText(ref t) => {
panic!("append_before_sibling"); let byte_slice = t.as_ref().as_bytes();
let static_slice: &'static [u8] = unsafe {
std::mem::transmute(byte_slice)
};
unsafe {
(self.append_before_sibling_callback)(self.ctx, *sibling, CNodeOrText{
tag: 1,
node: ptr::null(),
text: StringSlice { ptr: static_slice.as_ptr(), len: static_slice.len()},
});
};
},
NodeOrText::AppendNode(node) => {
unsafe {
(self.append_before_sibling_callback)(self.ctx, *sibling, CNodeOrText{
tag: 0,
node: node,
text: StringSlice::default()
});
};
}
}
} }
fn append_based_on_parent_node( fn append_based_on_parent_node(
@@ -200,10 +223,30 @@ impl<'arena> TreeSink for Sink<'arena> {
prev_element: &Ref, prev_element: &Ref,
child: NodeOrText<Ref>, child: NodeOrText<Ref>,
) { ) {
_ = element; match child {
_ = prev_element; NodeOrText::AppendText(ref t) => {
_ = child; let byte_slice = t.as_ref().as_bytes();
panic!("append_based_on_parent_node"); let static_slice: &'static [u8] = unsafe {
std::mem::transmute(byte_slice)
};
unsafe {
(self.append_based_on_parent_node_callback)(self.ctx, *element, *prev_element, CNodeOrText{
tag: 1,
node: ptr::null(),
text: StringSlice { ptr: static_slice.as_ptr(), len: static_slice.len()},
});
};
},
NodeOrText::AppendNode(node) => {
unsafe {
(self.append_based_on_parent_node_callback)(self.ctx, *element, *prev_element, CNodeOrText{
tag: 0,
node: node,
text: StringSlice::default()
});
};
}
}
} }
fn append_doctype_to_document( fn append_doctype_to_document(

View File

@@ -69,6 +69,19 @@ pub type RemoveFromParentCallback = unsafe extern "C" fn(ctx: Ref, target: Ref)
pub type ReparentChildrenCallback = unsafe extern "C" fn(ctx: Ref, node: Ref, new_parent: Ref) -> (); pub type ReparentChildrenCallback = unsafe extern "C" fn(ctx: Ref, node: Ref, new_parent: Ref) -> ();
/// Host callback invoked by `Sink::append_before_sibling`: receives the opaque
/// context, the sibling reference, and the node-or-text payload to insert.
pub type AppendBeforeSiblingCallback = unsafe extern "C" fn(ctx: Ref, sibling: Ref, node_or_text: CNodeOrText) -> ();
/// Host callback invoked by `Sink::append_based_on_parent_node`: receives the
/// opaque context, the element and previous-element references, and the
/// node-or-text payload to insert.
pub type AppendBasedOnParentNodeCallback = unsafe extern "C" fn(ctx: Ref, element: Ref, prev_element: Ref, node_or_text: CNodeOrText) -> ();
pub type Ref = *const c_void; pub type Ref = *const c_void;
#[repr(C)] #[repr(C)]