mirror of
https://github.com/lightpanda-io/browser.git
synced 2026-02-04 06:23:45 +00:00
initial XML parsing support in DOMParser
This commit is contained in:
@@ -98,6 +98,29 @@ pub fn parse(self: *Parser, html: []const u8) void {
|
||||
);
|
||||
}
|
||||
|
||||
/// Parses `xml` as an XML document.
///
/// The buffer is handed to `h5e.xml5ever_parse_document` as a pointer/length
/// pair, together with a pointer to `self.container`, the parser itself as
/// the callback context, and the same set of tree-building callbacks that
/// are declared in this file.
///
/// This function returns `void`; nothing is reported to the caller directly.
pub fn parseXML(self: *Parser, xml: []const u8) void {
    h5e.xml5ever_parse_document(
        // Input buffer.
        xml.ptr,
        xml.len,
        // Target container and callback context.
        &self.container,
        self,
        // Tree-building callbacks, in the order the binding expects them.
        createElementCallback,
        getDataCallback,
        appendCallback,
        parseErrorCallback,
        popCallback,
        createCommentCallback,
        createProcessingInstruction,
        appendDoctypeToDocument,
        addAttrsIfMissingCallback,
        getTemplateContentsCallback,
        removeFromParentCallback,
        reparentChildrenCallback,
        appendBeforeSiblingCallback,
        appendBasedOnParentNodeCallback,
    );
}
|
||||
|
||||
pub fn parseFragment(self: *Parser, html: []const u8) void {
|
||||
h5e.html5ever_parse_fragment(
|
||||
html.ptr,
|
||||
|
||||
@@ -171,3 +171,24 @@ pub const NodeOrText = extern struct {
|
||||
text: []const u8,
|
||||
};
|
||||
};
|
||||
|
||||
/// Binding for the `xml5ever_parse_document` symbol.
///
/// `xml` / `len` describe the input buffer, `doc` is the opaque reference to
/// the target document and `ctx` is the opaque context value that is handed
/// back as the first argument of every callback that takes a `ctx`.
///
/// The callbacks are positional: their order here must match the order of
/// the parameters on the exporting side exactly.
pub extern "c" fn xml5ever_parse_document(
    xml: [*c]const u8,
    len: usize,
    doc: *anyopaque,
    ctx: *anyopaque,
    createElementCallback: *const fn (ctx: *anyopaque, data: *anyopaque, QualName, AttributeIterator) callconv(.c) ?*anyopaque,
    getDataCallback: *const fn (node_ref: *anyopaque) callconv(.c) *anyopaque,
    appendCallback: *const fn (ctx: *anyopaque, parent_ref: *anyopaque, NodeOrText) callconv(.c) void,
    parseErrorCallback: *const fn (ctx: *anyopaque, StringSlice) callconv(.c) void,
    popCallback: *const fn (ctx: *anyopaque, node_ref: *anyopaque) callconv(.c) void,
    createCommentCallback: *const fn (ctx: *anyopaque, StringSlice) callconv(.c) ?*anyopaque,
    createProcessingInstruction: *const fn (ctx: *anyopaque, StringSlice, StringSlice) callconv(.c) ?*anyopaque,
    appendDoctypeToDocument: *const fn (ctx: *anyopaque, StringSlice, StringSlice, StringSlice) callconv(.c) void,
    addAttrsIfMissingCallback: *const fn (ctx: *anyopaque, target_ref: *anyopaque, AttributeIterator) callconv(.c) void,
    getTemplateContentsCallback: *const fn (ctx: *anyopaque, target_ref: *anyopaque) callconv(.c) ?*anyopaque,
    removeFromParentCallback: *const fn (ctx: *anyopaque, target_ref: *anyopaque) callconv(.c) void,
    reparentChildrenCallback: *const fn (ctx: *anyopaque, node_ref: *anyopaque, new_parent_ref: *anyopaque) callconv(.c) void,
    appendBeforeSiblingCallback: *const fn (ctx: *anyopaque, sibling_ref: *anyopaque, NodeOrText) callconv(.c) void,
    appendBasedOnParentNodeCallback: *const fn (ctx: *anyopaque, element_ref: *anyopaque, prev_element_ref: *anyopaque, NodeOrText) callconv(.c) void,
) void;
|
||||
|
||||
@@ -21,6 +21,7 @@ const std = @import("std");
|
||||
const js = @import("../js/js.zig");
|
||||
const Page = @import("../Page.zig");
|
||||
const HTMLDocument = @import("HTMLDocument.zig");
|
||||
const XMLDocument = @import("XMLDocument.zig");
|
||||
|
||||
const DOMParser = @This();
|
||||
|
||||
@@ -28,14 +29,18 @@ pub fn init() DOMParser {
|
||||
return .{};
|
||||
}
|
||||
|
||||
pub fn parseFromString(self: *const DOMParser, html: []const u8, mime_type: []const u8, page: *Page) !*HTMLDocument {
|
||||
_ = self;
|
||||
|
||||
// For now, only support text/html
|
||||
if (!std.mem.eql(u8, mime_type, "text/html")) {
|
||||
return error.NotSupported;
|
||||
}
|
||||
/// Result type of `parseFromString`: exactly one of the two document kinds
/// the parser can produce.
pub const HTMLDocumentOrXMLDocument = union(enum) {
    /// Document produced for the `text/html` MIME type.
    html_document: *HTMLDocument,
    /// Document produced for the `text/xml` MIME type.
    xml_document: *XMLDocument,
};
|
||||
|
||||
pub fn parseFromString(
|
||||
_: *const DOMParser,
|
||||
html: []const u8,
|
||||
mime_type: []const u8,
|
||||
page: *Page,
|
||||
) !HTMLDocumentOrXMLDocument {
|
||||
if (std.mem.eql(u8, mime_type, "text/html")) {
|
||||
// Create a new HTMLDocument
|
||||
const doc = try page._factory.document(HTMLDocument{
|
||||
._proto = undefined,
|
||||
@@ -55,7 +60,28 @@ pub fn parseFromString(self: *const DOMParser, html: []const u8, mime_type: []co
|
||||
return pe.err;
|
||||
}
|
||||
|
||||
return doc;
|
||||
return .{ .html_document = doc };
|
||||
}
|
||||
|
||||
if (std.mem.eql(u8, mime_type, "text/xml")) {
|
||||
// Create a new XMLDocument.
|
||||
const doc = try page._factory.document(XMLDocument{
|
||||
._proto = undefined,
|
||||
});
|
||||
|
||||
// Parse XML into XMLDocument.
|
||||
const Parser = @import("../parser/Parser.zig");
|
||||
var parser = Parser.init(page.arena, doc.asNode(), page);
|
||||
parser.parseXML(html);
|
||||
|
||||
if (parser.err) |pe| {
|
||||
return pe.err;
|
||||
}
|
||||
|
||||
return .{ .xml_document = doc };
|
||||
}
|
||||
|
||||
return error.NotSupported;
|
||||
}
|
||||
|
||||
pub const JsApi = struct {
|
||||
|
||||
11
src/html5ever/Cargo.lock
generated
11
src/html5ever/Cargo.lock
generated
@@ -72,6 +72,7 @@ dependencies = [
|
||||
"tikv-jemalloc-ctl",
|
||||
"tikv-jemallocator",
|
||||
"typed-arena",
|
||||
"xml5ever",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -476,3 +477,13 @@ name = "windows_x86_64_msvc"
|
||||
version = "0.52.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
|
||||
|
||||
[[package]]
|
||||
name = "xml5ever"
|
||||
version = "0.35.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ee3f1e41afb31a75aef076563b0ad3ecc24f5bd9d12a72b132222664eb76b494"
|
||||
dependencies = [
|
||||
"log",
|
||||
"markup5ever",
|
||||
]
|
||||
|
||||
@@ -14,6 +14,7 @@ string_cache = "0.9.0"
|
||||
typed-arena = "2.0.2"
|
||||
tikv-jemallocator = {version = "0.6.0", features = ["stats"]}
|
||||
tikv-jemalloc-ctl = {version = "0.6.0", features = ["stats"]}
|
||||
xml5ever = "0.35.0"
|
||||
|
||||
[profile.release]
|
||||
lto = true
|
||||
|
||||
@@ -16,20 +16,20 @@
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
mod types;
|
||||
mod sink;
|
||||
mod types;
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
#[global_allocator]
|
||||
static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
|
||||
|
||||
use types::*;
|
||||
use std::cell::Cell;
|
||||
use std::os::raw::{c_uchar, c_void};
|
||||
use types::*;
|
||||
|
||||
use html5ever::{parse_document, parse_fragment, QualName, LocalName, ns, ParseOpts, Parser};
|
||||
use html5ever::tendril::{TendrilSink, StrTendril};
|
||||
use html5ever::interface::tree_builder::QuirksMode;
|
||||
use html5ever::tendril::{StrTendril, TendrilSink};
|
||||
use html5ever::{ns, parse_document, parse_fragment, LocalName, ParseOpts, Parser, QualName};
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn html5ever_parse_document(
|
||||
@@ -135,7 +135,8 @@ pub extern "C" fn html5ever_parse_fragment(
|
||||
|
||||
let bytes = unsafe { std::slice::from_raw_parts(html, len) };
|
||||
parse_fragment(
|
||||
sink, Default::default(),
|
||||
sink,
|
||||
Default::default(),
|
||||
QualName::new(None, ns!(html), LocalName::from("body")),
|
||||
vec![], // attributes
|
||||
false, // context_element_allows_scripting
|
||||
@@ -182,15 +183,15 @@ pub struct Memory {
|
||||
#[cfg(debug_assertions)]
|
||||
#[no_mangle]
|
||||
pub extern "C" fn html5ever_get_memory_usage() -> Memory {
|
||||
use tikv_jemalloc_ctl::{stats, epoch};
|
||||
use tikv_jemalloc_ctl::{epoch, stats};
|
||||
|
||||
// many statistics are cached and only updated when the epoch is advanced.
|
||||
epoch::advance().unwrap();
|
||||
|
||||
return Memory{
|
||||
return Memory {
|
||||
resident: stats::resident::read().unwrap(),
|
||||
allocated: stats::allocated::read().unwrap(),
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
// Streaming parser API
|
||||
@@ -225,9 +226,8 @@ pub extern "C" fn html5ever_streaming_parser_create(
|
||||
// SAFETY: We're creating a self-referential structure here.
|
||||
// The arena is stored in the StreamingParser and lives as long as the parser.
|
||||
// The sink contains a reference to the arena that's valid for the parser's lifetime.
|
||||
let arena_ref: &'static typed_arena::Arena<sink::ElementData> = unsafe {
|
||||
std::mem::transmute(arena.as_ref())
|
||||
};
|
||||
let arena_ref: &'static typed_arena::Arena<sink::ElementData> =
|
||||
unsafe { std::mem::transmute(arena.as_ref()) };
|
||||
|
||||
let sink = sink::Sink {
|
||||
ctx: ctx,
|
||||
@@ -281,7 +281,8 @@ pub extern "C" fn html5ever_streaming_parser_feed(
|
||||
|
||||
// Feed the chunk to the parser
|
||||
// The Parser implements TendrilSink, so we can call process() on it
|
||||
let parser = streaming_parser.parser
|
||||
let parser = streaming_parser
|
||||
.parser
|
||||
.downcast_mut::<Parser<sink::Sink>>()
|
||||
.expect("Invalid parser type");
|
||||
|
||||
@@ -304,7 +305,8 @@ pub extern "C" fn html5ever_streaming_parser_finish(parser_ptr: *mut c_void) {
|
||||
let streaming_parser = unsafe { Box::from_raw(parser_ptr as *mut StreamingParser) };
|
||||
|
||||
// Extract and finish the parser
|
||||
let parser = streaming_parser.parser
|
||||
let parser = streaming_parser
|
||||
.parser
|
||||
.downcast::<Parser<sink::Sink>>()
|
||||
.expect("Invalid parser type");
|
||||
|
||||
@@ -326,3 +328,57 @@ pub extern "C" fn html5ever_streaming_parser_destroy(parser_ptr: *mut c_void) {
|
||||
let _ = Box::from_raw(parser_ptr as *mut StreamingParser);
|
||||
}
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn xml5ever_parse_document(
|
||||
xml: *mut c_uchar,
|
||||
len: usize,
|
||||
document: Ref,
|
||||
ctx: Ref,
|
||||
create_element_callback: CreateElementCallback,
|
||||
get_data_callback: GetDataCallback,
|
||||
append_callback: AppendCallback,
|
||||
parse_error_callback: ParseErrorCallback,
|
||||
pop_callback: PopCallback,
|
||||
create_comment_callback: CreateCommentCallback,
|
||||
create_processing_instruction: CreateProcessingInstruction,
|
||||
append_doctype_to_document: AppendDoctypeToDocumentCallback,
|
||||
add_attrs_if_missing_callback: AddAttrsIfMissingCallback,
|
||||
get_template_contents_callback: GetTemplateContentsCallback,
|
||||
remove_from_parent_callback: RemoveFromParentCallback,
|
||||
reparent_children_callback: ReparentChildrenCallback,
|
||||
append_before_sibling_callback: AppendBeforeSiblingCallback,
|
||||
append_based_on_parent_node_callback: AppendBasedOnParentNodeCallback,
|
||||
) -> () {
|
||||
if xml.is_null() || len == 0 {
|
||||
return ();
|
||||
}
|
||||
|
||||
let arena = typed_arena::Arena::new();
|
||||
|
||||
let sink = sink::Sink {
|
||||
ctx: ctx,
|
||||
arena: &arena,
|
||||
document: document,
|
||||
quirks_mode: Cell::new(QuirksMode::NoQuirks),
|
||||
pop_callback: pop_callback,
|
||||
append_callback: append_callback,
|
||||
get_data_callback: get_data_callback,
|
||||
parse_error_callback: parse_error_callback,
|
||||
create_element_callback: create_element_callback,
|
||||
create_comment_callback: create_comment_callback,
|
||||
create_processing_instruction: create_processing_instruction,
|
||||
append_doctype_to_document: append_doctype_to_document,
|
||||
add_attrs_if_missing_callback: add_attrs_if_missing_callback,
|
||||
get_template_contents_callback: get_template_contents_callback,
|
||||
remove_from_parent_callback: remove_from_parent_callback,
|
||||
reparent_children_callback: reparent_children_callback,
|
||||
append_before_sibling_callback: append_before_sibling_callback,
|
||||
append_based_on_parent_node_callback: append_based_on_parent_node_callback,
|
||||
};
|
||||
|
||||
let bytes = unsafe { std::slice::from_raw_parts(xml, len) };
|
||||
xml5ever::driver::parse_document(sink, xml5ever::driver::XmlParseOpts::default())
|
||||
.from_utf8()
|
||||
.one(bytes);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user