mirror of
https://github.com/lightpanda-io/browser.git
synced 2026-02-04 06:23:45 +00:00
Merge pull request #1313 from lightpanda-io/nikneym/xml-parsing
Support XML parsing
This commit is contained in:
@@ -98,6 +98,29 @@ pub fn parse(self: *Parser, html: []const u8) void {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Parses `xml` as a full XML document via the `xml5ever_parse_document` binding.
///
/// The parser's `container` is handed over as the document handle and the
/// parser itself as the callback context; the callbacks passed are the ones
/// declared alongside this function.
pub fn parseXML(self: *Parser, xml: []const u8) void {
    const container = &self.container;
    const input_ptr = xml.ptr;
    const input_len = xml.len;

    h5e.xml5ever_parse_document(
        input_ptr,
        input_len,
        container,
        self,
        createElementCallback,
        getDataCallback,
        appendCallback,
        parseErrorCallback,
        popCallback,
        createCommentCallback,
        createProcessingInstruction,
        appendDoctypeToDocument,
        addAttrsIfMissingCallback,
        getTemplateContentsCallback,
        removeFromParentCallback,
        reparentChildrenCallback,
        appendBeforeSiblingCallback,
        appendBasedOnParentNodeCallback,
    );
}
|
||||||
|
|
||||||
pub fn parseFragment(self: *Parser, html: []const u8) void {
|
pub fn parseFragment(self: *Parser, html: []const u8) void {
|
||||||
h5e.html5ever_parse_fragment(
|
h5e.html5ever_parse_fragment(
|
||||||
html.ptr,
|
html.ptr,
|
||||||
|
|||||||
@@ -171,3 +171,24 @@ pub const NodeOrText = extern struct {
|
|||||||
text: []const u8,
|
text: []const u8,
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Binding for the Rust-side `xml5ever_parse_document` entry point.
///
/// `xml`/`len` describe the input bytes, `doc` is the opaque document handle
/// and `ctx` the opaque context handed back to the callbacks that take one.
/// Parameter names are documentation only; the callback order must match the
/// Rust signature.
pub extern "c" fn xml5ever_parse_document(
    xml: [*c]const u8,
    len: usize,
    doc: *anyopaque,
    ctx: *anyopaque,
    createElementCallback: *const fn (ctx: *anyopaque, data: *anyopaque, QualName, AttributeIterator) callconv(.c) ?*anyopaque,
    // Named after what callers pass in this slot (`getDataCallback`).
    getDataCallback: *const fn (node_ref: *anyopaque) callconv(.c) *anyopaque,
    appendCallback: *const fn (ctx: *anyopaque, parent_ref: *anyopaque, NodeOrText) callconv(.c) void,
    parseErrorCallback: *const fn (ctx: *anyopaque, StringSlice) callconv(.c) void,
    popCallback: *const fn (ctx: *anyopaque, node_ref: *anyopaque) callconv(.c) void,
    createCommentCallback: *const fn (ctx: *anyopaque, StringSlice) callconv(.c) ?*anyopaque,
    createProcessingInstruction: *const fn (ctx: *anyopaque, StringSlice, StringSlice) callconv(.c) ?*anyopaque,
    appendDoctypeToDocument: *const fn (ctx: *anyopaque, StringSlice, StringSlice, StringSlice) callconv(.c) void,
    addAttrsIfMissingCallback: *const fn (ctx: *anyopaque, target_ref: *anyopaque, AttributeIterator) callconv(.c) void,
    getTemplateContentsCallback: *const fn (ctx: *anyopaque, target_ref: *anyopaque) callconv(.c) ?*anyopaque,
    removeFromParentCallback: *const fn (ctx: *anyopaque, target_ref: *anyopaque) callconv(.c) void,
    reparentChildrenCallback: *const fn (ctx: *anyopaque, node_ref: *anyopaque, new_parent_ref: *anyopaque) callconv(.c) void,
    appendBeforeSiblingCallback: *const fn (ctx: *anyopaque, sibling_ref: *anyopaque, NodeOrText) callconv(.c) void,
    appendBasedOnParentNodeCallback: *const fn (ctx: *anyopaque, element_ref: *anyopaque, prev_element_ref: *anyopaque, NodeOrText) callconv(.c) void,
) void;
|
||||||
|
|||||||
@@ -107,19 +107,6 @@
|
|||||||
}
|
}
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
<script id=unsupportedMimeType>
|
|
||||||
{
|
|
||||||
const parser = new DOMParser();
|
|
||||||
|
|
||||||
// Should throw an error for unsupported MIME types
|
|
||||||
testing.withError((err) => {
|
|
||||||
testing.expectEqual('NotSupported', err.message);
|
|
||||||
}, () => {
|
|
||||||
parser.parseFromString('<div>test</div>', 'application/xml');
|
|
||||||
});
|
|
||||||
}
|
|
||||||
</script>
|
|
||||||
|
|
||||||
<script id=getElementById>
|
<script id=getElementById>
|
||||||
{
|
{
|
||||||
const doc = new DOMParser().parseFromString('<div id="new-node">new-node</div>', 'text/html');
|
const doc = new DOMParser().parseFromString('<div id="new-node">new-node</div>', 'text/html');
|
||||||
@@ -244,3 +231,161 @@
|
|||||||
testing.expectEqual('<html><head></head><body>spice</body></html>', new DOMParser().parseFromString('spice', "text/html").documentElement.outerHTML);
|
testing.expectEqual('<html><head></head><body>spice</body></html>', new DOMParser().parseFromString('spice', "text/html").documentElement.outerHTML);
|
||||||
testing.expectEqual('<html><head></head><body></body></html>', new DOMParser().parseFromString('<html></html>', "text/html").documentElement.outerHTML);
|
testing.expectEqual('<html><head></head><body></body></html>', new DOMParser().parseFromString('<html></html>', "text/html").documentElement.outerHTML);
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
|
<script id=parse-xml>
|
||||||
|
{
|
||||||
|
const sampleXML = `<?xml version="1.0"?>
|
||||||
|
<catalog>
|
||||||
|
<book id="bk101">
|
||||||
|
<author>Gambardella, Matthew</author>
|
||||||
|
<title>XML Developer's Guide</title>
|
||||||
|
<genre>Computer</genre>
|
||||||
|
<price>44.95</price>
|
||||||
|
<publish_date>2000-10-01</publish_date>
|
||||||
|
<description>An in-depth look at creating applications
|
||||||
|
with XML.</description>
|
||||||
|
</book>
|
||||||
|
<book id="bk102">
|
||||||
|
<author>Ralls, Kim</author>
|
||||||
|
<title>Midnight Rain</title>
|
||||||
|
<genre>Fantasy</genre>
|
||||||
|
<price>5.95</price>
|
||||||
|
<publish_date>2000-12-16</publish_date>
|
||||||
|
<description>A former architect battles corporate zombies,
|
||||||
|
an evil sorceress, and her own childhood to become queen
|
||||||
|
of the world.</description>
|
||||||
|
</book>
|
||||||
|
<book id="bk103">
|
||||||
|
<author>Corets, Eva</author>
|
||||||
|
<title>Maeve Ascendant</title>
|
||||||
|
<genre>Fantasy</genre>
|
||||||
|
<price>5.95</price>
|
||||||
|
<publish_date>2000-11-17</publish_date>
|
||||||
|
<description>After the collapse of a nanotechnology
|
||||||
|
society in England, the young survivors lay the
|
||||||
|
foundation for a new society.</description>
|
||||||
|
</book>
|
||||||
|
<book id="bk104">
|
||||||
|
<author>Corets, Eva</author>
|
||||||
|
<title>Oberon's Legacy</title>
|
||||||
|
<genre>Fantasy</genre>
|
||||||
|
<price>5.95</price>
|
||||||
|
<publish_date>2001-03-10</publish_date>
|
||||||
|
<description>In post-apocalypse England, the mysterious
|
||||||
|
agent known only as Oberon helps to create a new life
|
||||||
|
for the inhabitants of London. Sequel to Maeve
|
||||||
|
Ascendant.</description>
|
||||||
|
</book>
|
||||||
|
<book id="bk105">
|
||||||
|
<author>Corets, Eva</author>
|
||||||
|
<title>The Sundered Grail</title>
|
||||||
|
<genre>Fantasy</genre>
|
||||||
|
<price>5.95</price>
|
||||||
|
<publish_date>2001-09-10</publish_date>
|
||||||
|
<description>The two daughters of Maeve, half-sisters,
|
||||||
|
battle one another for control of England. Sequel to
|
||||||
|
Oberon's Legacy.</description>
|
||||||
|
</book>
|
||||||
|
<book id="bk106">
|
||||||
|
<author>Randall, Cynthia</author>
|
||||||
|
<title>Lover Birds</title>
|
||||||
|
<genre>Romance</genre>
|
||||||
|
<price>4.95</price>
|
||||||
|
<publish_date>2000-09-02</publish_date>
|
||||||
|
<description>When Carla meets Paul at an ornithology
|
||||||
|
conference, tempers fly as feathers get ruffled.</description>
|
||||||
|
</book>
|
||||||
|
<book id="bk107">
|
||||||
|
<author>Thurman, Paula</author>
|
||||||
|
<title>Splish Splash</title>
|
||||||
|
<genre>Romance</genre>
|
||||||
|
<price>4.95</price>
|
||||||
|
<publish_date>2000-11-02</publish_date>
|
||||||
|
<description>A deep sea diver finds true love twenty
|
||||||
|
thousand leagues beneath the sea.</description>
|
||||||
|
</book>
|
||||||
|
<book id="bk108">
|
||||||
|
<author>Knorr, Stefan</author>
|
||||||
|
<title>Creepy Crawlies</title>
|
||||||
|
<genre>Horror</genre>
|
||||||
|
<price>4.95</price>
|
||||||
|
<publish_date>2000-12-06</publish_date>
|
||||||
|
<description>An anthology of horror stories about roaches,
|
||||||
|
centipedes, scorpions and other insects.</description>
|
||||||
|
</book>
|
||||||
|
<book id="bk109">
|
||||||
|
<author>Kress, Peter</author>
|
||||||
|
<title>Paradox Lost</title>
|
||||||
|
<genre>Science Fiction</genre>
|
||||||
|
<price>6.95</price>
|
||||||
|
<publish_date>2000-11-02</publish_date>
|
||||||
|
<description>After an inadvertant trip through a Heisenberg
|
||||||
|
Uncertainty Device, James Salway discovers the problems
|
||||||
|
of being quantum.</description>
|
||||||
|
</book>
|
||||||
|
<book id="bk110">
|
||||||
|
<author>O'Brien, Tim</author>
|
||||||
|
<title>Microsoft .NET: The Programming Bible</title>
|
||||||
|
<genre>Computer</genre>
|
||||||
|
<price>36.95</price>
|
||||||
|
<publish_date>2000-12-09</publish_date>
|
||||||
|
<description>Microsoft's .NET initiative is explored in
|
||||||
|
detail in this deep programmer's reference.</description>
|
||||||
|
</book>
|
||||||
|
<book id="bk111">
|
||||||
|
<author>O'Brien, Tim</author>
|
||||||
|
<title>MSXML3: A Comprehensive Guide</title>
|
||||||
|
<genre>Computer</genre>
|
||||||
|
<price>36.95</price>
|
||||||
|
<publish_date>2000-12-01</publish_date>
|
||||||
|
<description>The Microsoft MSXML3 parser is covered in
|
||||||
|
detail, with attention to XML DOM interfaces, XSLT processing,
|
||||||
|
SAX and more.</description>
|
||||||
|
</book>
|
||||||
|
<book id="bk112">
|
||||||
|
<author>Galos, Mike</author>
|
||||||
|
<title>Visual Studio 7: A Comprehensive Guide</title>
|
||||||
|
<genre>Computer</genre>
|
||||||
|
<price>49.95</price>
|
||||||
|
<publish_date>2001-04-16</publish_date>
|
||||||
|
<description>Microsoft Visual Studio 7 is explored in depth,
|
||||||
|
looking at how Visual Basic, Visual C++, C#, and ASP+ are
|
||||||
|
integrated into a comprehensive development
|
||||||
|
environment.</description>
|
||||||
|
</book>
|
||||||
|
</catalog>`;
|
||||||
|
|
||||||
|
const parser = new DOMParser();
|
||||||
|
const mimes = [
|
||||||
|
"text/xml",
|
||||||
|
"application/xml",
|
||||||
|
"application/xhtml+xml",
|
||||||
|
"image/svg+xml",
|
||||||
|
];
|
||||||
|
|
||||||
|
for (const mime of mimes) {
  // Parse with the MIME type under test so every entry in `mimes` is
  // actually exercised, not just "text/xml".
  const doc = parser.parseFromString(sampleXML, mime);
  const { firstChild: { childNodes, children: collection, tagName }, children } = doc;
  // doc.
  testing.expectEqual(true, doc instanceof XMLDocument);
  testing.expectEqual(1, children.length);
  // firstChild.
  // TODO: Modern browsers expect this in lowercase.
  testing.expectEqual("CATALOG", tagName);
  testing.expectEqual(25, childNodes.length);
  testing.expectEqual(12, collection.length);
  // Check children of first child.
  for (let i = 0; i < collection.length; i++) {
    const {children: elements, id} = collection.item(i);
    // Book ids in the sample run bk101..bk112 in document order.
    testing.expectEqual("bk" + (100 + i + 1), id);
    // TODO: Modern browsers expect these in lowercase.
    testing.expectEqual("AUTHOR", elements.item(0).tagName);
    testing.expectEqual("TITLE", elements.item(1).tagName);
    testing.expectEqual("GENRE", elements.item(2).tagName);
    testing.expectEqual("PRICE", elements.item(3).tagName);
    testing.expectEqual("PUBLISH_DATE", elements.item(4).tagName);
    testing.expectEqual("DESCRIPTION", elements.item(5).tagName);
  }
}
|
||||||
|
}
|
||||||
|
</script>
|
||||||
|
|||||||
@@ -19,8 +19,13 @@
|
|||||||
const std = @import("std");
|
const std = @import("std");
|
||||||
|
|
||||||
const js = @import("../js/js.zig");
|
const js = @import("../js/js.zig");
|
||||||
|
|
||||||
const Page = @import("../Page.zig");
|
const Page = @import("../Page.zig");
|
||||||
|
const Parser = @import("../parser/Parser.zig");
|
||||||
|
|
||||||
const HTMLDocument = @import("HTMLDocument.zig");
|
const HTMLDocument = @import("HTMLDocument.zig");
|
||||||
|
const XMLDocument = @import("XMLDocument.zig");
|
||||||
|
const ProcessingInstruction = @import("../webapi/cdata/ProcessingInstruction.zig");
|
||||||
|
|
||||||
const DOMParser = @This();
|
const DOMParser = @This();
|
||||||
|
|
||||||
@@ -28,14 +33,27 @@ pub fn init() DOMParser {
|
|||||||
return .{};
|
return .{};
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn parseFromString(self: *const DOMParser, html: []const u8, mime_type: []const u8, page: *Page) !*HTMLDocument {
|
pub const HTMLDocumentOrXMLDocument = union(enum) {
|
||||||
_ = self;
|
html_document: *HTMLDocument,
|
||||||
|
xml_document: *XMLDocument,
|
||||||
|
};
|
||||||
|
|
||||||
// For now, only support text/html
|
pub fn parseFromString(
|
||||||
if (!std.mem.eql(u8, mime_type, "text/html")) {
|
_: *const DOMParser,
|
||||||
return error.NotSupported;
|
html: []const u8,
|
||||||
}
|
mime_type: []const u8,
|
||||||
|
page: *Page,
|
||||||
|
) !HTMLDocumentOrXMLDocument {
|
||||||
|
const maybe_target_mime = std.meta.stringToEnum(enum {
|
||||||
|
@"text/html",
|
||||||
|
@"text/xml",
|
||||||
|
@"application/xml",
|
||||||
|
@"application/xhtml+xml",
|
||||||
|
@"image/svg+xml",
|
||||||
|
}, mime_type);
|
||||||
|
|
||||||
|
if (maybe_target_mime) |target_mime| switch (target_mime) {
|
||||||
|
.@"text/html" => {
|
||||||
// Create a new HTMLDocument
|
// Create a new HTMLDocument
|
||||||
const doc = try page._factory.document(HTMLDocument{
|
const doc = try page._factory.document(HTMLDocument{
|
||||||
._proto = undefined,
|
._proto = undefined,
|
||||||
@@ -47,7 +65,6 @@ pub fn parseFromString(self: *const DOMParser, html: []const u8, mime_type: []co
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Parse HTML into the document
|
// Parse HTML into the document
|
||||||
const Parser = @import("../parser/Parser.zig");
|
|
||||||
var parser = Parser.init(page.arena, doc.asNode(), page);
|
var parser = Parser.init(page.arena, doc.asNode(), page);
|
||||||
parser.parse(normalized);
|
parser.parse(normalized);
|
||||||
|
|
||||||
@@ -55,7 +72,39 @@ pub fn parseFromString(self: *const DOMParser, html: []const u8, mime_type: []co
|
|||||||
return pe.err;
|
return pe.err;
|
||||||
}
|
}
|
||||||
|
|
||||||
return doc;
|
return .{ .html_document = doc };
|
||||||
|
},
|
||||||
|
else => {
|
||||||
|
// Create a new XMLDocument.
|
||||||
|
const doc = try page._factory.document(XMLDocument{
|
||||||
|
._proto = undefined,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Parse XML into XMLDocument.
|
||||||
|
const doc_node = doc.asNode();
|
||||||
|
var parser = Parser.init(page.arena, doc_node, page);
|
||||||
|
parser.parseXML(html);
|
||||||
|
|
||||||
|
if (parser.err) |pe| {
|
||||||
|
return pe.err;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If first node is a `ProcessingInstruction`, skip it.
|
||||||
|
const first_child = doc_node.firstChild() orelse {
|
||||||
|
// Parsing should fail if there aren't any nodes.
|
||||||
|
unreachable;
|
||||||
|
};
|
||||||
|
|
||||||
|
if (first_child.getNodeType() == 7) {
|
||||||
|
// We're sure that firstChild exist, this cannot fail.
|
||||||
|
_ = doc_node.removeChild(first_child, page) catch unreachable;
|
||||||
|
}
|
||||||
|
|
||||||
|
return .{ .xml_document = doc };
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
return error.NotSupported;
|
||||||
}
|
}
|
||||||
|
|
||||||
pub const JsApi = struct {
|
pub const JsApi = struct {
|
||||||
|
|||||||
11
src/html5ever/Cargo.lock
generated
11
src/html5ever/Cargo.lock
generated
@@ -72,6 +72,7 @@ dependencies = [
|
|||||||
"tikv-jemalloc-ctl",
|
"tikv-jemalloc-ctl",
|
||||||
"tikv-jemallocator",
|
"tikv-jemallocator",
|
||||||
"typed-arena",
|
"typed-arena",
|
||||||
|
"xml5ever",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -476,3 +477,13 @@ name = "windows_x86_64_msvc"
|
|||||||
version = "0.52.6"
|
version = "0.52.6"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
|
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "xml5ever"
|
||||||
|
version = "0.35.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ee3f1e41afb31a75aef076563b0ad3ecc24f5bd9d12a72b132222664eb76b494"
|
||||||
|
dependencies = [
|
||||||
|
"log",
|
||||||
|
"markup5ever",
|
||||||
|
]
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ string_cache = "0.9.0"
|
|||||||
typed-arena = "2.0.2"
|
typed-arena = "2.0.2"
|
||||||
tikv-jemallocator = {version = "0.6.0", features = ["stats"]}
|
tikv-jemallocator = {version = "0.6.0", features = ["stats"]}
|
||||||
tikv-jemalloc-ctl = {version = "0.6.0", features = ["stats"]}
|
tikv-jemalloc-ctl = {version = "0.6.0", features = ["stats"]}
|
||||||
|
xml5ever = "0.35.0"
|
||||||
|
|
||||||
[profile.release]
|
[profile.release]
|
||||||
lto = true
|
lto = true
|
||||||
|
|||||||
@@ -16,20 +16,20 @@
|
|||||||
// You should have received a copy of the GNU Affero General Public License
|
// You should have received a copy of the GNU Affero General Public License
|
||||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
mod types;
|
|
||||||
mod sink;
|
mod sink;
|
||||||
|
mod types;
|
||||||
|
|
||||||
#[cfg(debug_assertions)]
|
#[cfg(debug_assertions)]
|
||||||
#[global_allocator]
|
#[global_allocator]
|
||||||
static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
|
static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
|
||||||
|
|
||||||
use types::*;
|
|
||||||
use std::cell::Cell;
|
use std::cell::Cell;
|
||||||
use std::os::raw::{c_uchar, c_void};
|
use std::os::raw::{c_uchar, c_void};
|
||||||
|
use types::*;
|
||||||
|
|
||||||
use html5ever::{parse_document, parse_fragment, QualName, LocalName, ns, ParseOpts, Parser};
|
|
||||||
use html5ever::tendril::{TendrilSink, StrTendril};
|
|
||||||
use html5ever::interface::tree_builder::QuirksMode;
|
use html5ever::interface::tree_builder::QuirksMode;
|
||||||
|
use html5ever::tendril::{StrTendril, TendrilSink};
|
||||||
|
use html5ever::{ns, parse_document, parse_fragment, LocalName, ParseOpts, Parser, QualName};
|
||||||
|
|
||||||
#[no_mangle]
|
#[no_mangle]
|
||||||
pub extern "C" fn html5ever_parse_document(
|
pub extern "C" fn html5ever_parse_document(
|
||||||
@@ -135,7 +135,8 @@ pub extern "C" fn html5ever_parse_fragment(
|
|||||||
|
|
||||||
let bytes = unsafe { std::slice::from_raw_parts(html, len) };
|
let bytes = unsafe { std::slice::from_raw_parts(html, len) };
|
||||||
parse_fragment(
|
parse_fragment(
|
||||||
sink, Default::default(),
|
sink,
|
||||||
|
Default::default(),
|
||||||
QualName::new(None, ns!(html), LocalName::from("body")),
|
QualName::new(None, ns!(html), LocalName::from("body")),
|
||||||
vec![], // attributes
|
vec![], // attributes
|
||||||
false, // context_element_allows_scripting
|
false, // context_element_allows_scripting
|
||||||
@@ -182,7 +183,7 @@ pub struct Memory {
|
|||||||
#[cfg(debug_assertions)]
|
#[cfg(debug_assertions)]
|
||||||
#[no_mangle]
|
#[no_mangle]
|
||||||
pub extern "C" fn html5ever_get_memory_usage() -> Memory {
|
pub extern "C" fn html5ever_get_memory_usage() -> Memory {
|
||||||
use tikv_jemalloc_ctl::{stats, epoch};
|
use tikv_jemalloc_ctl::{epoch, stats};
|
||||||
|
|
||||||
// many statistics are cached and only updated when the epoch is advanced.
|
// many statistics are cached and only updated when the epoch is advanced.
|
||||||
epoch::advance().unwrap();
|
epoch::advance().unwrap();
|
||||||
@@ -190,7 +191,7 @@ pub extern "C" fn html5ever_get_memory_usage() -> Memory {
|
|||||||
return Memory {
|
return Memory {
|
||||||
resident: stats::resident::read().unwrap(),
|
resident: stats::resident::read().unwrap(),
|
||||||
allocated: stats::allocated::read().unwrap(),
|
allocated: stats::allocated::read().unwrap(),
|
||||||
}
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
// Streaming parser API
|
// Streaming parser API
|
||||||
@@ -225,9 +226,8 @@ pub extern "C" fn html5ever_streaming_parser_create(
|
|||||||
// SAFETY: We're creating a self-referential structure here.
|
// SAFETY: We're creating a self-referential structure here.
|
||||||
// The arena is stored in the StreamingParser and lives as long as the parser.
|
// The arena is stored in the StreamingParser and lives as long as the parser.
|
||||||
// The sink contains a reference to the arena that's valid for the parser's lifetime.
|
// The sink contains a reference to the arena that's valid for the parser's lifetime.
|
||||||
let arena_ref: &'static typed_arena::Arena<sink::ElementData> = unsafe {
|
let arena_ref: &'static typed_arena::Arena<sink::ElementData> =
|
||||||
std::mem::transmute(arena.as_ref())
|
unsafe { std::mem::transmute(arena.as_ref()) };
|
||||||
};
|
|
||||||
|
|
||||||
let sink = sink::Sink {
|
let sink = sink::Sink {
|
||||||
ctx: ctx,
|
ctx: ctx,
|
||||||
@@ -281,7 +281,8 @@ pub extern "C" fn html5ever_streaming_parser_feed(
|
|||||||
|
|
||||||
// Feed the chunk to the parser
|
// Feed the chunk to the parser
|
||||||
// The Parser implements TendrilSink, so we can call process() on it
|
// The Parser implements TendrilSink, so we can call process() on it
|
||||||
let parser = streaming_parser.parser
|
let parser = streaming_parser
|
||||||
|
.parser
|
||||||
.downcast_mut::<Parser<sink::Sink>>()
|
.downcast_mut::<Parser<sink::Sink>>()
|
||||||
.expect("Invalid parser type");
|
.expect("Invalid parser type");
|
||||||
|
|
||||||
@@ -304,7 +305,8 @@ pub extern "C" fn html5ever_streaming_parser_finish(parser_ptr: *mut c_void) {
|
|||||||
let streaming_parser = unsafe { Box::from_raw(parser_ptr as *mut StreamingParser) };
|
let streaming_parser = unsafe { Box::from_raw(parser_ptr as *mut StreamingParser) };
|
||||||
|
|
||||||
// Extract and finish the parser
|
// Extract and finish the parser
|
||||||
let parser = streaming_parser.parser
|
let parser = streaming_parser
|
||||||
|
.parser
|
||||||
.downcast::<Parser<sink::Sink>>()
|
.downcast::<Parser<sink::Sink>>()
|
||||||
.expect("Invalid parser type");
|
.expect("Invalid parser type");
|
||||||
|
|
||||||
@@ -326,3 +328,57 @@ pub extern "C" fn html5ever_streaming_parser_destroy(parser_ptr: *mut c_void) {
|
|||||||
let _ = Box::from_raw(parser_ptr as *mut StreamingParser);
|
let _ = Box::from_raw(parser_ptr as *mut StreamingParser);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[no_mangle]
|
||||||
|
pub extern "C" fn xml5ever_parse_document(
|
||||||
|
xml: *mut c_uchar,
|
||||||
|
len: usize,
|
||||||
|
document: Ref,
|
||||||
|
ctx: Ref,
|
||||||
|
create_element_callback: CreateElementCallback,
|
||||||
|
get_data_callback: GetDataCallback,
|
||||||
|
append_callback: AppendCallback,
|
||||||
|
parse_error_callback: ParseErrorCallback,
|
||||||
|
pop_callback: PopCallback,
|
||||||
|
create_comment_callback: CreateCommentCallback,
|
||||||
|
create_processing_instruction: CreateProcessingInstruction,
|
||||||
|
append_doctype_to_document: AppendDoctypeToDocumentCallback,
|
||||||
|
add_attrs_if_missing_callback: AddAttrsIfMissingCallback,
|
||||||
|
get_template_contents_callback: GetTemplateContentsCallback,
|
||||||
|
remove_from_parent_callback: RemoveFromParentCallback,
|
||||||
|
reparent_children_callback: ReparentChildrenCallback,
|
||||||
|
append_before_sibling_callback: AppendBeforeSiblingCallback,
|
||||||
|
append_based_on_parent_node_callback: AppendBasedOnParentNodeCallback,
|
||||||
|
) -> () {
|
||||||
|
if xml.is_null() || len == 0 {
|
||||||
|
return ();
|
||||||
|
}
|
||||||
|
|
||||||
|
let arena = typed_arena::Arena::new();
|
||||||
|
|
||||||
|
let sink = sink::Sink {
|
||||||
|
ctx: ctx,
|
||||||
|
arena: &arena,
|
||||||
|
document: document,
|
||||||
|
quirks_mode: Cell::new(QuirksMode::NoQuirks),
|
||||||
|
pop_callback: pop_callback,
|
||||||
|
append_callback: append_callback,
|
||||||
|
get_data_callback: get_data_callback,
|
||||||
|
parse_error_callback: parse_error_callback,
|
||||||
|
create_element_callback: create_element_callback,
|
||||||
|
create_comment_callback: create_comment_callback,
|
||||||
|
create_processing_instruction: create_processing_instruction,
|
||||||
|
append_doctype_to_document: append_doctype_to_document,
|
||||||
|
add_attrs_if_missing_callback: add_attrs_if_missing_callback,
|
||||||
|
get_template_contents_callback: get_template_contents_callback,
|
||||||
|
remove_from_parent_callback: remove_from_parent_callback,
|
||||||
|
reparent_children_callback: reparent_children_callback,
|
||||||
|
append_before_sibling_callback: append_before_sibling_callback,
|
||||||
|
append_based_on_parent_node_callback: append_based_on_parent_node_callback,
|
||||||
|
};
|
||||||
|
|
||||||
|
let bytes = unsafe { std::slice::from_raw_parts(xml, len) };
|
||||||
|
xml5ever::driver::parse_document(sink, xml5ever::driver::XmlParseOpts::default())
|
||||||
|
.from_utf8()
|
||||||
|
.one(bytes);
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user