move html5ever/ under vendor/

This commit is contained in:
Halil Durak
2025-11-25 12:42:43 +03:00
parent 71af78caea
commit 23e3a1d012
5 changed files with 0 additions and 0 deletions

478
vendor/html5ever/Cargo.lock generated vendored Normal file
View File

@@ -0,0 +1,478 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4
[[package]]
name = "autocfg"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26"
[[package]]
name = "bitflags"
version = "2.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967"
[[package]]
name = "cc"
version = "1.2.39"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e1354349954c6fc9cb0deab020f27f783cf0b604e8bb754dc4658ecf0d29c35f"
dependencies = [
"find-msvc-tools",
"shlex",
]
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "find-msvc-tools"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1ced73b1dacfc750a6db6c0a0c3a3853c8b41997e2e2c563dc90804ae6867959"
[[package]]
name = "futf"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843"
dependencies = [
"mac",
"new_debug_unreachable",
]
[[package]]
name = "html5ever"
version = "0.35.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "55d958c2f74b664487a2035fe1dadb032c48718a03b63f3ab0b8537db8549ed4"
dependencies = [
"log",
"markup5ever",
"match_token",
]
[[package]]
name = "libc"
version = "0.2.172"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa"
[[package]]
name = "litefetch-html5ever"
version = "0.1.0"
dependencies = [
"html5ever",
"string_cache 0.9.0",
"tikv-jemalloc-ctl",
"tikv-jemallocator",
"typed-arena",
]
[[package]]
name = "lock_api"
version = "0.4.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765"
dependencies = [
"autocfg",
"scopeguard",
]
[[package]]
name = "log"
version = "0.4.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94"
[[package]]
name = "mac"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
[[package]]
name = "markup5ever"
version = "0.35.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "311fe69c934650f8f19652b3946075f0fc41ad8757dbb68f1ca14e7900ecc1c3"
dependencies = [
"log",
"tendril",
"web_atoms",
]
[[package]]
name = "match_token"
version = "0.35.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac84fd3f360fcc43dc5f5d186f02a94192761a080e8bc58621ad4d12296a58cf"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "new_debug_unreachable"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086"
[[package]]
name = "parking_lot"
version = "0.12.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "70d58bf43669b5795d1576d0641cfb6fbb2057bf629506267a92807158584a13"
dependencies = [
"lock_api",
"parking_lot_core",
]
[[package]]
name = "parking_lot_core"
version = "0.9.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5"
dependencies = [
"cfg-if",
"libc",
"redox_syscall",
"smallvec",
"windows-targets",
]
[[package]]
name = "paste"
version = "1.0.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a"
[[package]]
name = "phf"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078"
dependencies = [
"phf_shared 0.11.3",
]
[[package]]
name = "phf_codegen"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a"
dependencies = [
"phf_generator",
"phf_shared 0.11.3",
]
[[package]]
name = "phf_generator"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d"
dependencies = [
"phf_shared 0.11.3",
"rand",
]
[[package]]
name = "phf_shared"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5"
dependencies = [
"siphasher",
]
[[package]]
name = "phf_shared"
version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e57fef6bc5981e38c2ce2d63bfa546861309f875b8a75f092d1d54ae2d64f266"
dependencies = [
"siphasher",
]
[[package]]
name = "precomputed-hash"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
[[package]]
name = "proc-macro2"
version = "1.0.95"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.40"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d"
dependencies = [
"proc-macro2",
]
[[package]]
name = "rand"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
dependencies = [
"rand_core",
]
[[package]]
name = "rand_core"
version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
[[package]]
name = "redox_syscall"
version = "0.5.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "928fca9cf2aa042393a8325b9ead81d2f0df4cb12e1e24cef072922ccd99c5af"
dependencies = [
"bitflags",
]
[[package]]
name = "scopeguard"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "serde"
version = "1.0.219"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.219"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "shlex"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
[[package]]
name = "siphasher"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d"
[[package]]
name = "smallvec"
version = "1.15.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
[[package]]
name = "string_cache"
version = "0.8.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bf776ba3fa74f83bf4b63c3dcbbf82173db2632ed8452cb2d891d33f459de70f"
dependencies = [
"new_debug_unreachable",
"parking_lot",
"phf_shared 0.11.3",
"precomputed-hash",
"serde",
]
[[package]]
name = "string_cache"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a18596f8c785a729f2819c0f6a7eae6ebeebdfffbfe4214ae6b087f690e31901"
dependencies = [
"new_debug_unreachable",
"parking_lot",
"phf_shared 0.13.1",
"precomputed-hash",
"serde",
]
[[package]]
name = "string_cache_codegen"
version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c711928715f1fe0fe509c53b43e993a9a557babc2d0a3567d0a3006f1ac931a0"
dependencies = [
"phf_generator",
"phf_shared 0.11.3",
"proc-macro2",
"quote",
]
[[package]]
name = "syn"
version = "2.0.101"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ce2b7fc941b3a24138a0a7cf8e858bfc6a992e7978a068a5c760deb0ed43caf"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "tendril"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0"
dependencies = [
"futf",
"mac",
"utf-8",
]
[[package]]
name = "tikv-jemalloc-ctl"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f21f216790c8df74ce3ab25b534e0718da5a1916719771d3fec23315c99e468b"
dependencies = [
"libc",
"paste",
"tikv-jemalloc-sys",
]
[[package]]
name = "tikv-jemalloc-sys"
version = "0.6.0+5.3.0-1-ge13ca993e8ccb9ba9847cc330696e02839f328f7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cd3c60906412afa9c2b5b5a48ca6a5abe5736aec9eb48ad05037a677e52e4e2d"
dependencies = [
"cc",
"libc",
]
[[package]]
name = "tikv-jemallocator"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4cec5ff18518d81584f477e9bfdf957f5bb0979b0bac3af4ca30b5b3ae2d2865"
dependencies = [
"libc",
"tikv-jemalloc-sys",
]
[[package]]
name = "typed-arena"
version = "2.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6af6ae20167a9ece4bcb41af5b80f8a1f1df981f6391189ce00fd257af04126a"
[[package]]
name = "unicode-ident"
version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512"
[[package]]
name = "utf-8"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
[[package]]
name = "web_atoms"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57ffde1dc01240bdf9992e3205668b235e59421fd085e8a317ed98da0178d414"
dependencies = [
"phf",
"phf_codegen",
"string_cache 0.8.9",
"string_cache_codegen",
]
[[package]]
name = "windows-targets"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_gnullvm",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
[[package]]
name = "windows_aarch64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
[[package]]
name = "windows_i686_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
[[package]]
name = "windows_i686_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
[[package]]
name = "windows_i686_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
[[package]]
name = "windows_x86_64_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
[[package]]
name = "windows_x86_64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"

20
vendor/html5ever/Cargo.toml vendored Normal file
View File

@@ -0,0 +1,20 @@
[package]
name = "litefetch-html5ever"
version = "0.1.0"
edition = "2021"
[lib]
name = "litefetch_html5ever"
path = "lib.rs"
crate-type = ["cdylib", "staticlib"]
[dependencies]
html5ever = "0.35.0"
string_cache = "0.9.0"
typed-arena = "2.0.2"
tikv-jemallocator = {version = "0.6.0", features = ["stats"]}
tikv-jemalloc-ctl = {version = "0.6.0", features = ["stats"]}
[profile.release]
lto = true
codegen-units = 1

303
vendor/html5ever/lib.rs vendored Normal file
View File

@@ -0,0 +1,303 @@
// Copyright (C) 2023-2025 Lightpanda (Selecy SAS)
//
// Francis Bouvier <francis@lightpanda.io>
// Pierre Tachoire <pierre@lightpanda.io>
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
mod types;
mod sink;
// Registers jemalloc as the Rust global allocator, but only in builds that
// have debug assertions enabled. `html5ever_get_memory_usage` (gated by the
// same cfg) reads its numbers from jemalloc via `tikv_jemalloc_ctl`.
#[cfg(debug_assertions)]
#[global_allocator]
static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
use types::*;
use std::cell::Cell;
use std::os::raw::{c_uchar, c_void};
use html5ever::{parse_document, parse_fragment, QualName, LocalName, ns, ParseOpts, Parser};
use html5ever::tendril::{TendrilSink, StrTendril};
use html5ever::interface::tree_builder::QuirksMode;
/// Parses a complete HTML document in one call, reporting every tree
/// operation to the embedder through the supplied callbacks.
///
/// `html`/`len` describe the input buffer; `document` is the embedder's handle
/// for the document node and `ctx` is passed back as the first argument of the
/// callbacks. The call is a no-op when `html` is null or `len` is zero.
///
/// # Safety (caller contract)
///
/// `html` must point to `len` readable bytes for the duration of the call.
#[no_mangle]
pub extern "C" fn html5ever_parse_document(
    html: *mut c_uchar,
    len: usize,
    document: Ref,
    ctx: Ref,
    create_element_callback: CreateElementCallback,
    get_data_callback: GetDataCallback,
    append_callback: AppendCallback,
    parse_error_callback: ParseErrorCallback,
    pop_callback: PopCallback,
    create_comment_callback: CreateCommentCallback,
    append_doctype_to_document: AppendDoctypeToDocumentCallback,
    add_attrs_if_missing_callback: AddAttrsIfMissingCallback,
    get_template_contents_callback: GetTemplateContentsCallback,
    remove_from_parent_callback: RemoveFromParentCallback,
    reparent_children_callback: ReparentChildrenCallback,
) {
    // Nothing to parse: bail out before the pointer is ever dereferenced.
    if len == 0 || html.is_null() {
        return;
    }

    // Owns the `ElementData` records the sink allocates per element; it is
    // declared before the sink so it outlives the parser built from it.
    let arena = typed_arena::Arena::new();
    let input = unsafe { std::slice::from_raw_parts(html, len) };

    let sink = sink::Sink {
        ctx,
        document,
        arena: &arena,
        quirks_mode: Cell::new(QuirksMode::NoQuirks),
        create_element_callback,
        get_data_callback,
        append_callback,
        parse_error_callback,
        pop_callback,
        create_comment_callback,
        append_doctype_to_document,
        add_attrs_if_missing_callback,
        get_template_contents_callback,
        remove_from_parent_callback,
        reparent_children_callback,
    };

    // The input arrives as raw bytes, so go through the parser's byte adapter.
    let byte_parser = parse_document(sink, Default::default()).from_utf8();
    byte_parser.one(input);
}
/// Parses an HTML fragment in one call, reporting every tree operation to the
/// embedder through the supplied callbacks.
///
/// The fragment is parsed with an HTML-namespace `body` context element that
/// has no attributes and does not allow scripting. `html`/`len` describe the
/// input buffer; `document` is the embedder's root handle and `ctx` is passed
/// back as the first argument of the callbacks. The call is a no-op when
/// `html` is null or `len` is zero.
///
/// # Safety (caller contract)
///
/// `html` must point to `len` readable bytes for the duration of the call.
#[no_mangle]
pub extern "C" fn html5ever_parse_fragment(
    html: *mut c_uchar,
    len: usize,
    document: Ref,
    ctx: Ref,
    create_element_callback: CreateElementCallback,
    get_data_callback: GetDataCallback,
    append_callback: AppendCallback,
    parse_error_callback: ParseErrorCallback,
    pop_callback: PopCallback,
    create_comment_callback: CreateCommentCallback,
    append_doctype_to_document: AppendDoctypeToDocumentCallback,
    add_attrs_if_missing_callback: AddAttrsIfMissingCallback,
    get_template_contents_callback: GetTemplateContentsCallback,
    remove_from_parent_callback: RemoveFromParentCallback,
    reparent_children_callback: ReparentChildrenCallback,
) {
    // Nothing to parse: bail out before the pointer is ever dereferenced.
    if len == 0 || html.is_null() {
        return;
    }

    // Owns the `ElementData` records the sink allocates per element; it is
    // declared before the sink so it outlives the parser built from it.
    let arena = typed_arena::Arena::new();
    let input = unsafe { std::slice::from_raw_parts(html, len) };

    let sink = sink::Sink {
        ctx,
        document,
        arena: &arena,
        quirks_mode: Cell::new(QuirksMode::NoQuirks),
        create_element_callback,
        get_data_callback,
        append_callback,
        parse_error_callback,
        pop_callback,
        create_comment_callback,
        append_doctype_to_document,
        add_attrs_if_missing_callback,
        get_template_contents_callback,
        remove_from_parent_callback,
        reparent_children_callback,
    };

    let context_name = QualName::new(None, ns!(html), LocalName::from("body"));
    let context_attrs = vec![];
    let context_element_allows_scripting = false;

    let byte_parser = parse_fragment(
        sink,
        Default::default(),
        context_name,
        context_attrs,
        context_element_allows_scripting,
    )
    .from_utf8();
    byte_parser.one(input);
}
/// Advances an attribute iterator and returns the attribute it was pointing at.
///
/// `c_iter` is the opaque attribute handle that the sink passes to the
/// create-element / add-attrs callbacks (a `CAttributeIterator`). Returns a
/// "none" value once every attribute has been yielded, and also when `c_iter`
/// is null.
///
/// The returned string slices point into the iterator's own attribute storage,
/// so they are only usable while that iterator is alive.
#[no_mangle]
pub extern "C" fn html5ever_attribute_iterator_next(
    c_iter: *const c_void,
) -> CNullable<CAttribute> {
    // Same null-handle policy as the other entry points in this file.
    if c_iter.is_null() {
        return CNullable::<CAttribute>::none();
    }

    // SAFETY: a non-null handle is expected to be the `CAttributeIterator`
    // handed out by the sink; it is a mutable local there, so mutating it
    // through this pointer is legitimate.
    let iter: &mut CAttributeIterator = unsafe { &mut *(c_iter as *mut CAttributeIterator) };

    // `get` instead of indexing: an out-of-range position ends iteration
    // rather than panicking across the FFI boundary.
    let Some(attr) = iter.vec.get(iter.pos) else {
        return CNullable::<CAttribute>::none();
    };

    let item = CAttribute {
        name: CQualName::create(&attr.name),
        value: StringSlice {
            ptr: attr.value.as_ptr(),
            len: attr.value.len(),
        },
    };
    iter.pos += 1;
    CNullable::<CAttribute>::some(item)
}
/// Returns the total number of attributes held by an attribute iterator,
/// regardless of how many have already been consumed.
///
/// A null `c_iter` yields `0`, matching the null-handle policy of the other
/// entry points in this file.
#[no_mangle]
pub extern "C" fn html5ever_attribute_iterator_count(c_iter: *const c_void) -> usize {
    if c_iter.is_null() {
        return 0;
    }
    // SAFETY: a non-null handle is expected to be the `CAttributeIterator`
    // handed out by the sink. Only a shared borrow is needed to read the length.
    let iter: &CAttributeIterator = unsafe { &*(c_iter as *const CAttributeIterator) };
    iter.vec.len()
}
// Snapshot of allocator statistics returned by `html5ever_get_memory_usage`.
// Like that function, it only exists in builds with debug assertions enabled.
// `#[repr(C)]` because it is returned by value across the C ABI.
#[cfg(debug_assertions)]
#[repr(C)]
pub struct Memory {
// Value read from jemalloc's `stats::resident` (presumably bytes — confirm
// against the jemalloc documentation).
pub resident: usize,
// Value read from jemalloc's `stats::allocated` (presumably bytes — confirm
// against the jemalloc documentation).
pub allocated: usize,
}
/// Reads the current jemalloc `resident` and `allocated` statistics.
///
/// Only available in builds with debug assertions enabled. Panics if jemalloc
/// rejects the epoch advance or either statistics read.
#[cfg(debug_assertions)]
#[no_mangle]
pub extern "C" fn html5ever_get_memory_usage() -> Memory {
    use tikv_jemalloc_ctl::{epoch, stats};

    // jemalloc caches many of its statistics; they are only refreshed when
    // the epoch is advanced, so do that before reading.
    epoch::advance().unwrap();

    let resident = stats::resident::read().unwrap();
    let allocated = stats::allocated::read().unwrap();
    Memory { resident, allocated }
}
// Streaming parser API
// The Parser type from html5ever implements TendrilSink and supports streaming
pub struct StreamingParser {
#[allow(dead_code)]
arena: Box<typed_arena::Arena<sink::ElementData>>,
parser: Box<dyn std::any::Any>,
}
#[no_mangle]
pub extern "C" fn html5ever_streaming_parser_create(
document: Ref,
ctx: Ref,
create_element_callback: CreateElementCallback,
get_data_callback: GetDataCallback,
append_callback: AppendCallback,
parse_error_callback: ParseErrorCallback,
pop_callback: PopCallback,
create_comment_callback: CreateCommentCallback,
append_doctype_to_document: AppendDoctypeToDocumentCallback,
add_attrs_if_missing_callback: AddAttrsIfMissingCallback,
get_template_contents_callback: GetTemplateContentsCallback,
remove_from_parent_callback: RemoveFromParentCallback,
reparent_children_callback: ReparentChildrenCallback,
) -> *mut c_void {
let arena = Box::new(typed_arena::Arena::new());
// SAFETY: We're creating a self-referential structure here.
// The arena is stored in the StreamingParser and lives as long as the parser.
// The sink contains a reference to the arena that's valid for the parser's lifetime.
let arena_ref: &'static typed_arena::Arena<sink::ElementData> = unsafe {
std::mem::transmute(arena.as_ref())
};
let sink = sink::Sink {
ctx: ctx,
arena: arena_ref,
document: document,
quirks_mode: Cell::new(QuirksMode::NoQuirks),
pop_callback: pop_callback,
append_callback: append_callback,
get_data_callback: get_data_callback,
parse_error_callback: parse_error_callback,
create_element_callback: create_element_callback,
create_comment_callback: create_comment_callback,
append_doctype_to_document: append_doctype_to_document,
add_attrs_if_missing_callback: add_attrs_if_missing_callback,
get_template_contents_callback: get_template_contents_callback,
remove_from_parent_callback: remove_from_parent_callback,
reparent_children_callback: reparent_children_callback,
};
// Create a parser which implements TendrilSink for streaming parsing
let parser = parse_document(sink, ParseOpts::default());
let streaming_parser = Box::new(StreamingParser {
arena,
parser: Box::new(parser),
});
return Box::into_raw(streaming_parser) as *mut c_void;
}
#[no_mangle]
pub extern "C" fn html5ever_streaming_parser_feed(
parser_ptr: *mut c_void,
html: *const c_uchar,
len: usize,
) {
if parser_ptr.is_null() || html.is_null() || len == 0 {
return;
}
let streaming_parser = unsafe { &mut *(parser_ptr as *mut StreamingParser) };
let bytes = unsafe { std::slice::from_raw_parts(html, len) };
// Convert bytes to UTF-8 string
if let Ok(s) = std::str::from_utf8(bytes) {
let tendril = StrTendril::from(s);
// Feed the chunk to the parser
// The Parser implements TendrilSink, so we can call process() on it
let parser = streaming_parser.parser
.downcast_mut::<Parser<sink::Sink>>()
.expect("Invalid parser type");
parser.process(tendril);
}
}
#[no_mangle]
pub extern "C" fn html5ever_streaming_parser_finish(parser_ptr: *mut c_void) {
if parser_ptr.is_null() {
return;
}
let streaming_parser = unsafe { Box::from_raw(parser_ptr as *mut StreamingParser) };
// Extract and finish the parser
let parser = streaming_parser.parser
.downcast::<Parser<sink::Sink>>()
.expect("Invalid parser type");
// Finish consumes the parser, which will call finish() on the sink
parser.finish();
// Note: The arena will be dropped here automatically
}
#[no_mangle]
pub extern "C" fn html5ever_streaming_parser_destroy(parser_ptr: *mut c_void) {
if parser_ptr.is_null() {
return;
}
// Drop the parser box without finishing
// This is for cases where you want to cancel parsing
unsafe {
let _ = Box::from_raw(parser_ptr as *mut StreamingParser);
}
}

243
vendor/html5ever/sink.rs vendored Normal file
View File

@@ -0,0 +1,243 @@
// Copyright (C) 2023-2025 Lightpanda (Selecy SAS)
//
// Francis Bouvier <francis@lightpanda.io>
// Pierre Tachoire <pierre@lightpanda.io>
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
use std::ptr;
use std::cell::Cell;
use std::borrow::Cow;
use std::os::raw::{c_void};
use crate::types::*;
use html5ever::tendril::{StrTendril};
use html5ever::{Attribute, QualName};
use html5ever::interface::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink};
// Shared borrow of the typed arena in which `Sink::create_element` allocates
// one `ElementData` per element.
type Arena<'arena> = &'arena typed_arena::Arena<ElementData>;
// Per-element bookkeeping kept on the Rust side. A pointer to it is handed to
// the embedder in the create-element callback and read back through the
// get-data callback (see `elem_name` and
// `is_mathml_annotation_xml_integration_point`).
// Made public so it can be used from lib.rs
pub struct ElementData {
// Qualified name the element was created with.
pub qname: QualName,
// Copy of the flag of the same name from the `ElementFlags` passed at creation.
pub mathml_annotation_xml_integration_point: bool,
}
impl ElementData {
    /// Captures the element's qualified name together with the one creation
    /// flag the sink needs to answer later (the MathML `annotation-xml`
    /// integration-point flag).
    fn new(qname: QualName, flags: ElementFlags) -> Self {
        let mathml_annotation_xml_integration_point =
            flags.mathml_annotation_xml_integration_point;
        Self {
            qname,
            mathml_annotation_xml_integration_point,
        }
    }
}
// `TreeSink` implementation state: the embedder's handles plus one C callback
// per tree operation. Every field is supplied by the FFI entry points in
// lib.rs.
pub struct Sink<'arena> {
// Opaque embedder context, passed back as the first argument of the callbacks
// (except the get-data callback, which receives a node handle).
pub ctx: Ref,
// Embedder handle returned from `get_document`.
pub document: Ref,
// Arena that owns the `ElementData` allocated in `create_element`.
pub arena: Arena<'arena>,
// Last quirks mode reported by the parser via `set_quirks_mode`.
pub quirks_mode: Cell<QuirksMode>,
pub pop_callback: PopCallback,
pub append_callback: AppendCallback,
pub get_data_callback: GetDataCallback,
pub parse_error_callback: ParseErrorCallback,
pub create_element_callback: CreateElementCallback,
pub create_comment_callback: CreateCommentCallback,
// Note: this field shares its name with the `TreeSink` method that calls it.
pub append_doctype_to_document: AppendDoctypeToDocumentCallback,
pub add_attrs_if_missing_callback: AddAttrsIfMissingCallback,
pub get_template_contents_callback: GetTemplateContentsCallback,
pub remove_from_parent_callback: RemoveFromParentCallback,
pub reparent_children_callback: ReparentChildrenCallback,
}
impl<'arena> TreeSink for Sink<'arena> {
type Handle = *const c_void;
type Output = ();
type ElemName<'a>
= &'a QualName
where
Self: 'a;
fn finish(self) -> () {
return ();
}
fn parse_error(&self, err: Cow<'static, str>) {
unsafe {
(self.parse_error_callback)(
self.ctx,
StringSlice {
ptr: err.as_ptr(),
len: err.len(),
},
);
}
}
fn get_document(&self) -> *const c_void {
return self.document;
}
fn set_quirks_mode(&self, mode: QuirksMode) {
self.quirks_mode.set(mode);
}
fn same_node(&self, x: &Ref, y: &Ref) -> bool {
ptr::eq::<c_void>(*x, *y)
}
fn elem_name(&self, target: &Ref) -> Self::ElemName<'_> {
let opaque = unsafe { (self.get_data_callback)(*target) };
let data = opaque as *mut ElementData;
return unsafe { &(*data).qname };
}
fn get_template_contents(&self, target: &Ref) -> Ref {
unsafe {
return (self.get_template_contents_callback)(self.ctx, *target);
}
}
fn is_mathml_annotation_xml_integration_point(&self, target: &Ref) -> bool {
let opaque = unsafe { (self.get_data_callback)(*target) };
let data = opaque as *mut ElementData;
return unsafe { (*data).mathml_annotation_xml_integration_point };
}
fn pop(&self, node: &Ref) {
unsafe {
(self.pop_callback)(self.ctx, *node);
}
}
fn create_element(&self, name: QualName, attrs: Vec<Attribute>, flags: ElementFlags) -> Ref {
let data = self.arena.alloc(ElementData::new(name.clone(), flags));
unsafe {
let mut attribute_iterator = CAttributeIterator { vec: attrs, pos: 0 };
return (self.create_element_callback)(
self.ctx,
data as *mut _ as *mut c_void,
CQualName::create(&name),
&mut attribute_iterator as *mut _ as *mut c_void,
);
}
}
fn create_comment(&self, txt: StrTendril) -> Ref {
let str = StringSlice{ ptr: txt.as_ptr(), len: txt.len()};
unsafe {
return (self.create_comment_callback)(self.ctx, str);
}
}
fn create_pi(&self, target: StrTendril, data: StrTendril) -> Ref {
_ = target;
_ = data;
panic!("create_pi");
}
fn append(&self, parent: &Ref, child: NodeOrText<Ref>) {
match child {
NodeOrText::AppendText(ref t) => {
// The child exists for the duration of the append_callback call,
// but sometimes the memory on the Zig side, in append_callback,
// is zeroed. If you try to refactor this code a bit, and do:
// unsafe {
// (self.append_callback)(self.ctx, *parent, CNodeOrText::create(child));
// }
// Where CNodeOrText::create returns the property CNodeOrText,
// you'll occasionally see that zeroed memory. Makes no sense to
// me, but a far as I can tell, this version works.
let byte_slice = t.as_ref().as_bytes();
let static_slice: &'static [u8] = unsafe {
std::mem::transmute(byte_slice)
};
unsafe {
(self.append_callback)(self.ctx, *parent, CNodeOrText{
tag: 1,
node: ptr::null(),
text: StringSlice { ptr: static_slice.as_ptr(), len: static_slice.len()},
});
};
},
NodeOrText::AppendNode(node) => {
unsafe {
(self.append_callback)(self.ctx, *parent, CNodeOrText{
tag: 0,
node: node,
text: StringSlice::default()
});
};
}
}
}
fn append_before_sibling(&self, sibling: &Ref, child: NodeOrText<Ref>) {
_ = sibling;
_ = child;
panic!("append_before_sibling");
}
fn append_based_on_parent_node(
&self,
element: &Ref,
prev_element: &Ref,
child: NodeOrText<Ref>,
) {
_ = element;
_ = prev_element;
_ = child;
panic!("append_based_on_parent_node");
}
fn append_doctype_to_document(
&self,
name: StrTendril,
public_id: StrTendril,
system_id: StrTendril,
) {
let name_str = StringSlice{ ptr: name.as_ptr(), len: name.len()};
let public_id_str = StringSlice{ ptr: public_id.as_ptr(), len: public_id.len()};
let system_id_str = StringSlice{ ptr: system_id.as_ptr(), len: system_id.len()};
unsafe {
(self.append_doctype_to_document)(self.ctx, name_str, public_id_str, system_id_str);
}
}
fn add_attrs_if_missing(&self, target: &Ref, attrs: Vec<Attribute>) {
unsafe {
let mut attribute_iterator = CAttributeIterator { vec: attrs, pos: 0 };
(self.add_attrs_if_missing_callback)(
self.ctx,
*target,
&mut attribute_iterator as *mut _ as *mut c_void,
);
}
}
fn remove_from_parent(&self, target: &Ref) {
unsafe {
(self.remove_from_parent_callback)(self.ctx, *target);
}
}
fn reparent_children(&self, node: &Ref, new_parent: &Ref) {
unsafe {
(self.reparent_children_callback)(self.ctx, *node, *new_parent);
}
}
}

149
vendor/html5ever/types.rs vendored Normal file
View File

@@ -0,0 +1,149 @@
// Copyright (C) 2023-2025 Lightpanda (Selecy SAS)
//
// Francis Bouvier <francis@lightpanda.io>
// Pierre Tachoire <pierre@lightpanda.io>
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
use std::ptr;
use html5ever::{QualName, Attribute};
use std::os::raw::{c_uchar, c_void};
// C callback signatures supplied by the embedder. Unless noted otherwise the
// first argument is the embedder's opaque context (`Sink::ctx`).

// Creates a node for a new element. `data` is the Rust-side `ElementData`
// pointer for the element and `attributes` is an opaque attribute iterator to
// be read with the `html5ever_attribute_iterator_*` functions.
pub type CreateElementCallback = unsafe extern "C" fn(
ctx: Ref,
data: *const c_void,
name: CQualName,
attributes: *mut c_void,
) -> Ref;
// Creates a comment node holding the given text.
pub type CreateCommentCallback = unsafe extern "C" fn(
ctx: Ref,
str: StringSlice,
) -> Ref;
// Receives the doctype name, public id and system id.
pub type AppendDoctypeToDocumentCallback = unsafe extern "C" fn(
ctx: Ref,
name: StringSlice,
public_id: StringSlice,
system_id: StringSlice,
) -> ();
// Returns the data pointer associated with a node. NOTE: despite the
// parameter name, the sink calls this with a node handle, not with `ctx`,
// and casts the result to `ElementData`.
pub type GetDataCallback = unsafe extern "C" fn(ctx: Ref) -> *mut c_void;
// Appends a node or a run of text (see `CNodeOrText`) to `parent`.
pub type AppendCallback = unsafe extern "C" fn(
ctx: Ref,
parent: Ref,
node_or_text: CNodeOrText
) -> ();
// Receives a parse error message.
pub type ParseErrorCallback = unsafe extern "C" fn(ctx: Ref, str: StringSlice) -> ();
// Invoked from the sink's `pop` with the node in question.
pub type PopCallback = unsafe extern "C" fn(ctx: Ref, node: Ref) -> ();
// Receives `target` plus an opaque attribute iterator, as in
// `CreateElementCallback`.
pub type AddAttrsIfMissingCallback = unsafe extern "C" fn(
ctx: Ref,
target: Ref,
attributes: *mut c_void,
) -> ();
// Returns the handle used as the template contents of `target`.
pub type GetTemplateContentsCallback = unsafe extern "C" fn(ctx: Ref, target: Ref) -> Ref;
pub type RemoveFromParentCallback = unsafe extern "C" fn(ctx: Ref, target: Ref) -> ();
pub type ReparentChildrenCallback = unsafe extern "C" fn(ctx: Ref, node: Ref, new_parent: Ref) -> ();
// Opaque handle: used both for the embedder context and for node handles.
pub type Ref = *const c_void;
/// C-compatible stand-in for `Option<T>`: a tag byte followed by the payload.
/// When the tag is `0` the payload is just `T::default()` filler.
#[repr(C)]
pub struct CNullable<T> {
    tag: u8, // 0 = None, 1 = Some
    value: T,
}

impl<T: Default> CNullable<T> {
    /// Builds the "absent" value (tag `0`, default payload).
    pub fn none() -> CNullable<T> {
        let value = T::default();
        CNullable { tag: 0, value }
    }

    /// Builds the "present" value (tag `1`) wrapping `v`.
    pub fn some(v: T) -> CNullable<T> {
        CNullable { tag: 1, value: v }
    }
}
/// C-compatible borrowed slice: a start pointer plus an element count.
/// It carries no lifetime, so whoever builds one decides how long it is valid.
#[repr(C)]
pub struct Slice<T> {
    pub ptr: *const T,
    pub len: usize,
}

impl<T> Default for Slice<T> {
    /// The empty slice: a null pointer with length zero.
    fn default() -> Self {
        let ptr = ptr::null::<T>();
        Slice { ptr, len: 0 }
    }
}

/// Byte slice used for every string that crosses the C boundary.
pub type StringSlice = Slice<c_uchar>;
/// C-compatible view of an html5ever `QualName`: optional prefix, namespace
/// and local name, each as a borrowed string slice.
#[repr(C)]
pub struct CQualName {
    prefix: CNullable<StringSlice>,
    ns: StringSlice,
    local: StringSlice,
}

impl CQualName {
    /// Borrows the three parts of `q`. The slices point at `q`'s own string
    /// data, so the result must not be used after `q` is gone.
    pub fn create(q: &QualName) -> Self {
        let ns = StringSlice {
            ptr: q.ns.as_ptr(),
            len: q.ns.len(),
        };
        let local = StringSlice {
            ptr: q.local.as_ptr(),
            len: q.local.len(),
        };
        let prefix = if let Some(prefix) = &q.prefix {
            CNullable::<StringSlice>::some(StringSlice {
                ptr: prefix.as_ptr(),
                len: prefix.len(),
            })
        } else {
            CNullable::<StringSlice>::none()
        };
        CQualName { prefix, ns, local }
    }
}

impl Default for CQualName {
    /// A name with no prefix and empty namespace / local-name slices.
    fn default() -> Self {
        let prefix = CNullable::<StringSlice>::none();
        let ns = StringSlice::default();
        let local = StringSlice::default();
        CQualName { prefix, ns, local }
    }
}
/// C-compatible view of a single attribute: qualified name plus value.
#[repr(C)]
pub struct CAttribute {
    pub name: CQualName,
    pub value: StringSlice,
}

impl Default for CAttribute {
    /// An attribute with a default (empty) name and an empty value slice.
    fn default() -> Self {
        let name = CQualName::default();
        let value = StringSlice::default();
        CAttribute { name, value }
    }
}
// Cursor over an element's attributes. The sink passes a pointer to one of
// these to the embedder as an opaque handle, and the
// `html5ever_attribute_iterator_*` functions read it. Not `#[repr(C)]`: the
// embedder is not meant to look inside.
pub struct CAttributeIterator {
// Attributes being iterated.
pub vec: Vec<Attribute>,
// Index of the next attribute to yield.
pub pos: usize,
}
// Argument of the append callback: either an existing node or a run of text,
// discriminated by `tag`. The sink fills the unused member with a null
// pointer (for text) or an empty slice (for a node).
#[repr(C)]
pub struct CNodeOrText {
pub tag: u8, // 0 = node, 1 = text
// Node handle; meaningful when `tag` is 0.
pub node: Ref,
// Borrowed text; meaningful when `tag` is 1.
pub text: StringSlice,
}