Merge pull request #1070 from lightpanda-io/dump_strip_mode
Some checks failed
e2e-test / zig build release (push) Has been cancelled
e2e-test / demo-scripts (push) Has been cancelled
e2e-test / cdp-and-hyperfine-bench (push) Has been cancelled
e2e-test / perf-fmt (push) Has been cancelled
zig-test / zig build dev (push) Has been cancelled
zig-test / browser fetch (push) Has been cancelled
zig-test / zig test (push) Has been cancelled
zig-test / perf-fmt (push) Has been cancelled

Replace --noscript with more advanced --strip_mode
This commit is contained in:
Karl Seguin
2025-09-19 19:25:06 +08:00
committed by GitHub
3 changed files with 102 additions and 13 deletions

View File

@@ -26,7 +26,13 @@ pub const Opts = struct {
// set to include element shadowroots in the dump
page: ?*const Page = null,
exclude_scripts: bool = false,
strip_mode: StripMode = .{},
// Selects which groups of elements are left out of a dump. The flags are
// independent of each other and all default to false (nothing is stripped).
pub const StripMode = struct {
// omit <script> elements and script-related <link> elements
js: bool = false,
// omit img, picture, video and svg elements, plus everything `css` omits
ui: bool = false,
// omit <style> elements and <link rel=stylesheet>
css: bool = false,
};
};
// writer must be a std.io.Writer
@@ -67,7 +73,7 @@ pub fn writeNode(node: *parser.Node, opts: Opts, writer: *std.Io.Writer) anyerro
.element => {
// open the tag
const tag_type = try parser.nodeHTMLGetTagType(node) orelse .undef;
if (opts.exclude_scripts and try isScriptOrRelated(tag_type, node)) {
if (try isStripped(tag_type, node, opts.strip_mode)) {
return;
}
@@ -159,9 +165,22 @@ pub fn writeChildren(root: *parser.Node, opts: Opts, writer: *std.Io.Writer) !vo
}
}
// When `exclude_scripts` is passed to dump, we don't include <script> tags.
// We also want to omit <link rel=preload as=script>
fn isScriptOrRelated(tag_type: parser.Tag, node: *parser.Node) !bool {
// Decides whether an element must be left out of the dump for the given
// strip mode. The groups are consulted in the order js, css, ui, and a
// group's check only runs when its flag is set and no earlier group matched.
// Errors from the per-group checks are propagated to the caller.
fn isStripped(tag_type: parser.Tag, node: *parser.Node, strip_mode: Opts.StripMode) !bool {
    return (strip_mode.js and try isJsRelated(tag_type, node)) or
        (strip_mode.css and try isCssRelated(tag_type, node)) or
        (strip_mode.ui and try isUIRelated(tag_type, node));
}
fn isJsRelated(tag_type: parser.Tag, node: *parser.Node) !bool {
if (tag_type == .script) {
return true;
}
@@ -178,6 +197,34 @@ fn isScriptOrRelated(tag_type: parser.Tag, node: *parser.Node) !bool {
return false;
}
// Reports whether the node defines or pulls in CSS: a <style> element, or a
// <link> whose `rel` attribute lists the `stylesheet` keyword.
//
// `rel` is a whitespace-separated list of tokens, so a value such as
// "alternate stylesheet" matches as well as a bare "stylesheet". Tokens are
// compared ASCII case-insensitively. A <link> without a `rel` attribute is
// not considered CSS. Errors from the attribute lookup are propagated.
fn isCssRelated(tag_type: parser.Tag, node: *parser.Node) !bool {
    if (tag_type == .style) {
        return true;
    }

    if (tag_type == .link) {
        const el = parser.nodeToElement(node);
        const rel = try parser.elementGetAttribute(el, "rel") orelse return false;

        // Split on HTML's ASCII whitespace: space, tab, LF, FF, CR.
        var tokens = std.mem.tokenizeAny(u8, rel, " \t\n\x0c\r");
        while (tokens.next()) |token| {
            if (std.ascii.eqlIgnoreCase(token, "stylesheet")) {
                return true;
            }
        }
        return false;
    }

    return false;
}
// Reports whether the node belongs to the "ui" strip group: anything that is
// CSS-related, the img, picture and video elements, and elements whose tag
// type is unknown (`.undef`) but whose local name is exactly "svg".
fn isUIRelated(tag_type: parser.Tag, node: *parser.Node) !bool {
    // The CSS check runs first, matching the original evaluation order.
    if (try isCssRelated(tag_type, node)) return true;

    return switch (tag_type) {
        .img, .picture, .video => true,
        // svg has no dedicated tag type here, so fall back to its local name.
        .undef => std.mem.eql(u8, try parser.nodeLocalName(node), "svg"),
        else => false,
    };
}
// area, base, br, col, embed, hr, img, input, link, meta, source, track, wbr
// https://html.spec.whatwg.org/#void-elements
fn isVoid(elem: *parser.Element) !bool {

View File

@@ -181,7 +181,7 @@ pub const Page = struct {
// set to include element shadowroots in the dump
page: ?*const Page = null,
with_base: bool = false,
exclude_scripts: bool = false,
strip_mode: Dump.Opts.StripMode = .{},
};
// dump writes the page content into the given file.
@@ -228,7 +228,7 @@ pub const Page = struct {
try Dump.writeHTML(doc, .{
.page = opts.page,
.exclude_scripts = opts.exclude_scripts,
.strip_mode = opts.strip_mode,
}, out);
}

View File

@@ -24,6 +24,7 @@ const log = @import("log.zig");
const App = @import("app.zig").App;
const Server = @import("server.zig").Server;
const Browser = @import("browser/browser.zig").Browser;
const DumpStripMode = @import("browser/dump.zig").Opts.StripMode;
const build_config = @import("build_config");
@@ -184,7 +185,7 @@ fn run(alloc: Allocator) !void {
try page.dump(.{
.page = page,
.with_base = opts.withbase,
.exclude_scripts = opts.noscript,
.strip_mode = opts.strip_mode,
}, &writer.interface);
try writer.interface.flush();
}
@@ -292,8 +293,8 @@ const Command = struct {
url: []const u8,
dump: bool = false,
common: Common,
noscript: bool = false,
withbase: bool = false,
strip_mode: DumpStripMode = .{},
};
const Common = struct {
@@ -372,7 +373,14 @@ const Command = struct {
\\Options:
\\--dump Dumps document to stdout.
\\ Defaults to false.
\\--noscript Exclude <script> tags in dump. Defaults to false.
\\
\\--strip_mode Comma separated list of tag groups to remove from
\\ the dump. e.g. --strip_mode js,css
\\ - "js" script and link[as=script, rel=preload]
\\ - "ui" includes img, picture, video, css and svg
\\ - "css" includes style and link[rel=stylesheet]
\\ - "full" includes js, ui and css
\\
\\--with_base Add a <base> tag in dump. Defaults to false.
\\
++ common_options ++
@@ -460,6 +468,10 @@ fn inferMode(opt: []const u8) ?App.RunMode {
return .fetch;
}
if (std.mem.eql(u8, opt, "--strip_mode")) {
return .fetch;
}
if (std.mem.eql(u8, opt, "--with_base")) {
return .fetch;
}
@@ -545,10 +557,10 @@ fn parseFetchArgs(
args: *std.process.ArgIterator,
) !Command.Fetch {
var dump: bool = false;
var noscript: bool = false;
var withbase: bool = false;
var url: ?[]const u8 = null;
var common: Command.Common = .{};
var strip_mode: DumpStripMode = .{};
while (args.next()) |opt| {
if (std.mem.eql(u8, "--dump", opt)) {
@@ -557,7 +569,11 @@ fn parseFetchArgs(
}
if (std.mem.eql(u8, "--noscript", opt)) {
noscript = true;
log.warn(.app, "deprecation warning", .{
.feature = "--noscript argument",
.hint = "use '--strip_mode js' instead",
});
strip_mode.js = true;
continue;
}
@@ -566,6 +582,32 @@ fn parseFetchArgs(
continue;
}
if (std.mem.eql(u8, "--strip_mode", opt)) {
    // The value is the next argument: a comma separated list of group names.
    const str = args.next() orelse {
        log.fatal(.app, "missing argument value", .{ .arg = "--strip_mode" });
        return error.InvalidArgument;
    };

    var it = std.mem.splitScalar(u8, str, ',');
    while (it.next()) |part| {
        // Tolerate whitespace around each name, e.g. "js, css".
        const trimmed = std.mem.trim(u8, part, &std.ascii.whitespace);
        if (std.mem.eql(u8, trimmed, "js")) {
            strip_mode.js = true;
        } else if (std.mem.eql(u8, trimmed, "ui")) {
            strip_mode.ui = true;
        } else if (std.mem.eql(u8, trimmed, "css")) {
            strip_mode.css = true;
        } else if (std.mem.eql(u8, trimmed, "full")) {
            strip_mode.js = true;
            strip_mode.ui = true;
            strip_mode.css = true;
        } else {
            log.fatal(.app, "invalid option choice", .{ .arg = "--strip_mode", .value = trimmed });
            // Fail like the missing-value case above instead of silently
            // ignoring an unknown group name and continuing to parse.
            return error.InvalidArgument;
        }
    }
    continue;
}
if (try parseCommonArg(allocator, opt, args, &common)) {
continue;
}
@@ -591,8 +633,8 @@ fn parseFetchArgs(
.url = url.?,
.dump = dump,
.common = common,
.noscript = noscript,
.withbase = withbase,
.strip_mode = strip_mode,
};
}