mirror of
https://github.com/lightpanda-io/browser.git
synced 2025-10-28 14:43:28 +00:00
Merge pull request #1070 from lightpanda-io/dump_strip_mode
Some checks failed
e2e-test / zig build release (push) Has been cancelled
e2e-test / demo-scripts (push) Has been cancelled
e2e-test / cdp-and-hyperfine-bench (push) Has been cancelled
e2e-test / perf-fmt (push) Has been cancelled
zig-test / zig build dev (push) Has been cancelled
zig-test / browser fetch (push) Has been cancelled
zig-test / zig test (push) Has been cancelled
zig-test / perf-fmt (push) Has been cancelled
Some checks failed
e2e-test / zig build release (push) Has been cancelled
e2e-test / demo-scripts (push) Has been cancelled
e2e-test / cdp-and-hyperfine-bench (push) Has been cancelled
e2e-test / perf-fmt (push) Has been cancelled
zig-test / zig build dev (push) Has been cancelled
zig-test / browser fetch (push) Has been cancelled
zig-test / zig test (push) Has been cancelled
zig-test / perf-fmt (push) Has been cancelled
Replace --noscript with more advanced --strip_mode
This commit is contained in:
@@ -26,7 +26,13 @@ pub const Opts = struct {
|
||||
// set to include element shadowroots in the dump
|
||||
page: ?*const Page = null,
|
||||
|
||||
exclude_scripts: bool = false,
|
||||
strip_mode: StripMode = .{},
|
||||
|
||||
// Selects which groups of elements are left out of the dump.
// Every group is off by default, so nothing is stripped unless requested.
pub const StripMode = struct {
|
||||
// Omit elements matched by isJsRelated (e.g. <script>).
js: bool = false,
|
||||
// Omit elements matched by isUIRelated: img, picture, video, svg and
// everything that is CSS-related.
ui: bool = false,
|
||||
// Omit elements matched by isCssRelated: <style> and <link rel=stylesheet>.
css: bool = false,
|
||||
};
|
||||
};
|
||||
|
||||
// writer must be a std.io.Writer
|
||||
@@ -67,7 +73,7 @@ pub fn writeNode(node: *parser.Node, opts: Opts, writer: *std.Io.Writer) anyerro
|
||||
.element => {
|
||||
// open the tag
|
||||
const tag_type = try parser.nodeHTMLGetTagType(node) orelse .undef;
|
||||
if (opts.exclude_scripts and try isScriptOrRelated(tag_type, node)) {
|
||||
if (try isStripped(tag_type, node, opts.strip_mode)) {
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -159,9 +165,22 @@ pub fn writeChildren(root: *parser.Node, opts: Opts, writer: *std.Io.Writer) !vo
|
||||
}
|
||||
}
|
||||
|
||||
// When `exclude_scripts` is passed to dump, we don't include <script> tags.
|
||||
// We also want to omit <link rel=preload as=script>
|
||||
fn isScriptOrRelated(tag_type: parser.Tag, node: *parser.Node) !bool {
|
||||
// Decides whether an element must be left out of the dump.
// Each group enabled in `strip_mode` is consulted in turn (js, then css,
// then ui); the element is stripped as soon as one enabled group claims it.
// A group that is not enabled is never evaluated.
fn isStripped(tag_type: parser.Tag, node: *parser.Node, strip_mode: Opts.StripMode) !bool {
    if (strip_mode.js) {
        if (try isJsRelated(tag_type, node)) return true;
    }

    if (strip_mode.css) {
        if (try isCssRelated(tag_type, node)) return true;
    }

    if (strip_mode.ui) {
        if (try isUIRelated(tag_type, node)) return true;
    }

    return false;
}
|
||||
|
||||
fn isJsRelated(tag_type: parser.Tag, node: *parser.Node) !bool {
|
||||
if (tag_type == .script) {
|
||||
return true;
|
||||
}
|
||||
@@ -178,6 +197,34 @@ fn isScriptOrRelated(tag_type: parser.Tag, node: *parser.Node) !bool {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Reports whether the element only exists to bring CSS into the page:
// a <style> element, or a <link> whose `rel` attribute lists the
// `stylesheet` keyword. A <link> without a `rel` attribute is not
// CSS-related. Errors from the attribute lookup are propagated.
fn isCssRelated(tag_type: parser.Tag, node: *parser.Node) !bool {
    if (tag_type == .style) {
        return true;
    }

    if (tag_type == .link) {
        const el = parser.nodeToElement(node);
        const rel = try parser.elementGetAttribute(el, "rel") orelse return false;

        // `rel` holds a set of space-separated keywords, so values such as
        // "alternate stylesheet" or "preload stylesheet" must match too.
        // Comparing the whole attribute value would miss them.
        var keywords = std.mem.tokenizeAny(u8, rel, &std.ascii.whitespace);
        while (keywords.next()) |keyword| {
            if (std.ascii.eqlIgnoreCase(keyword, "stylesheet")) {
                return true;
            }
        }
    }

    return false;
}
|
||||
|
||||
// Reports whether the element is purely presentational: anything that
// isCssRelated accepts, an <img>, <picture> or <video> element, or an
// element with no known tag type whose local name is "svg".
fn isUIRelated(tag_type: parser.Tag, node: *parser.Node) !bool {
    if (try isCssRelated(tag_type, node)) return true;

    switch (tag_type) {
        .img, .picture, .video => return true,
        .undef => {
            // svg has no dedicated tag type here, so it is recognised by name.
            const local_name = try parser.nodeLocalName(node);
            return std.mem.eql(u8, local_name, "svg");
        },
        else => return false,
    }
}
|
||||
|
||||
// area, base, br, col, embed, hr, img, input, link, meta, source, track, wbr
|
||||
// https://html.spec.whatwg.org/#void-elements
|
||||
fn isVoid(elem: *parser.Element) !bool {
|
||||
|
||||
@@ -181,7 +181,7 @@ pub const Page = struct {
|
||||
// set to include element shadowroots in the dump
|
||||
page: ?*const Page = null,
|
||||
with_base: bool = false,
|
||||
exclude_scripts: bool = false,
|
||||
strip_mode: Dump.Opts.StripMode = .{},
|
||||
};
|
||||
|
||||
// dump writes the page content into the given file.
|
||||
@@ -228,7 +228,7 @@ pub const Page = struct {
|
||||
|
||||
try Dump.writeHTML(doc, .{
|
||||
.page = opts.page,
|
||||
.exclude_scripts = opts.exclude_scripts,
|
||||
.strip_mode = opts.strip_mode,
|
||||
}, out);
|
||||
}
|
||||
|
||||
|
||||
54
src/main.zig
54
src/main.zig
@@ -24,6 +24,7 @@ const log = @import("log.zig");
|
||||
const App = @import("app.zig").App;
|
||||
const Server = @import("server.zig").Server;
|
||||
const Browser = @import("browser/browser.zig").Browser;
|
||||
const DumpStripMode = @import("browser/dump.zig").Opts.StripMode;
|
||||
|
||||
const build_config = @import("build_config");
|
||||
|
||||
@@ -184,7 +185,7 @@ fn run(alloc: Allocator) !void {
|
||||
try page.dump(.{
|
||||
.page = page,
|
||||
.with_base = opts.withbase,
|
||||
.exclude_scripts = opts.noscript,
|
||||
.strip_mode = opts.strip_mode,
|
||||
}, &writer.interface);
|
||||
try writer.interface.flush();
|
||||
}
|
||||
@@ -292,8 +293,8 @@ const Command = struct {
|
||||
url: []const u8,
|
||||
dump: bool = false,
|
||||
common: Common,
|
||||
noscript: bool = false,
|
||||
withbase: bool = false,
|
||||
strip_mode: DumpStripMode = .{},
|
||||
};
|
||||
|
||||
const Common = struct {
|
||||
@@ -372,7 +373,14 @@ const Command = struct {
|
||||
\\Options:
|
||||
\\--dump Dumps document to stdout.
|
||||
\\ Defaults to false.
|
||||
\\--noscript Exclude <script> tags in dump. Defaults to false.
|
||||
\\
|
||||
\\--strip_mode Comma separated list of tag groups to remove from dump
|
||||
\\ the dump. e.g. --strip_mode js,css
|
||||
\\ - "js" script and link[as=script, rel=preload]
|
||||
\\ - "ui" includes img, picture, video, css and svg
|
||||
\\ - "css" includes style and link[rel=stylesheet]
|
||||
\\ - "full" includes js, ui and css
|
||||
\\
|
||||
\\--with_base Add a <base> tag in dump. Defaults to false.
|
||||
\\
|
||||
++ common_options ++
|
||||
@@ -460,6 +468,10 @@ fn inferMode(opt: []const u8) ?App.RunMode {
|
||||
return .fetch;
|
||||
}
|
||||
|
||||
if (std.mem.eql(u8, opt, "--strip_mode")) {
|
||||
return .fetch;
|
||||
}
|
||||
|
||||
if (std.mem.eql(u8, opt, "--with_base")) {
|
||||
return .fetch;
|
||||
}
|
||||
@@ -545,10 +557,10 @@ fn parseFetchArgs(
|
||||
args: *std.process.ArgIterator,
|
||||
) !Command.Fetch {
|
||||
var dump: bool = false;
|
||||
var noscript: bool = false;
|
||||
var withbase: bool = false;
|
||||
var url: ?[]const u8 = null;
|
||||
var common: Command.Common = .{};
|
||||
var strip_mode: DumpStripMode = .{};
|
||||
|
||||
while (args.next()) |opt| {
|
||||
if (std.mem.eql(u8, "--dump", opt)) {
|
||||
@@ -557,7 +569,11 @@ fn parseFetchArgs(
|
||||
}
|
||||
|
||||
if (std.mem.eql(u8, "--noscript", opt)) {
|
||||
noscript = true;
|
||||
log.warn(.app, "deprecation warning", .{
|
||||
.feature = "--noscript argument",
|
||||
.hint = "use '--strip_mode js' instead",
|
||||
});
|
||||
strip_mode.js = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -566,6 +582,32 @@ fn parseFetchArgs(
|
||||
continue;
|
||||
}
|
||||
|
||||
// --strip_mode takes a comma separated list of tag groups ("js", "ui",
// "css" or "full") and switches on the matching StripMode flags. Flags are
// only ever turned on, so groups accumulate across the list and across
// repeated arguments.
if (std.mem.eql(u8, "--strip_mode", opt)) {
    const str = args.next() orelse {
        log.fatal(.app, "missing argument value", .{ .arg = "--strip_mode" });
        return error.InvalidArgument;
    };

    var it = std.mem.splitScalar(u8, str, ',');
    while (it.next()) |part| {
        // Tolerate whitespace around each entry, e.g. "js, css".
        const trimmed = std.mem.trim(u8, part, &std.ascii.whitespace);
        if (std.mem.eql(u8, trimmed, "js")) {
            strip_mode.js = true;
        } else if (std.mem.eql(u8, trimmed, "ui")) {
            strip_mode.ui = true;
        } else if (std.mem.eql(u8, trimmed, "css")) {
            strip_mode.css = true;
        } else if (std.mem.eql(u8, trimmed, "full")) {
            strip_mode.js = true;
            strip_mode.ui = true;
            strip_mode.css = true;
        } else {
            log.fatal(.app, "invalid option choice", .{ .arg = "--strip_mode", .value = trimmed });
            // Reject the command line rather than silently ignoring an
            // unknown group, the same way a missing value is rejected.
            return error.InvalidArgument;
        }
    }
    continue;
}
|
||||
|
||||
if (try parseCommonArg(allocator, opt, args, &common)) {
|
||||
continue;
|
||||
}
|
||||
@@ -591,8 +633,8 @@ fn parseFetchArgs(
|
||||
.url = url.?,
|
||||
.dump = dump,
|
||||
.common = common,
|
||||
.noscript = noscript,
|
||||
.withbase = withbase,
|
||||
.strip_mode = strip_mode,
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user