mirror of
https://github.com/lightpanda-io/browser.git
synced 2025-10-28 22:53:28 +00:00
Add a --noscript option to "improve" --dump
Currently, fetch --dump includes <script> tags (either inline or with src). I don't know of a use case for which this is the desired behavior. Excluding them via the new --noscript option has the benefit that if you --dump --noscript and open the resulting page in a browser, you don't re-execute JavaScript, which is likely to break the page. For example, opening a --dump of github makes it look like the page is broken because it re-executes JavaScript that isn't meant to be re-executed. Similarly, opening a --dump in a browser might execute JavaScript that lightpanda browser failed to execute, making it look like it worked better than it did.
This commit is contained in:
@@ -110,13 +110,13 @@ pub const Element = struct {
|
||||
|
||||
pub fn get_innerHTML(self: *parser.Element, page: *Page) ![]const u8 {
|
||||
var buf = std.ArrayList(u8).init(page.arena);
|
||||
try dump.writeChildren(parser.elementToNode(self), buf.writer());
|
||||
try dump.writeChildren(parser.elementToNode(self), .{}, buf.writer());
|
||||
return buf.items;
|
||||
}
|
||||
|
||||
pub fn get_outerHTML(self: *parser.Element, page: *Page) ![]const u8 {
|
||||
var buf = std.ArrayList(u8).init(page.arena);
|
||||
try dump.writeNode(parser.elementToNode(self), buf.writer());
|
||||
try dump.writeNode(parser.elementToNode(self), .{}, buf.writer());
|
||||
return buf.items;
|
||||
}
|
||||
|
||||
|
||||
@@ -21,10 +21,14 @@ const std = @import("std");
|
||||
const parser = @import("netsurf.zig");
|
||||
const Walker = @import("dom/walker.zig").WalkerChildren;
|
||||
|
||||
pub const Opts = struct {
|
||||
exclude_scripts: bool = false,
|
||||
};
|
||||
|
||||
// writer must be a std.io.Writer
|
||||
pub fn writeHTML(doc: *parser.Document, writer: anytype) !void {
|
||||
pub fn writeHTML(doc: *parser.Document, opts: Opts, writer: anytype) !void {
|
||||
try writer.writeAll("<!DOCTYPE html>\n");
|
||||
try writeChildren(parser.documentToNode(doc), writer);
|
||||
try writeChildren(parser.documentToNode(doc), opts, writer);
|
||||
try writer.writeAll("\n");
|
||||
}
|
||||
|
||||
@@ -54,10 +58,15 @@ pub fn writeDocType(doc_type: *parser.DocumentType, writer: anytype) !void {
|
||||
try writer.writeAll(">");
|
||||
}
|
||||
|
||||
pub fn writeNode(node: *parser.Node, writer: anytype) anyerror!void {
|
||||
pub fn writeNode(node: *parser.Node, opts: Opts, writer: anytype) anyerror!void {
|
||||
switch (try parser.nodeType(node)) {
|
||||
.element => {
|
||||
// open the tag
|
||||
const tag_type = try parser.elementHTMLGetTagType(@ptrCast(node));
|
||||
if (tag_type == .script and opts.exclude_scripts) {
|
||||
return;
|
||||
}
|
||||
|
||||
const tag = try parser.nodeLocalName(node);
|
||||
try writer.writeAll("<");
|
||||
try writer.writeAll(tag);
|
||||
@@ -82,12 +91,12 @@ pub fn writeNode(node: *parser.Node, writer: anytype) anyerror!void {
|
||||
// void elements can't have any content.
|
||||
if (try isVoid(parser.nodeToElement(node))) return;
|
||||
|
||||
if (try parser.elementHTMLGetTagType(@ptrCast(node)) == .script) {
|
||||
if (tag_type == .script) {
|
||||
try writer.writeAll(try parser.nodeTextContent(node) orelse "");
|
||||
} else {
|
||||
// write the children
|
||||
// TODO avoid recursion
|
||||
try writeChildren(node, writer);
|
||||
try writeChildren(node, opts, writer);
|
||||
}
|
||||
|
||||
// close the tag
|
||||
@@ -129,12 +138,12 @@ pub fn writeNode(node: *parser.Node, writer: anytype) anyerror!void {
|
||||
}
|
||||
|
||||
// writer must be a std.io.Writer
|
||||
pub fn writeChildren(root: *parser.Node, writer: anytype) !void {
|
||||
pub fn writeChildren(root: *parser.Node, opts: Opts, writer: anytype) !void {
|
||||
const walker = Walker{};
|
||||
var next: ?*parser.Node = null;
|
||||
while (true) {
|
||||
next = try walker.get_next(root, next) orelse break;
|
||||
try writeNode(next.?, writer);
|
||||
try writeNode(next.?, opts, writer);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -142,7 +142,7 @@ pub const Page = struct {
|
||||
}
|
||||
|
||||
// dump writes the page content into the given file.
|
||||
pub fn dump(self: *const Page, out: std.fs.File) !void {
|
||||
pub fn dump(self: *const Page, opts: Dump.Opts, out: std.fs.File) !void {
|
||||
if (self.raw_data) |raw_data| {
|
||||
// raw_data was set if the document was not HTML, dump the data content only.
|
||||
return try out.writeAll(raw_data);
|
||||
@@ -150,7 +150,7 @@ pub const Page = struct {
|
||||
|
||||
// if the page has a pointer to a document, dumps the HTML.
|
||||
const doc = parser.documentHTMLToDocument(self.window.document);
|
||||
try Dump.writeHTML(doc, out);
|
||||
try Dump.writeHTML(doc, opts, out);
|
||||
}
|
||||
|
||||
pub fn fetchModuleSource(ctx: *anyopaque, src: []const u8) !?[]const u8 {
|
||||
|
||||
@@ -36,9 +36,9 @@ pub const XMLSerializer = struct {
|
||||
pub fn _serializeToString(_: *const XMLSerializer, root: *parser.Node, page: *Page) ![]const u8 {
|
||||
var buf = std.ArrayList(u8).init(page.arena);
|
||||
switch (try parser.nodeType(root)) {
|
||||
.document => try dump.writeHTML(@as(*parser.Document, @ptrCast(root)), buf.writer()),
|
||||
.document => try dump.writeHTML(@as(*parser.Document, @ptrCast(root)), .{}, buf.writer()),
|
||||
.document_type => try dump.writeDocType(@as(*parser.DocumentType, @ptrCast(root)), buf.writer()),
|
||||
else => try dump.writeNode(root, buf.writer()),
|
||||
else => try dump.writeNode(root, .{}, buf.writer()),
|
||||
}
|
||||
return buf.items;
|
||||
}
|
||||
|
||||
14
src/main.zig
14
src/main.zig
@@ -134,7 +134,7 @@ fn run(alloc: Allocator) !void {
|
||||
|
||||
// dump
|
||||
if (opts.dump) {
|
||||
try page.dump(std.io.getStdOut());
|
||||
try page.dump(.{ .exclude_scripts = opts.noscript }, std.io.getStdOut());
|
||||
}
|
||||
},
|
||||
else => unreachable,
|
||||
@@ -212,6 +212,7 @@ const Command = struct {
|
||||
url: []const u8,
|
||||
dump: bool = false,
|
||||
common: Common,
|
||||
noscript: bool = false,
|
||||
};
|
||||
|
||||
const Common = struct {
|
||||
@@ -275,6 +276,7 @@ const Command = struct {
|
||||
\\Options:
|
||||
\\--dump Dumps document to stdout.
|
||||
\\ Defaults to false.
|
||||
\\--noscript Exclude <script> tags in dump. Defaults to false.
|
||||
\\
|
||||
++ common_options ++
|
||||
\\
|
||||
@@ -352,6 +354,9 @@ fn inferMode(opt: []const u8) ?App.RunMode {
|
||||
if (std.mem.eql(u8, opt, "--dump")) {
|
||||
return .fetch;
|
||||
}
|
||||
if (std.mem.eql(u8, opt, "--noscript")) {
|
||||
return .fetch;
|
||||
}
|
||||
if (std.mem.startsWith(u8, opt, "--") == false) {
|
||||
return .fetch;
|
||||
}
|
||||
@@ -437,6 +442,7 @@ fn parseFetchArgs(
|
||||
args: *std.process.ArgIterator,
|
||||
) !Command.Fetch {
|
||||
var dump: bool = false;
|
||||
// --noscript is opt-in: <script> tags stay in the dump unless the flag is passed.
var noscript: bool = false;
|
||||
var url: ?[]const u8 = null;
|
||||
var common: Command.Common = .{};
|
||||
|
||||
@@ -446,6 +452,11 @@ fn parseFetchArgs(
|
||||
continue;
|
||||
}
|
||||
|
||||
if (std.mem.eql(u8, "--noscript", opt)) {
|
||||
noscript = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (try parseCommonArg(allocator, opt, args, &common)) {
|
||||
continue;
|
||||
}
|
||||
@@ -471,6 +482,7 @@ fn parseFetchArgs(
|
||||
.url = url.?,
|
||||
.dump = dump,
|
||||
.common = common,
|
||||
.noscript = noscript,
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user