mirror of
				https://github.com/lightpanda-io/browser.git
				synced 2025-10-29 15:13:28 +00:00 
			
		
		
		
	Add a --noscript option to "improve" --dump
Currently, fetch --dump includes <script> tags (either inline or with src). I don't know of a use case where this is the desired behavior. Excluding them via the new --noscript option has the benefit that if you --dump --noscript and open the resulting page in a browser, you don't re-execute JavaScript, which is likely to break the page. For example, opening a --dump of github makes it look like the page is broken because it re-executes JavaScript that isn't meant to be re-executed. Similarly, opening a --dump in a browser might execute JavaScript that the lightpanda browser failed to execute, making it look like it worked better than it did.
This commit is contained in:
		| @@ -110,13 +110,13 @@ pub const Element = struct { | ||||
|  | ||||
|     pub fn get_innerHTML(self: *parser.Element, page: *Page) ![]const u8 { | ||||
|         var buf = std.ArrayList(u8).init(page.arena); | ||||
|         try dump.writeChildren(parser.elementToNode(self), buf.writer()); | ||||
|         try dump.writeChildren(parser.elementToNode(self), .{}, buf.writer()); | ||||
|         return buf.items; | ||||
|     } | ||||
|  | ||||
|     pub fn get_outerHTML(self: *parser.Element, page: *Page) ![]const u8 { | ||||
|         var buf = std.ArrayList(u8).init(page.arena); | ||||
|         try dump.writeNode(parser.elementToNode(self), buf.writer()); | ||||
|         try dump.writeNode(parser.elementToNode(self), .{}, buf.writer()); | ||||
|         return buf.items; | ||||
|     } | ||||
|  | ||||
|   | ||||
| @@ -21,10 +21,14 @@ const std = @import("std"); | ||||
| const parser = @import("netsurf.zig"); | ||||
| const Walker = @import("dom/walker.zig").WalkerChildren; | ||||
|  | ||||
| pub const Opts = struct { | ||||
|     exclude_scripts: bool = false, | ||||
| }; | ||||
|  | ||||
| // writer must be a std.io.Writer | ||||
| pub fn writeHTML(doc: *parser.Document, writer: anytype) !void { | ||||
| pub fn writeHTML(doc: *parser.Document, opts: Opts, writer: anytype) !void { | ||||
|     try writer.writeAll("<!DOCTYPE html>\n"); | ||||
|     try writeChildren(parser.documentToNode(doc), writer); | ||||
|     try writeChildren(parser.documentToNode(doc), opts, writer); | ||||
|     try writer.writeAll("\n"); | ||||
| } | ||||
|  | ||||
| @@ -54,10 +58,15 @@ pub fn writeDocType(doc_type: *parser.DocumentType, writer: anytype) !void { | ||||
|     try writer.writeAll(">"); | ||||
| } | ||||
|  | ||||
| pub fn writeNode(node: *parser.Node, writer: anytype) anyerror!void { | ||||
| pub fn writeNode(node: *parser.Node, opts: Opts, writer: anytype) anyerror!void { | ||||
|     switch (try parser.nodeType(node)) { | ||||
|         .element => { | ||||
|             // open the tag | ||||
|             const tag_type = try parser.elementHTMLGetTagType(@ptrCast(node)); | ||||
|             if (tag_type == .script and opts.exclude_scripts) { | ||||
|                 return; | ||||
|             } | ||||
|  | ||||
|             const tag = try parser.nodeLocalName(node); | ||||
|             try writer.writeAll("<"); | ||||
|             try writer.writeAll(tag); | ||||
| @@ -82,12 +91,12 @@ pub fn writeNode(node: *parser.Node, writer: anytype) anyerror!void { | ||||
|             // void elements can't have any content. | ||||
|             if (try isVoid(parser.nodeToElement(node))) return; | ||||
|  | ||||
|             if (try parser.elementHTMLGetTagType(@ptrCast(node)) == .script) { | ||||
|             if (tag_type == .script) { | ||||
|                 try writer.writeAll(try parser.nodeTextContent(node) orelse ""); | ||||
|             } else { | ||||
|                 // write the children | ||||
|                 // TODO avoid recursion | ||||
|                 try writeChildren(node, writer); | ||||
|                 try writeChildren(node, opts, writer); | ||||
|             } | ||||
|  | ||||
|             // close the tag | ||||
| @@ -129,12 +138,12 @@ pub fn writeNode(node: *parser.Node, writer: anytype) anyerror!void { | ||||
| } | ||||
|  | ||||
| // writer must be a std.io.Writer | ||||
| pub fn writeChildren(root: *parser.Node, writer: anytype) !void { | ||||
| pub fn writeChildren(root: *parser.Node, opts: Opts, writer: anytype) !void { | ||||
|     const walker = Walker{}; | ||||
|     var next: ?*parser.Node = null; | ||||
|     while (true) { | ||||
|         next = try walker.get_next(root, next) orelse break; | ||||
|         try writeNode(next.?, writer); | ||||
|         try writeNode(next.?, opts, writer); | ||||
|     } | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -142,7 +142,7 @@ pub const Page = struct { | ||||
|     } | ||||
|  | ||||
|     // dump writes the page content into the given file. | ||||
|     pub fn dump(self: *const Page, out: std.fs.File) !void { | ||||
|     pub fn dump(self: *const Page, opts: Dump.Opts, out: std.fs.File) !void { | ||||
|         if (self.raw_data) |raw_data| { | ||||
|             // raw_data was set if the document was not HTML, dump the data content only. | ||||
|             return try out.writeAll(raw_data); | ||||
| @@ -150,7 +150,7 @@ pub const Page = struct { | ||||
|  | ||||
|         // if the page has a pointer to a document, dumps the HTML. | ||||
|         const doc = parser.documentHTMLToDocument(self.window.document); | ||||
|         try Dump.writeHTML(doc, out); | ||||
|         try Dump.writeHTML(doc, opts, out); | ||||
|     } | ||||
|  | ||||
|     pub fn fetchModuleSource(ctx: *anyopaque, src: []const u8) !?[]const u8 { | ||||
|   | ||||
| @@ -36,9 +36,9 @@ pub const XMLSerializer = struct { | ||||
|     pub fn _serializeToString(_: *const XMLSerializer, root: *parser.Node, page: *Page) ![]const u8 { | ||||
|         var buf = std.ArrayList(u8).init(page.arena); | ||||
|         switch (try parser.nodeType(root)) { | ||||
|             .document => try dump.writeHTML(@as(*parser.Document, @ptrCast(root)), buf.writer()), | ||||
|             .document => try dump.writeHTML(@as(*parser.Document, @ptrCast(root)), .{}, buf.writer()), | ||||
|             .document_type => try dump.writeDocType(@as(*parser.DocumentType, @ptrCast(root)), buf.writer()), | ||||
|             else => try dump.writeNode(root, buf.writer()), | ||||
|             else => try dump.writeNode(root, .{}, buf.writer()), | ||||
|         } | ||||
|         return buf.items; | ||||
|     } | ||||
|   | ||||
							
								
								
									
										14
									
								
								src/main.zig
									
									
									
									
									
								
							
							
						
						
									
										14
									
								
								src/main.zig
									
									
									
									
									
								
							| @@ -134,7 +134,7 @@ fn run(alloc: Allocator) !void { | ||||
|  | ||||
|             // dump | ||||
|             if (opts.dump) { | ||||
|                 try page.dump(std.io.getStdOut()); | ||||
|                 try page.dump(.{ .exclude_scripts = opts.noscript }, std.io.getStdOut()); | ||||
|             } | ||||
|         }, | ||||
|         else => unreachable, | ||||
| @@ -212,6 +212,7 @@ const Command = struct { | ||||
|         url: []const u8, | ||||
|         dump: bool = false, | ||||
|         common: Common, | ||||
|         noscript: bool = false, | ||||
|     }; | ||||
|  | ||||
|     const Common = struct { | ||||
| @@ -275,6 +276,7 @@ const Command = struct { | ||||
|             \\Options: | ||||
|             \\--dump          Dumps document to stdout. | ||||
|             \\                Defaults to false. | ||||
|             \\--noscript      Exclude <script> tags in dump. Defaults to false. | ||||
|             \\ | ||||
|         ++ common_options ++ | ||||
|             \\ | ||||
| @@ -352,6 +354,9 @@ fn inferMode(opt: []const u8) ?App.RunMode { | ||||
|     if (std.mem.eql(u8, opt, "--dump")) { | ||||
|         return .fetch; | ||||
|     } | ||||
|     if (std.mem.eql(u8, opt, "--noscript")) { | ||||
|         return .fetch; | ||||
|     } | ||||
|     if (std.mem.startsWith(u8, opt, "--") == false) { | ||||
|         return .fetch; | ||||
|     } | ||||
| @@ -437,6 +442,7 @@ fn parseFetchArgs( | ||||
|     args: *std.process.ArgIterator, | ||||
| ) !Command.Fetch { | ||||
|     var dump: bool = false; | ||||
|     // Scripts are kept in the dump unless --noscript is passed explicitly. | ||||
|     var noscript: bool = false; | ||||
|     var url: ?[]const u8 = null; | ||||
|     var common: Command.Common = .{}; | ||||
|  | ||||
| @@ -446,6 +452,11 @@ fn parseFetchArgs( | ||||
|             continue; | ||||
|         } | ||||
|  | ||||
|         if (std.mem.eql(u8, "--noscript", opt)) { | ||||
|             noscript = true; | ||||
|             continue; | ||||
|         } | ||||
|  | ||||
|         if (try parseCommonArg(allocator, opt, args, &common)) { | ||||
|             continue; | ||||
|         } | ||||
| @@ -471,6 +482,7 @@ fn parseFetchArgs( | ||||
|         .url = url.?, | ||||
|         .dump = dump, | ||||
|         .common = common, | ||||
|         .noscript = noscript, | ||||
|     }; | ||||
| } | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Karl Seguin
					Karl Seguin