diff --git a/src/browser/dom/element.zig b/src/browser/dom/element.zig index 753ad707..8eab360a 100644 --- a/src/browser/dom/element.zig +++ b/src/browser/dom/element.zig @@ -110,13 +110,13 @@ pub const Element = struct { pub fn get_innerHTML(self: *parser.Element, page: *Page) ![]const u8 { var buf = std.ArrayList(u8).init(page.arena); - try dump.writeChildren(parser.elementToNode(self), buf.writer()); + try dump.writeChildren(parser.elementToNode(self), .{}, buf.writer()); return buf.items; } pub fn get_outerHTML(self: *parser.Element, page: *Page) ![]const u8 { var buf = std.ArrayList(u8).init(page.arena); - try dump.writeNode(parser.elementToNode(self), buf.writer()); + try dump.writeNode(parser.elementToNode(self), .{}, buf.writer()); return buf.items; } diff --git a/src/browser/dump.zig b/src/browser/dump.zig index 978de3c9..f69ba095 100644 --- a/src/browser/dump.zig +++ b/src/browser/dump.zig @@ -21,10 +21,14 @@ const std = @import("std"); const parser = @import("netsurf.zig"); const Walker = @import("dom/walker.zig").WalkerChildren; +pub const Opts = struct { + exclude_scripts: bool = false, +}; + // writer must be a std.io.Writer -pub fn writeHTML(doc: *parser.Document, writer: anytype) !void { +pub fn writeHTML(doc: *parser.Document, opts: Opts, writer: anytype) !void { try writer.writeAll("\n"); - try writeChildren(parser.documentToNode(doc), writer); + try writeChildren(parser.documentToNode(doc), opts, writer); try writer.writeAll("\n"); } @@ -54,10 +58,15 @@ pub fn writeDocType(doc_type: *parser.DocumentType, writer: anytype) !void { try writer.writeAll(">"); } -pub fn writeNode(node: *parser.Node, writer: anytype) anyerror!void { +pub fn writeNode(node: *parser.Node, opts: Opts, writer: anytype) anyerror!void { switch (try parser.nodeType(node)) { .element => { // open the tag + const tag_type = try parser.elementHTMLGetTagType(@ptrCast(node)); + if (tag_type == .script and opts.exclude_scripts) { + return; + } + const tag = try parser.nodeLocalName(node); try writer.writeAll("<"); try writer.writeAll(tag); @@ -82,12 +91,12 @@ pub fn writeNode(node: *parser.Node, writer: anytype) anyerror!void { // void elements can't have any content. if (try isVoid(parser.nodeToElement(node))) return; - if (try parser.elementHTMLGetTagType(@ptrCast(node)) == .script) { + if (tag_type == .script) { try writer.writeAll(try parser.nodeTextContent(node) orelse ""); } else { // write the children // TODO avoid recursion - try writeChildren(node, writer); + try writeChildren(node, opts, writer); } // close the tag @@ -129,12 +138,12 @@ pub fn writeNode(node: *parser.Node, writer: anytype) anyerror!void { } // writer must be a std.io.Writer -pub fn writeChildren(root: *parser.Node, writer: anytype) !void { +pub fn writeChildren(root: *parser.Node, opts: Opts, writer: anytype) !void { const walker = Walker{}; var next: ?*parser.Node = null; while (true) { next = try walker.get_next(root, next) orelse break; - try writeNode(next.?, writer); + try writeNode(next.?, opts, writer); } } @@ -238,6 +247,6 @@ fn testWriteFullHTML(comptime expected: []const u8, src: []const u8) !void { defer parser.documentHTMLClose(doc_html) catch {}; const doc = parser.documentHTMLToDocument(doc_html); - try writeHTML(doc, buf.writer(testing.allocator)); + try writeHTML(doc, .{}, buf.writer(testing.allocator)); try testing.expectEqualStrings(expected, buf.items); } diff --git a/src/browser/page.zig b/src/browser/page.zig index fb64baaa..5e7afc48 100644 --- a/src/browser/page.zig +++ b/src/browser/page.zig @@ -142,7 +142,7 @@ pub const Page = struct { } // dump writes the page content into the given file. - pub fn dump(self: *const Page, out: std.fs.File) !void { + pub fn dump(self: *const Page, opts: Dump.Opts, out: std.fs.File) !void { if (self.raw_data) |raw_data| { // raw_data was set if the document was not HTML, dump the data content only. return try out.writeAll(raw_data); @@ -150,7 +150,7 @@ pub const Page = struct { // if the page has a pointer to a document, dumps the HTML. const doc = parser.documentHTMLToDocument(self.window.document); - try Dump.writeHTML(doc, out); + try Dump.writeHTML(doc, opts, out); } pub fn fetchModuleSource(ctx: *anyopaque, src: []const u8) !?[]const u8 { diff --git a/src/browser/xmlserializer/xmlserializer.zig b/src/browser/xmlserializer/xmlserializer.zig index 61e780c1..0cf07b9d 100644 --- a/src/browser/xmlserializer/xmlserializer.zig +++ b/src/browser/xmlserializer/xmlserializer.zig @@ -36,9 +36,9 @@ pub const XMLSerializer = struct { pub fn _serializeToString(_: *const XMLSerializer, root: *parser.Node, page: *Page) ![]const u8 { var buf = std.ArrayList(u8).init(page.arena); switch (try parser.nodeType(root)) { - .document => try dump.writeHTML(@as(*parser.Document, @ptrCast(root)), buf.writer()), + .document => try dump.writeHTML(@as(*parser.Document, @ptrCast(root)), .{}, buf.writer()), .document_type => try dump.writeDocType(@as(*parser.DocumentType, @ptrCast(root)), buf.writer()), - else => try dump.writeNode(root, buf.writer()), + else => try dump.writeNode(root, .{}, buf.writer()), } return buf.items; } diff --git a/src/main.zig b/src/main.zig index 95cdcead..cb99d227 100644 --- a/src/main.zig +++ b/src/main.zig @@ -134,7 +134,7 @@ fn run(alloc: Allocator) !void { // dump if (opts.dump) { - try page.dump(std.io.getStdOut()); + try page.dump(.{ .exclude_scripts = opts.noscript }, std.io.getStdOut()); } }, else => unreachable, @@ -212,6 +212,7 @@ const Command = struct { url: []const u8, dump: bool = false, common: Common, + noscript: bool = false, }; const Common = struct { @@ -275,6 +276,7 @@ const Command = struct { \\Options: \\--dump Dumps document to stdout. \\ Defaults to false. + \\--noscript Exclude