get: dump the HTML

This commit is contained in:
Pierre Tachoire
2024-01-05 16:30:51 +01:00
parent 2cdbf68526
commit cfacfc8db7
3 changed files with 30 additions and 3 deletions

View File

@@ -2,6 +2,7 @@ const std = @import("std");
const parser = @import("../netsurf.zig");
const Loader = @import("loader.zig").Loader;
const Dump = @import("dump.zig");
const Mime = @import("mime.zig");
const jsruntime = @import("jsruntime");
@@ -113,11 +114,14 @@ pub const Page = struct {
loader: *Loader,
env: *Env,
window: *Window,
doc: ?*parser.Document = null,
// handle url
rawuri: ?[]const u8 = null,
uri: std.Uri = undefined,
raw_data: ?[]const u8 = null,
fn init(
allocator: std.mem.Allocator,
loader: *Loader,
@@ -138,9 +142,25 @@ pub const Page = struct {
}
/// Releases the memory owned by the page.
///
/// The raw body is duplicated with `self.allocator` when a page is loaded,
/// so it must be released with the same allocator. The field is reset to
/// `null` afterwards so the buffer can never be freed twice, even if
/// `deinit` is called again.
pub fn deinit(self: *Page) void {
    if (self.raw_data) |s| {
        self.allocator.free(s);
        // Drop the dangling slice: a later deinit or dump sees "no data".
        self.raw_data = null;
    }
}
/// Writes the page content into the given file.
///
/// - When no data has been loaded, nothing is written.
/// - When data is loaded but no HTML document is attached to the page, the
///   raw data is written as is.
/// - Otherwise the HTML is dumped starting from the document element; if the
///   document has no document element, nothing is written.
///
/// Errors from the parser, the HTML dumper, or the file write are returned
/// to the caller.
pub fn dump(self: *Page, out: std.fs.File) !void {
    // No data loaded, nothing to do.
    const raw = self.raw_data orelse return;

    // No HTML document pointer available: dump the data content only.
    const doc = self.doc orelse return try out.writeAll(raw);

    // The page has a pointer to a document: dump the HTML.
    const root = try parser.documentGetDocumentElement(doc) orelse return;
    try Dump.htmlFile(root, out);
}
// spec reference: https://html.spec.whatwg.org/#document-lifecycle
@@ -165,6 +185,9 @@ pub const Page = struct {
if (result.body == null) return error.NoBody;
// save the body into the page.
self.raw_data = try self.allocator.dupe(u8, result.body.?);
// TODO handle charset
// https://html.spec.whatwg.org/#content-type
const ct = result.headers.getFirstValue("Content-Type") orelse {
@@ -188,6 +211,9 @@ pub const Page = struct {
const html_doc = try parser.documentHTMLParseFromStrAlloc(self.allocator, result.body.?);
const doc = parser.documentHTMLToDocument(html_doc);
// save a document's pointer in the page.
self.doc = doc;
// TODO set document.readyState to interactive
// https://html.spec.whatwg.org/#reporting-document-loading-status

View File

@@ -31,7 +31,7 @@ fn nodeFile(root: *parser.Element, out: File) !void {
try out.writeAll(" ");
try out.writeAll(try parser.attributeGetName(attr));
try out.writeAll("=\"");
try out.writeAll(try parser.attributeGetValue(attr));
try out.writeAll(try parser.attributeGetValue(attr) orelse "");
try out.writeAll("\"");
i += 1;
}

View File

@@ -56,4 +56,5 @@ pub fn main() !void {
var page = try browser.currentSession().createPage();
defer page.end();
try page.navigate(url);
try page.dump(std.io.getStdOut());
}