From b53d4a149c21b5f336da3cde05fb849c9ffea326 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Fri, 15 Dec 2023 17:25:23 +0100 Subject: [PATCH 01/46] add nav web api --- src/{dom.zig => apiweb.zig} | 2 ++ src/main.zig | 4 ++-- src/main_shell.zig | 4 ++-- src/main_wpt.zig | 5 +++-- src/nav/nav.zig | 7 +++++++ src/nav/window.zig | 41 +++++++++++++++++++++++++++++++++++++ src/run_tests.zig | 4 ++-- 7 files changed, 59 insertions(+), 8 deletions(-) rename src/{dom.zig => apiweb.zig} (86%) create mode 100644 src/nav/nav.zig create mode 100644 src/nav/window.zig diff --git a/src/dom.zig b/src/apiweb.zig similarity index 86% rename from src/dom.zig rename to src/apiweb.zig index 08f4c3f5..32831909 100644 --- a/src/dom.zig +++ b/src/apiweb.zig @@ -4,6 +4,7 @@ const Console = @import("jsruntime").Console; const DOM = @import("dom/dom.zig"); const HTML = @import("html/html.zig"); +const nav = @import("nav/nav.zig"); pub const HTMLDocument = @import("html/document.zig").HTMLDocument; @@ -12,4 +13,5 @@ pub const Interfaces = generate.Tuple(.{ Console, DOM.Interfaces, HTML.Interfaces, + nav.Interfaces, }); diff --git a/src/main.zig b/src/main.zig index 9341072e..4469cddb 100644 --- a/src/main.zig +++ b/src/main.zig @@ -3,9 +3,9 @@ const std = @import("std"); const jsruntime = @import("jsruntime"); const parser = @import("netsurf.zig"); -const DOM = @import("dom.zig"); +const apiweb = @import("apiweb.zig"); -pub const Types = jsruntime.reflect(DOM.Interfaces); +pub const Types = jsruntime.reflect(apiweb.Interfaces); const socket_path = "/tmp/browsercore-server.sock"; diff --git a/src/main_shell.zig b/src/main_shell.zig index 35d1fb39..920c77bf 100644 --- a/src/main_shell.zig +++ b/src/main_shell.zig @@ -3,11 +3,11 @@ const std = @import("std"); const jsruntime = @import("jsruntime"); const parser = @import("netsurf.zig"); -const DOM = @import("dom.zig"); +const apiweb = @import("apiweb.zig"); const html_test = @import("html_test.zig").html; -pub const Types = jsruntime.reflect(DOM.Interfaces); +pub const Types = jsruntime.reflect(apiweb.Interfaces); var doc: *parser.DocumentHTML = undefined; diff --git a/src/main_wpt.zig b/src/main_wpt.zig index e91f42c6..e252ad52 100644 --- a/src/main_wpt.zig +++ b/src/main_wpt.zig @@ -6,7 +6,8 @@ const Suite = @import("wpt/testcase.zig").Suite; const FileLoader = @import("wpt/fileloader.zig").FileLoader; const wpt = @import("wpt/run.zig"); -const DOM = @import("dom.zig"); +const apiweb = @import("apiweb.zig"); +const nav = @import("nav/nav.zig"); const HTMLElem = @import("html/elements.zig"); const wpt_dir = "tests/wpt"; @@ -29,7 +30,7 @@ const Out = enum { text, }; -pub const Types = jsruntime.reflect(DOM.Interfaces); +pub const Types = jsruntime.reflect(apiweb.Interfaces); // TODO For now the WPT tests run is specific to WPT. // It manually load js framwork libs, and run the first script w/ js content in diff --git a/src/nav/nav.zig b/src/nav/nav.zig new file mode 100644 index 00000000..42590cd5 --- /dev/null +++ b/src/nav/nav.zig @@ -0,0 +1,7 @@ +const generate = @import("../generate.zig"); + +const Window = @import("window.zig"); + +pub const Interfaces = generate.Tuple(.{ + Window, +}); diff --git a/src/nav/window.zig b/src/nav/window.zig new file mode 100644 index 00000000..49ca899d --- /dev/null +++ b/src/nav/window.zig @@ -0,0 +1,41 @@ +const std = @import("std"); + +const parser = @import("../netsurf.zig"); + +// https://dom.spec.whatwg.org/#interface-window-extensions +// https://html.spec.whatwg.org/multipage/nav-history-apis.html#window +pub const Window = struct { + pub const mem_guarantied = true; + + document: *parser.Document, + target: []const u8, + + pub fn create(doc: *parser.Document, target: ?[]const u8) Window { + return Window{ + .document = doc, + .target = target orelse "", + }; + } + + pub fn get_window(self: *Window) *parser.Document { + return self; + } + + pub fn get_self(self: *Window) *parser.Document { + return self; + } + + pub fn get_parent(self: *Window) *parser.Document { + return self; + } + + pub fn get_document(self: *Window) *parser.Document { + return self.document; + } + + pub fn get_name(self: *Window) []const u8 { + return self.target; + } + + // TODO we need to re-implement EventTarget interface. +}; diff --git a/src/run_tests.zig b/src/run_tests.zig index 1ba0f8b3..08eaea3b 100644 --- a/src/run_tests.zig +++ b/src/run_tests.zig @@ -5,7 +5,7 @@ const jsruntime = @import("jsruntime"); const generate = @import("generate.zig"); const parser = @import("netsurf.zig"); -const DOM = @import("dom.zig"); +const apiweb = @import("apiweb.zig"); const documentTestExecFn = @import("dom/document.zig").testExecFn; const HTMLDocumentTestExecFn = @import("html/document.zig").testExecFn; @@ -21,7 +21,7 @@ const DOMTokenListExecFn = @import("dom/token_list.zig").testExecFn; const NodeListTestExecFn = @import("dom/nodelist.zig").testExecFn; const AttrTestExecFn = @import("dom/attribute.zig").testExecFn; -pub const Types = jsruntime.reflect(DOM.Interfaces); +pub const Types = jsruntime.reflect(apiweb.Interfaces); var doc: *parser.DocumentHTML = undefined; From 16427410279386a8c9271eb182c875407010a625 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Tue, 19 Dec 2023 14:29:43 +0100 Subject: [PATCH 02/46] browser: start browser API --- build.zig | 23 +++++++ src/browser/browser.zig | 133 ++++++++++++++++++++++++++++++++++++++++ src/browser/loader.zig | 54 ++++++++++++++++ 3 files changed, 210 insertions(+) create mode 100644 src/browser/browser.zig create mode 100644 src/browser/loader.zig diff --git a/build.zig b/build.zig index 3004d839..90d200e9 100644 --- a/build.zig +++ b/build.zig @@ -115,6 +115,29 @@ pub fn build(b: *std.build.Builder) !void { // step const wpt_step = b.step("wpt", "WPT tests"); wpt_step.dependOn(&wpt_cmd.step); + + // get + // ----- + + // compile and install + const get = b.addExecutable(.{ + .name = "browsercore-get", + .root_source_file = .{ .path = "src/main_get.zig" }, + .target = target, + .optimize = mode, + }); + try common(get, options); + b.installArtifact(get); + + // run + const get_cmd = b.addRunArtifact(get); + get_cmd.step.dependOn(b.getInstallStep()); + if (b.args) |args| { + get_cmd.addArgs(args); + } + // step + const get_step = b.step("get", "request URL"); + get_step.dependOn(&get_cmd.step); } fn common( diff --git a/src/browser/browser.zig b/src/browser/browser.zig new file mode 100644 index 00000000..6fa291d2 --- /dev/null +++ b/src/browser/browser.zig @@ -0,0 +1,133 @@ +const std = @import("std"); + +const parser = @import("../netsurf.zig"); +const Loader = @import("loader.zig").Loader; + +const jsruntime = @import("jsruntime"); +const Loop = jsruntime.Loop; +const Env = jsruntime.Env; +const TPL = jsruntime.TPL; + +const apiweb = @import("../apiweb.zig"); +const apis = jsruntime.compile(apiweb.Interfaces); +const Window = @import("../nav/window.zig").Window; + +const log = std.log.scoped(.lpd_browser); + +pub const Browser = struct { + allocator: std.mem.Allocator, + session: Session = undefined, + + pub fn init(allocator: std.mem.Allocator) Browser { + var b = Browser{ .allocator = allocator }; + b.session = try b.createSession(null); + + return b; + } + + pub fn deinit(self: *Browser) void { + var session = self.session; + session.deinit(); + } + + pub fn currentSession(self: *Browser) *Session { + return &self.session; + } + + fn createSession(self: *Browser, uri: ?[]const u8) !Session { + return Session.init(self.allocator, uri orelse "about:blank"); + } +}; + +pub const Session = struct { + allocator: std.mem.Allocator, + uri: []const u8, + // TODO handle proxy + loader: Loader, + + fn init(allocator: std.mem.Allocator, uri: []const u8) Session { + return Session{ + .allocator = allocator, + .uri = uri, + .loader = Loader.init(allocator), + }; + } + + fn deinit(self: *Session) void { + self.loader.deinit(); + } + + pub fn createPage(self: *Session) !Page { + return Page.init(self); + } +}; + +pub const Page = struct { + arena: std.heap.ArenaAllocator, + session: *Session, + + fn init(session: *Session) Page { + return Page{ + .session = session, + .arena = std.heap.ArenaAllocator.init(session.allocator), + }; + } + + pub fn deinit(self: *Page) void { + self.arena.deinit(); + } + + pub fn navigate(self: *Page, uri: []const u8) !void { + const allocator = self.arena.allocator(); + + log.debug("starting GET {s}", .{uri}); + + // load the data + var result = try self.session.loader.fetch(allocator, uri); + defer result.deinit(); + + log.info("GET {s} {d}", .{ uri, result.status }); + + // TODO handle redirection + if (result.status != .ok) return error.BadStatusCode; + + if (result.body == null) return error.NoBody; + + // TODO handle charset + + // document + const html_doc = try parser.documentHTMLParseFromStrAlloc(allocator, result.body.?); + const doc = parser.documentHTMLToDocument(html_doc); + + // create JS env + var loop = try Loop.init(allocator); + defer loop.deinit(); + var js_env = try Env.init(allocator, &loop); + defer js_env.deinit(); + + // load APIs in JS env + var tpls: [apis.len]TPL = undefined; + try js_env.load(apis, &tpls); + + // start JS env + try js_env.start(allocator, apis); + defer js_env.stop(); + + // add global objects + const window = Window.create(doc, null); + _ = window; + // TODO should'nt we share the same pointer between instances of window? + // try js_env.addObject(apis, window, "self"); + // try js_env.addObject(apis, window, "window"); + try js_env.addObject(apis, doc, "document"); + } +}; + +test "create page" { + const allocator = std.testing.allocator; + var browser = Browser.init(allocator); + defer browser.deinit(); + + var page = try browser.currentSession().createPage(); + defer page.deinit(); +} diff --git a/src/browser/loader.zig b/src/browser/loader.zig new file mode 100644 index 00000000..8892f607 --- /dev/null +++ b/src/browser/loader.zig @@ -0,0 +1,54 @@ +const std = @import("std"); + +const user_agent = "Lightpanda.io/1.0"; + +pub const Loader = struct { + client: std.http.Client, + + pub const Response = struct { + req: std.http.Request, + + pub fn deinit(self: *Response) void { + self.req.deinit(); + } + }; + + pub fn init(allocator: std.mem.Allocator) Loader { + return Loader{ + .client = std.http.Client{ + .allocator = allocator, + }, + }; + } + + pub fn deinit(self: *Loader) void { + self.client.deinit(); + } + + // the caller must deinit the FetchResult. + pub fn fetch(self: *Loader, allocator: std.mem.Allocator, uri: []const u8) !std.http.Client.FetchResult { + var headers = try std.http.Headers.initList(allocator, &[_]std.http.Field{ + .{ .name = "User-Agent", .value = user_agent }, + .{ .name = "Accept", .value = "*/*" }, + .{ .name = "Accept-Language", .value = "en-US,en;q=0.5" }, + }); + defer headers.deinit(); + + return try self.client.fetch(allocator, .{ + .location = .{ .url = uri }, + .headers = headers, + .payload = .none, + }); + } +}; + +test "basic url fetch" { + const alloc = std.testing.allocator; + var loader = Loader.init(alloc); + defer loader.deinit(); + + var result = try loader.fetch(alloc, "https://en.wikipedia.org/wiki/Main_Page"); + defer result.deinit(); + + try std.testing.expect(result.status == std.http.Status.ok); +} From df2e6dcfc0b2efdc608856f945e7d2ec38f7dca9 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Wed, 20 Dec 2023 11:08:17 +0100 Subject: [PATCH 03/46] get: start work on get command --- src/browser/browser.zig | 26 ++++++++++++++----- src/main_get.zig | 57 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 7 deletions(-) create mode 100644 src/main_get.zig diff --git a/src/browser/browser.zig b/src/browser/browser.zig index 6fa291d2..452d3ed2 100644 --- a/src/browser/browser.zig +++ b/src/browser/browser.zig @@ -10,6 +10,7 @@ const TPL = jsruntime.TPL; const apiweb = @import("../apiweb.zig"); const apis = jsruntime.compile(apiweb.Interfaces); + const Window = @import("../nav/window.zig").Window; const log = std.log.scoped(.lpd_browser); @@ -65,11 +66,13 @@ pub const Session = struct { pub const Page = struct { arena: std.heap.ArenaAllocator, session: *Session, + env: Env, fn init(session: *Session) Page { return Page{ .session = session, .arena = std.heap.ArenaAllocator.init(session.allocator), + .env = undefined, }; } @@ -93,33 +96,42 @@ pub const Page = struct { if (result.body == null) return error.NoBody; + // TODO check content-type + // TODO handle charset // document + log.debug("parse html", .{}); const html_doc = try parser.documentHTMLParseFromStrAlloc(allocator, result.body.?); const doc = parser.documentHTMLToDocument(html_doc); - // create JS env + log.debug("init loop", .{}); var loop = try Loop.init(allocator); defer loop.deinit(); - var js_env = try Env.init(allocator, &loop); - defer js_env.deinit(); + + // create JS env + log.debug("init js env", .{}); + self.env = try Env.init(allocator, &loop); + defer self.env.deinit(); // load APIs in JS env + log.debug("load js apis", .{}); var tpls: [apis.len]TPL = undefined; - try js_env.load(apis, &tpls); + try self.env.load(apis, &tpls); // start JS env - try js_env.start(allocator, apis); - defer js_env.stop(); + log.debug("start js env", .{}); + try self.env.start(allocator, apis); + defer self.env.stop(); // add global objects + log.debug("setup global env", .{}); const window = Window.create(doc, null); _ = window; // TODO should'nt we share the same pointer between instances of window? // try js_env.addObject(apis, window, "self"); // try js_env.addObject(apis, window, "window"); - try js_env.addObject(apis, doc, "document"); + try self.env.addObject(apis, doc, "document"); } }; diff --git a/src/main_get.zig b/src/main_get.zig new file mode 100644 index 00000000..d4700773 --- /dev/null +++ b/src/main_get.zig @@ -0,0 +1,57 @@ +const std = @import("std"); +const b = @import("browser/browser.zig"); + +pub const std_options = struct { + pub const log_level = .debug; +}; + +const usage = + \\usage: {s} [options] + \\ request the url with the browser + \\ + \\ -h, --help Print this help message and exit. + \\ +; + +pub fn main() !void { + var gpa = std.heap.GeneralPurposeAllocator(.{}){}; + defer { + const check = gpa.deinit(); + if (check == .leak) { + std.log.warn("leaks detected\n", .{}); + } + } + const allocator = gpa.allocator(); + + var args = try std.process.argsWithAllocator(allocator); + defer args.deinit(); + + const execname = args.next().?; + var url: []const u8 = ""; + + while (args.next()) |arg| { + if (std.mem.eql(u8, "-h", arg) or std.mem.eql(u8, "--help", arg)) { + try std.io.getStdErr().writer().print(usage, .{execname}); + std.os.exit(0); + } + // allow only one url + if (url.len != 0) { + try std.io.getStdErr().writer().print(usage, .{execname}); + std.os.exit(1); + } + url = arg; + } + + if (url.len == 0) { + try std.io.getStdErr().writer().print(usage, .{execname}); + std.os.exit(1); + } + + var browser = b.Browser.init(allocator); + defer browser.deinit(); + + var page = try browser.currentSession().createPage(); + defer page.deinit(); + + try page.navigate(url); +} From 228f44a57d7eba4b8c47ba2c69187bdb5462e367 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Wed, 20 Dec 2023 11:31:32 +0100 Subject: [PATCH 04/46] browser: add initVM/deinitVM in main --- src/browser/browser.zig | 8 ++++++++ src/main_get.zig | 7 +++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/browser/browser.zig b/src/browser/browser.zig index 452d3ed2..d3fd8d53 100644 --- a/src/browser/browser.zig +++ b/src/browser/browser.zig @@ -19,6 +19,14 @@ pub const Browser = struct { allocator: std.mem.Allocator, session: Session = undefined, + var vm: jsruntime.VM = undefined; + pub fn initVM() void { + vm = jsruntime.VM.init(); + } + pub fn deinitVM() void { + vm.deinit(); + } + pub fn init(allocator: std.mem.Allocator) Browser { var b = Browser{ .allocator = allocator }; b.session = try b.createSession(null); diff --git a/src/main_get.zig b/src/main_get.zig index d4700773..9854af1a 100644 --- a/src/main_get.zig +++ b/src/main_get.zig @@ -1,5 +1,5 @@ const std = @import("std"); -const b = @import("browser/browser.zig"); +const Browser = @import("browser/browser.zig").Browser; pub const std_options = struct { pub const log_level = .debug; @@ -47,7 +47,10 @@ pub fn main() !void { std.os.exit(1); } - var browser = b.Browser.init(allocator); + Browser.initVM(); + defer Browser.deinitVM(); + + var browser = Browser.init(allocator); defer browser.deinit(); var page = try browser.currentSession().createPage(); From 6ff121334fd46831cb05a3859a7fd6bf38d08e31 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Wed, 20 Dec 2023 18:00:50 +0100 Subject: [PATCH 05/46] browser: refactor session and page Now session owns the js env and the loader. page start and stop the js session. --- src/browser/browser.zig | 139 ++++++++++++++++++++-------------------- src/main_get.zig | 5 +- 2 files changed, 71 insertions(+), 73 deletions(-) diff --git a/src/browser/browser.zig b/src/browser/browser.zig index d3fd8d53..3b32384c 100644 --- a/src/browser/browser.zig +++ b/src/browser/browser.zig @@ -15,9 +15,13 @@ const Window = @import("../nav/window.zig").Window; const log = std.log.scoped(.lpd_browser); +// Browser is an instance of the browser. +// You can create multiple browser instances. +// It contains only one session but initVM() and deinitVM() must be called only +// once per main. pub const Browser = struct { allocator: std.mem.Allocator, - session: Session = undefined, + session: *Session = undefined, var vm: jsruntime.VM = undefined; pub fn initVM() void { @@ -27,74 +31,93 @@ pub const Browser = struct { vm.deinit(); } - pub fn init(allocator: std.mem.Allocator) Browser { - var b = Browser{ .allocator = allocator }; - b.session = try b.createSession(null); - - return b; + pub fn init(allocator: std.mem.Allocator) !Browser { + return Browser{ + .allocator = allocator, + .session = try Session.init(allocator, "about:blank"), + }; } pub fn deinit(self: *Browser) void { - var session = self.session; - session.deinit(); + self.session.deinit(); + self.allocator.destroy(self.session); } pub fn currentSession(self: *Browser) *Session { - return &self.session; - } - - fn createSession(self: *Browser, uri: ?[]const u8) !Session { - return Session.init(self.allocator, uri orelse "about:blank"); + return self.session; } }; +// Session is like a browser's tab. +// It owns the js env and the loader and an allocator arena for all the pages +// of the session. +// You can create successively multiple pages for a session, but you must +// deinit a page before running another one. pub const Session = struct { - allocator: std.mem.Allocator, + arena: std.heap.ArenaAllocator, uri: []const u8, - // TODO handle proxy - loader: Loader, + tpls: [apis.len]TPL = undefined, - fn init(allocator: std.mem.Allocator, uri: []const u8) Session { - return Session{ - .allocator = allocator, + // TODO handle proxy + loader: Loader = undefined, + env: Env = undefined, + loop: Loop = undefined, + + fn init(allocator: std.mem.Allocator, uri: []const u8) !*Session { + var self = try allocator.create(Session); + self.* = Session{ .uri = uri, - .loader = Loader.init(allocator), + .arena = std.heap.ArenaAllocator.init(allocator), }; + + const aallocator = self.arena.allocator(); + + self.loader = Loader.init(aallocator); + self.loop = try Loop.init(aallocator); + self.env = try Env.init(aallocator, &self.loop); + + try self.env.load(apis, &self.tpls); + + return self; } fn deinit(self: *Session) void { self.loader.deinit(); - } - - pub fn createPage(self: *Session) !Page { - return Page.init(self); - } -}; - -pub const Page = struct { - arena: std.heap.ArenaAllocator, - session: *Session, - env: Env, - - fn init(session: *Session) Page { - return Page{ - .session = session, - .arena = std.heap.ArenaAllocator.init(session.allocator), - .env = undefined, - }; - } - - pub fn deinit(self: *Page) void { + self.loop.deinit(); + self.env.deinit(); self.arena.deinit(); } - pub fn navigate(self: *Page, uri: []const u8) !void { - const allocator = self.arena.allocator(); + pub fn createPage(self: *Session) !Page { + return Page.init(self.arena.allocator(), &self.loader, &self.env); + } +}; +// Page navigates to an url. +// You can navigates multiple urls with the same page, but you have to call +// end() to stop the previous navigation before starting a new one. +pub const Page = struct { + allocator: std.mem.Allocator, + loader: *Loader, + env: *Env, + + fn init(allocator: std.mem.Allocator, loader: *Loader, env: *Env) Page { + return Page{ + .allocator = allocator, + .loader = loader, + .env = env, + }; + } + + pub fn end(self: *Page) void { + self.env.stop(); + } + + pub fn navigate(self: *Page, uri: []const u8) !void { log.debug("starting GET {s}", .{uri}); // load the data - var result = try self.session.loader.fetch(allocator, uri); + var result = try self.loader.fetch(self.allocator, uri); defer result.deinit(); log.info("GET {s} {d}", .{ uri, result.status }); @@ -110,27 +133,12 @@ pub const Page = struct { // document log.debug("parse html", .{}); - const html_doc = try parser.documentHTMLParseFromStrAlloc(allocator, result.body.?); + const html_doc = try parser.documentHTMLParseFromStrAlloc(self.allocator, result.body.?); const doc = parser.documentHTMLToDocument(html_doc); - log.debug("init loop", .{}); - var loop = try Loop.init(allocator); - defer loop.deinit(); - - // create JS env - log.debug("init js env", .{}); - self.env = try Env.init(allocator, &loop); - defer self.env.deinit(); - - // load APIs in JS env - log.debug("load js apis", .{}); - var tpls: [apis.len]TPL = undefined; - try self.env.load(apis, &tpls); - // start JS env log.debug("start js env", .{}); - try self.env.start(allocator, apis); - defer self.env.stop(); + try self.env.start(self.allocator, apis); // add global objects log.debug("setup global env", .{}); @@ -142,12 +150,3 @@ pub const Page = struct { try self.env.addObject(apis, doc, "document"); } }; - -test "create page" { - const allocator = std.testing.allocator; - var browser = Browser.init(allocator); - defer browser.deinit(); - - var page = try browser.currentSession().createPage(); - defer page.deinit(); -} diff --git a/src/main_get.zig b/src/main_get.zig index 9854af1a..4c66a3a7 100644 --- a/src/main_get.zig +++ b/src/main_get.zig @@ -50,11 +50,10 @@ pub fn main() !void { Browser.initVM(); defer Browser.deinitVM(); - var browser = Browser.init(allocator); + var browser = try Browser.init(allocator); defer browser.deinit(); var page = try browser.currentSession().createPage(); - defer page.deinit(); - + defer page.end(); try page.navigate(url); } From 0d32fc62bab6845fcee2a3f2f37325dce58f6f95 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Thu, 21 Dec 2023 09:44:48 +0100 Subject: [PATCH 06/46] browser: inject window global object --- src/browser/browser.zig | 5 ++--- src/nav/nav.zig | 2 +- src/nav/window.zig | 6 +++--- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/browser/browser.zig b/src/browser/browser.zig index 3b32384c..6adbfdda 100644 --- a/src/browser/browser.zig +++ b/src/browser/browser.zig @@ -143,10 +143,9 @@ pub const Page = struct { // add global objects log.debug("setup global env", .{}); const window = Window.create(doc, null); - _ = window; // TODO should'nt we share the same pointer between instances of window? - // try js_env.addObject(apis, window, "self"); - // try js_env.addObject(apis, window, "window"); + try self.env.addObject(apis, window, "self"); + try self.env.addObject(apis, window, "window"); try self.env.addObject(apis, doc, "document"); } }; diff --git a/src/nav/nav.zig b/src/nav/nav.zig index 42590cd5..9aa96f3a 100644 --- a/src/nav/nav.zig +++ b/src/nav/nav.zig @@ -1,6 +1,6 @@ const generate = @import("../generate.zig"); -const Window = @import("window.zig"); +const Window = @import("window.zig").Window; pub const Interfaces = generate.Tuple(.{ Window, diff --git a/src/nav/window.zig b/src/nav/window.zig index 49ca899d..6424a7d2 100644 --- a/src/nav/window.zig +++ b/src/nav/window.zig @@ -17,15 +17,15 @@ pub const Window = struct { }; } - pub fn get_window(self: *Window) *parser.Document { + pub fn get_window(self: *Window) *Window { return self; } - pub fn get_self(self: *Window) *parser.Document { + pub fn get_self(self: *Window) *Window { return self; } - pub fn get_parent(self: *Window) *parser.Document { + pub fn get_parent(self: *Window) *Window { return self; } From bd9c63f22fc02a6e5d319e3a9c159fab9ed4917c Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Thu, 21 Dec 2023 11:35:43 +0100 Subject: [PATCH 07/46] browser: update window injection TODO --- src/browser/browser.zig | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/browser/browser.zig b/src/browser/browser.zig index 6adbfdda..d613031f 100644 --- a/src/browser/browser.zig +++ b/src/browser/browser.zig @@ -143,9 +143,17 @@ pub const Page = struct { // add global objects log.debug("setup global env", .{}); const window = Window.create(doc, null); - // TODO should'nt we share the same pointer between instances of window? - try self.env.addObject(apis, window, "self"); + + // TODO we must share the same pointer between window and self. + // once https://github.com/lightpanda-io/jsruntime-lib/issues/171 is + // done, replace the 2 lines with: + // + // const obj = try js_env.addObject(apis, window, "window"); + // try js_env.attachObject(try js_env.getGlobal(), "self", obj); + // try self.env.addObject(apis, window, "window"); + try self.env.addObject(apis, window, "self"); + try self.env.addObject(apis, doc, "document"); } }; From cc5280460e262cc7130b09bf2082d6dd7c0d1338 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Thu, 21 Dec 2023 11:36:09 +0100 Subject: [PATCH 08/46] browser: add TODO for fragment URL --- src/browser/browser.zig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/browser/browser.zig b/src/browser/browser.zig index d613031f..0f58333e 100644 --- a/src/browser/browser.zig +++ b/src/browser/browser.zig @@ -116,6 +116,8 @@ pub const Page = struct { pub fn navigate(self: *Page, uri: []const u8) !void { log.debug("starting GET {s}", .{uri}); + // TODO handle fragment in url. + // load the data var result = try self.loader.fetch(self.allocator, uri); defer result.deinit(); From 7d226a43b67c63458593f9e1164dd085409fa602 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Thu, 21 Dec 2023 11:52:00 +0100 Subject: [PATCH 09/46] browser: window must survive between 2 pages So window is moved to session --- src/browser/browser.zig | 36 ++++++++++++++++++++++-------------- src/nav/window.zig | 9 ++++++--- 2 files changed, 28 insertions(+), 17 deletions(-) diff --git a/src/browser/browser.zig b/src/browser/browser.zig index 0f58333e..1117c3e2 100644 --- a/src/browser/browser.zig +++ b/src/browser/browser.zig @@ -63,11 +63,14 @@ pub const Session = struct { env: Env = undefined, loop: Loop = undefined, + window: Window, + fn init(allocator: std.mem.Allocator, uri: []const u8) !*Session { var self = try allocator.create(Session); self.* = Session{ .uri = uri, .arena = std.heap.ArenaAllocator.init(allocator), + .window = Window.create(null), }; const aallocator = self.arena.allocator(); @@ -89,7 +92,12 @@ pub const Session = struct { } pub fn createPage(self: *Session) !Page { - return Page.init(self.arena.allocator(), &self.loader, &self.env); + return Page.init( + self.arena.allocator(), + &self.loader, + &self.env, + &self.window, + ); } }; @@ -100,12 +108,19 @@ pub const Page = struct { allocator: std.mem.Allocator, loader: *Loader, env: *Env, + window: *Window, - fn init(allocator: std.mem.Allocator, loader: *Loader, env: *Env) Page { + fn init( + allocator: std.mem.Allocator, + loader: *Loader, + env: *Env, + window: *Window, + ) Page { return Page{ .allocator = allocator, .loader = loader, .env = env, + .window = window, }; } @@ -135,27 +150,20 @@ pub const Page = struct { // document log.debug("parse html", .{}); + // TODO inject the URL to the document. const html_doc = try parser.documentHTMLParseFromStrAlloc(self.allocator, result.body.?); const doc = parser.documentHTMLToDocument(html_doc); + self.window.replaceDocument(doc); + // start JS env log.debug("start js env", .{}); try self.env.start(self.allocator, apis); // add global objects log.debug("setup global env", .{}); - const window = Window.create(doc, null); - - // TODO we must share the same pointer between window and self. - // once https://github.com/lightpanda-io/jsruntime-lib/issues/171 is - // done, replace the 2 lines with: - // - // const obj = try js_env.addObject(apis, window, "window"); - // try js_env.attachObject(try js_env.getGlobal(), "self", obj); - // - try self.env.addObject(apis, window, "window"); - try self.env.addObject(apis, window, "self"); - + try self.env.addObject(apis, self.window, "window"); + try self.env.addObject(apis, self.window, "self"); try self.env.addObject(apis, doc, "document"); } }; diff --git a/src/nav/window.zig b/src/nav/window.zig index 6424a7d2..346e8e06 100644 --- a/src/nav/window.zig +++ b/src/nav/window.zig @@ -7,16 +7,19 @@ const parser = @import("../netsurf.zig"); pub const Window = struct { pub const mem_guarantied = true; - document: *parser.Document, + document: *parser.Document = undefined, target: []const u8, - pub fn create(doc: *parser.Document, target: ?[]const u8) Window { + pub fn create(target: ?[]const u8) Window { return Window{ - .document = doc, .target = target orelse "", }; } + pub fn replaceDocument(self: *Window, doc: *parser.Document) void { + self.document = doc; + } + pub fn get_window(self: *Window) *Window { return self; } From a470a7eaefc7bba89097eb1edf1ab3707a8ab854 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Thu, 21 Dec 2023 14:40:36 +0100 Subject: [PATCH 10/46] browser: add som steps comments --- src/browser/browser.zig | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/src/browser/browser.zig b/src/browser/browser.zig index 1117c3e2..3069c145 100644 --- a/src/browser/browser.zig +++ b/src/browser/browser.zig @@ -13,6 +13,8 @@ const apis = jsruntime.compile(apiweb.Interfaces); const Window = @import("../nav/window.zig").Window; +const FetchResult = std.http.Client.FetchResult; + const log = std.log.scoped(.lpd_browser); // Browser is an instance of the browser. @@ -126,8 +128,10 @@ pub const Page = struct { pub fn end(self: *Page) void { self.env.stop(); + // TODO unload document: https://html.spec.whatwg.org/#unloading-documents } + // spec reference: https://html.spec.whatwg.org/#document-lifecycle pub fn navigate(self: *Page, uri: []const u8) !void { log.debug("starting GET {s}", .{uri}); @@ -144,18 +148,29 @@ pub const Page = struct { if (result.body == null) return error.NoBody; - // TODO check content-type - // TODO handle charset + // https://html.spec.whatwg.org/#content-type - // document + // TODO check content-type + try self.loadHTMLDoc(&result); + } + + // https://html.spec.whatwg.org/#read-html + fn loadHTMLDoc(self: *Page, result: *FetchResult) !void { log.debug("parse html", .{}); - // TODO inject the URL to the document. const html_doc = try parser.documentHTMLParseFromStrAlloc(self.allocator, result.body.?); const doc = parser.documentHTMLToDocument(html_doc); + // TODO set document.readyState to interactive + // https://html.spec.whatwg.org/#reporting-document-loading-status + + // TODO inject the URL to the document. + // TODO set the referrer to the document. + self.window.replaceDocument(doc); + // https://html.spec.whatwg.org/#read-html + // start JS env log.debug("start js env", .{}); try self.env.start(self.allocator, apis); @@ -165,5 +180,14 @@ pub const Page = struct { try self.env.addObject(apis, self.window, "window"); try self.env.addObject(apis, self.window, "self"); try self.env.addObject(apis, doc, "document"); + + // https://html.spec.whatwg.org/#process-link-headers + + // TODO dispatch DOMContentLoaded before the transition to "complete", + // at the point where all subresources apart from async script elements + // have loaded. + // https://html.spec.whatwg.org/#reporting-document-loading-status + + // TODO set document.readyState to complete } }; From 5ee1bbd68eb37279e92ebaaaecbd846942f10aac Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Thu, 21 Dec 2023 17:17:36 +0100 Subject: [PATCH 11/46] browser: html document load next --- src/browser/browser.zig | 146 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 144 insertions(+), 2 deletions(-) diff --git a/src/browser/browser.zig b/src/browser/browser.zig index 3069c145..037f71e1 100644 --- a/src/browser/browser.zig +++ b/src/browser/browser.zig @@ -12,6 +12,7 @@ const apiweb = @import("../apiweb.zig"); const apis = jsruntime.compile(apiweb.Interfaces); const Window = @import("../nav/window.zig").Window; +const Walker = @import("../dom/html_collection.zig").WalkerDepthFirst; const FetchResult = std.http.Client.FetchResult; @@ -164,7 +165,7 @@ pub const Page = struct { // TODO set document.readyState to interactive // https://html.spec.whatwg.org/#reporting-document-loading-status - // TODO inject the URL to the document. + // TODO inject the URL to the document including the fragment. // TODO set the referrer to the document. self.window.replaceDocument(doc); @@ -181,13 +182,154 @@ pub const Page = struct { try self.env.addObject(apis, self.window, "self"); try self.env.addObject(apis, doc, "document"); - // https://html.spec.whatwg.org/#process-link-headers + // browse the DOM tree to retrieve scripts + var sasync = std.ArrayList(*parser.Element).init(self.allocator); + defer sasync.deinit(); + + const root = try parser.documentGetDocumentElement(doc) orelse return; // TODO send loaded event in this case? + const walker = Walker{}; + var next: ?*parser.Node = null; + while (true) { + next = try walker.get_next(parser.elementToNode(root), next) orelse break; + + // ignore non-elements nodes. + if (try parser.nodeType(next.?) != .element) { + continue; + } + + const e = parser.nodeToElement(next.?); + const tag = try parser.elementHTMLGetTagType(@as(*parser.ElementHTML, @ptrCast(e))); + switch (tag) { + .script => { + // ignore non-js script. + // > type + // > Attribute is not set (default), an empty string, or a JavaScript MIME + // > type indicates that the script is a "classic script", containing + // > JavaScript code. + // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/script#attribute_is_not_set_default_an_empty_string_or_a_javascript_mime_type + const stype = try parser.elementGetAttribute(e, "type"); + if (!isJS(stype)) { + continue; + } + + // Ignore the defer attribute b/c we analyze all script + // after the document has been parsed. + // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/script#defer + + // TODO use fetchpriority + // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/script#fetchpriority + + // > async + // > For classic scripts, if the async attribute is present, + // > then the classic script will be fetched in parallel to + // > parsing and evaluated as soon as it is available. + // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/script#async + if (try parser.elementGetAttribute(e, "async") != null) { + try sasync.append(e); + continue; + } + + // TODO handle for attribute + // TODO handle event attribute + + // TODO defer + // > This Boolean attribute is set to indicate to a browser + // > that the script is meant to be executed after the + // > document has been parsed, but before firing + // > DOMContentLoaded. + // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/script#defer + // defer allow us to load a script w/o blocking the rest of + // evaluations. + + // > Scripts without async, defer or type="module" + // > attributes, as well as inline scripts without the + // > type="module" attribute, are fetched and executed + // > immediately before the browser continues to parse the + // > page. + // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/script#notes + try self.evalScript(e); + }, + else => continue, + } + } + + // TODO wait for deferred scripts // TODO dispatch DOMContentLoaded before the transition to "complete", // at the point where all subresources apart from async script elements // have loaded. // https://html.spec.whatwg.org/#reporting-document-loading-status + // eval async scripts. + for (sasync.items) |e| { + try self.evalScript(e); + } + + // TODO wait for async scripts + // TODO set document.readyState to complete } + + // evalScript evaluates the src in priority. + // if no src is present, we evaluate the text source. + // https://html.spec.whatwg.org/multipage/scripting.html#script-processing-model + fn evalScript(self: *Page, e: *parser.Element) !void { + // https://html.spec.whatwg.org/multipage/webappapis.html#fetch-a-classic-script + const opt_src = try parser.elementGetAttribute(e, "src"); + if (opt_src) |src| { + // TODO resolve the url. + log.info("starting GET {s}", .{src}); + var fetchres = try self.loader.fetch(self.allocator, src); + defer fetchres.deinit(); + + log.info("GET {s}: {d}", .{ src, fetchres.status }); + + if (fetchres.status != .ok) { + return error.BadStatusCode; + } + + // TODO check content-type + + // check no body + // TODO If el's result is null, then fire an event named error at + // el, and return. + if (fetchres.body == null) return; + + var res = jsruntime.JSResult{}; + try self.env.run(self.allocator, fetchres.body.?, src, &res, null); + defer res.deinit(self.allocator); + + log.debug("eval script {s}: {s}", .{ src, res.result }); + + // TODO If el's from an external file is true, then fire an event + // named load at el. + + return; + } + + const opt_text = try parser.nodeTextContent(parser.elementToNode(e)); + if (opt_text) |text| { + // TODO handle charset attribute + var res = jsruntime.JSResult{}; + try self.env.run(self.allocator, text, "", &res, null); + defer res.deinit(self.allocator); + + log.debug("eval script: {s}", .{res.result}); + + return; + } + + // nothing has been loaded. + // TODO If el's result is null, then fire an event named error at + // el, and return. + } + + // > type + // > Attribute is not set (default), an empty string, or a JavaScript MIME + // > type indicates that the script is a "classic script", containing + // > JavaScript code. + // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/script#attribute_is_not_set_default_an_empty_string_or_a_javascript_mime_type + fn isJS(stype: ?[]const u8) bool { + return stype == null or stype.?.len == 0 or std.mem.eql(u8, stype.?, "application/javascript") or !std.mem.eql(u8, stype.?, "module"); + } }; From 5e8c9ccd1f5eba987bbe47a4443481f1f678b4e2 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Fri, 22 Dec 2023 09:29:54 +0100 Subject: [PATCH 12/46] browser: use std.Uri to fetch and resolve URIs --- src/browser/browser.zig | 34 ++++++++++++++++++++++++++-------- src/browser/loader.zig | 4 ++-- 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/src/browser/browser.zig b/src/browser/browser.zig index 037f71e1..19ea16ec 100644 --- a/src/browser/browser.zig +++ b/src/browser/browser.zig @@ -113,6 +113,10 @@ pub const Page = struct { env: *Env, window: *Window, + // handle url + rawuri: ?[]const u8 = null, + uri: std.Uri = undefined, + fn init( allocator: std.mem.Allocator, loader: *Loader, @@ -132,17 +136,28 @@ pub const Page = struct { // TODO unload document: https://html.spec.whatwg.org/#unloading-documents } + pub fn deinit(self: *Page) void { + if (self.url != null) { + self.allocator.free(self.url); + } + } + // spec reference: https://html.spec.whatwg.org/#document-lifecycle pub fn navigate(self: *Page, uri: []const u8) !void { log.debug("starting GET {s}", .{uri}); + // own the url + if (self.rawuri) |prev| self.allocator.free(prev); + self.rawuri = try self.allocator.dupe(u8, uri); + self.uri = std.Uri.parse(self.rawuri.?) catch try std.Uri.parseWithoutScheme(self.rawuri.?); + // TODO handle fragment in url. // load the data - var result = try self.loader.fetch(self.allocator, uri); + var result = try self.loader.fetch(self.allocator, self.uri); defer result.deinit(); - log.info("GET {s} {d}", .{ uri, result.status }); + log.info("GET {any} {d}", .{ self.uri, result.status }); // TODO handle redirection if (result.status != .ok) return error.BadStatusCode; @@ -277,12 +292,15 @@ pub const Page = struct { // https://html.spec.whatwg.org/multipage/webappapis.html#fetch-a-classic-script const opt_src = try parser.elementGetAttribute(e, "src"); if (opt_src) |src| { - // TODO resolve the url. - log.info("starting GET {s}", .{src}); - var fetchres = try self.loader.fetch(self.allocator, src); + log.debug("starting GET {s}", .{src}); + + const u = std.Uri.parse(src) catch try std.Uri.parseWithoutScheme(src); + const ru = try std.Uri.resolve(self.uri, u, false, self.allocator); + + var fetchres = try self.loader.fetch(self.allocator, ru); defer fetchres.deinit(); - log.info("GET {s}: {d}", .{ src, fetchres.status }); + log.info("GET {any}: {d}", .{ ru, fetchres.status }); if (fetchres.status != .ok) { return error.BadStatusCode; @@ -299,7 +317,7 @@ pub const Page = struct { try self.env.run(self.allocator, fetchres.body.?, src, &res, null); defer res.deinit(self.allocator); - log.debug("eval script {s}: {s}", .{ src, res.result }); + log.debug("eval remote {s}: {s}", .{ src, res.result }); // TODO If el's from an external file is true, then fire an event // named load at el. @@ -314,7 +332,7 @@ pub const Page = struct { try self.env.run(self.allocator, text, "", &res, null); defer res.deinit(self.allocator); - log.debug("eval script: {s}", .{res.result}); + log.debug("eval inline: {s}", .{res.result}); return; } diff --git a/src/browser/loader.zig b/src/browser/loader.zig index 8892f607..70bcd0aa 100644 --- a/src/browser/loader.zig +++ b/src/browser/loader.zig @@ -26,7 +26,7 @@ pub const Loader = struct { } // the caller must deinit the FetchResult. - pub fn fetch(self: *Loader, allocator: std.mem.Allocator, uri: []const u8) !std.http.Client.FetchResult { + pub fn fetch(self: *Loader, allocator: std.mem.Allocator, uri: std.Uri) !std.http.Client.FetchResult { var headers = try std.http.Headers.initList(allocator, &[_]std.http.Field{ .{ .name = "User-Agent", .value = user_agent }, .{ .name = "Accept", .value = "*/*" }, @@ -35,7 +35,7 @@ pub const Loader = struct { defer headers.deinit(); return try self.client.fetch(allocator, .{ - .location = .{ .url = uri }, + .location = .{ .uri = uri }, .headers = headers, .payload = .none, }); From 670e920633785e14ec73fa7756442bd22b227d58 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Fri, 22 Dec 2023 09:31:07 +0100 Subject: [PATCH 13/46] browser: rename log scope --- src/browser/browser.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/browser/browser.zig b/src/browser/browser.zig index 19ea16ec..7f8c3aae 100644 --- a/src/browser/browser.zig +++ b/src/browser/browser.zig @@ -16,7 +16,7 @@ const Walker = @import("../dom/html_collection.zig").WalkerDepthFirst; const FetchResult = std.http.Client.FetchResult; -const log = std.log.scoped(.lpd_browser); +const log = std.log.scoped(.browser); // Browser is an instance of the browser. // You can create multiple browser instances. From 51cd959e8c919ce85cd298b81ba45ffe4959063e Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Fri, 22 Dec 2023 09:36:28 +0100 Subject: [PATCH 14/46] browser: catch evalJS error and log them --- src/browser/browser.zig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/browser/browser.zig b/src/browser/browser.zig index 7f8c3aae..92536c29 100644 --- a/src/browser/browser.zig +++ b/src/browser/browser.zig @@ -262,7 +262,7 @@ pub const Page = struct { // > immediately before the browser continues to parse the // > page. // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/script#notes - try self.evalScript(e); + self.evalScript(e) catch |err| log.warn("evaljs: {any}", .{err}); }, else => continue, } @@ -277,7 +277,7 @@ pub const Page = struct { // eval async scripts. for (sasync.items) |e| { - try self.evalScript(e); + self.evalScript(e) catch |err| log.warn("evaljs: {any}", .{err}); } // TODO wait for async scripts From 0652faaf20293a0797e241b3f433e820325285b6 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Fri, 22 Dec 2023 09:39:06 +0100 Subject: [PATCH 15/46] browser: more consitent eval js result log --- src/browser/browser.zig | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/browser/browser.zig b/src/browser/browser.zig index 92536c29..690ec1d0 100644 --- a/src/browser/browser.zig +++ b/src/browser/browser.zig @@ -317,7 +317,11 @@ pub const Page = struct { try self.env.run(self.allocator, fetchres.body.?, src, &res, null); defer res.deinit(self.allocator); - log.debug("eval remote {s}: {s}", .{ src, res.result }); + if (res.success) { + log.debug("eval remote {s}: {s}", .{ src, res.result }); + } else { + log.info("eval remote {s}: {s}", .{ src, res.result }); + } // TODO If el's from an external file is true, then fire an event // named load at el. @@ -332,7 +336,11 @@ pub const Page = struct { try self.env.run(self.allocator, text, "", &res, null); defer res.deinit(self.allocator); - log.debug("eval inline: {s}", .{res.result}); + if (res.success) { + log.debug("eval inline: {s}", .{res.result}); + } else { + log.info("eval inline: {s}", .{res.result}); + } return; } From 3be4e050eec6527ad958e2797f81fb3a31f1e88c Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Fri, 22 Dec 2023 14:56:47 +0100 Subject: [PATCH 16/46] browser: add a mime type parser --- src/browser/mime.zig | 132 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 132 insertions(+) create mode 100644 src/browser/mime.zig diff --git a/src/browser/mime.zig b/src/browser/mime.zig new file mode 100644 index 00000000..77c8fd47 --- /dev/null +++ b/src/browser/mime.zig @@ -0,0 +1,132 @@ +const std = @import("std"); +const testing = std.testing; + +const Self = @This(); + +const MimeError = error{ + Empty, + TooBig, + InvalidChar, + Invalid, +}; + +mtype: []const u8, +msubtype: []const u8, +params: []const u8, + +// https://mimesniff.spec.whatwg.org/#http-token-code-point +fn isHTTPCodePoint(c: u8) bool { + return switch (c) { + '!', '#', '$', '%', '&', '\'', '*', '+', '-', '.', '^' => return true, + '_', '`', '|', '~' => return true, + else => std.ascii.isAlphanumeric(c), + }; +} + +// https://mimesniff.spec.whatwg.org/#parsing-a-mime-type +// The parser disallows trailing spaces. +pub fn parse(s: []const u8) Self.MimeError!Self { + const ln = s.len; + if (ln == 0) return MimeError.Empty; + // limit input size + if (ln > 255) return MimeError.TooBig; + + const states = enum { startmtype, mtype, startmsubtype, msubtype, startparams, params }; + var state: states = .startmtype; + + var res = Self{ + .mtype = "", + .msubtype = "", + .params = "", + }; + + var i: usize = 0; + var start: usize = 0; + while (i < ln) { + defer i += 1; + const c = s[i]; + switch (state) { + .startmtype => { + // ignore leading spaces + if (std.ascii.isWhitespace(c)) continue; + if (!isHTTPCodePoint(c)) return MimeError.InvalidChar; + state = .mtype; + start = i; + }, + .mtype => { + if (c == '/') { + if (start == i - 1) return MimeError.Empty; + res.mtype = s[start..i]; + state = .startmsubtype; + continue; + } + if (!isHTTPCodePoint(c)) return MimeError.InvalidChar; + }, + .startmsubtype => { + // ignore leading spaces + if (std.ascii.isWhitespace(c)) continue; + if (!isHTTPCodePoint(c)) return MimeError.InvalidChar; + state = .msubtype; + start = i; + }, + .msubtype => { + if (c == ';') { + if (start == i - 1) return MimeError.Empty; + res.msubtype = s[start..i]; + state = .startparams; + continue; + } + }, + .startparams => { + // ignore leading spaces + if (std.ascii.isWhitespace(c)) continue; + if (!isHTTPCodePoint(c)) return MimeError.InvalidChar; + state = .msubtype; + start = i; + }, + .params => { + if (start == i - 1) return MimeError.Empty; + //TODO parse params + res.params = s[i..]; + }, + } + } + + if (state != .msubtype and state != .params) { + return MimeError.Invalid; + } + + if (state == .msubtype) { + if (start == i - 1) return MimeError.Invalid; + res.msubtype = s[start..i]; + } + + return res; +} + +test "parse valid" { + for ([_][]const u8{ + "text/html", + "text/javascript1.1", + "text/plain; charset=UTF-8", + " \ttext/html", + "text/ \thtml", + }) |tc| { + std.debug.print("case {s}\n", .{tc}); + const m = try Self.parse(tc); + std.debug.print("res: {s}/{s}\n", .{ m.mtype, m.msubtype }); + } +} + +test "parse invalid" { + for ([_][]const u8{ + "", + "text/html;", + "/text/html", + "/html", + }) |tc| { + std.debug.print("case {s}\n", .{tc}); + _ = Self.parse(tc) catch continue; + try testing.expect(false); + } +} From c94528dbd9e6283fcb6a8a0b31d69e785cb51755 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Fri, 22 Dec 2023 16:04:51 +0100 Subject: [PATCH 17/46] browser: refacto mime type parser --- src/browser/mime.zig | 191 ++++++++++++++++++++++++++++--------------- 1 file changed, 123 insertions(+), 68 deletions(-) diff --git a/src/browser/mime.zig b/src/browser/mime.zig index 77c8fd47..16518e46 100644 --- a/src/browser/mime.zig +++ b/src/browser/mime.zig @@ -6,14 +6,99 @@ const Self = @This(); const MimeError = error{ Empty, TooBig, - InvalidChar, Invalid, + InvalidChar, }; mtype: []const u8, msubtype: []const u8, params: []const u8, +const reader = struct { + s: []const u8, + i: usize = 0, + + fn until(self: *reader, c: u8) []const u8 { + const ln = self.s.len; + const start = self.i; + while (self.i < ln) { + if (c == self.s[self.i]) return self.s[start..self.i]; + self.i += 1; + } + + return self.s[start..self.i]; + } + + fn tail(self: *reader) []const u8 { + if (self.i > self.s.len) return ""; + defer self.i = self.s.len; + return self.s[self.i..]; + } + + fn skip(self: *reader) bool { + if (self.i >= self.s.len) return false; + self.i += 1; + return true; + } +}; + +test "reader.skip" { + var r = reader{ .s = "foo" }; + try testing.expect(r.skip()); + try testing.expect(r.skip()); + try testing.expect(r.skip()); + try testing.expect(!r.skip()); + try testing.expect(!r.skip()); +} + +test "reader.tail" { + var r = reader{ .s = "foo" }; + try testing.expectEqualStrings("foo", r.tail()); + try testing.expectEqualStrings("", r.tail()); +} + +test "reader.until" { + var r = reader{ .s = "foo.bar.baz" }; + try testing.expectEqualStrings("foo", r.until('.')); + _ = r.skip(); + try testing.expectEqualStrings("bar", r.until('.')); + _ = r.skip(); + try testing.expectEqualStrings("baz", r.until('.')); + + r = reader{ .s = "foo" }; + try testing.expectEqualStrings("foo", r.until('.')); + + r = reader{ .s = "" }; + try testing.expectEqualStrings("", r.until('.')); +} + +fn trim(s: []const u8) []const u8 { + const ln = s.len; + if (ln == 0) { + return ""; + } + var start: usize = 0; + while (start < ln) { + if (!std.ascii.isWhitespace(s[start])) break; + start += 1; + } + + var end: usize = ln; + while (end > 0) { + if (!std.ascii.isWhitespace(s[end - 1])) break; + end -= 1; + } + + return s[start..end]; +} + +test "trim" { + try testing.expectEqualStrings("", trim("")); + try testing.expectEqualStrings("foo", trim("foo")); + try testing.expectEqualStrings("foo", trim(" \n\tfoo")); + try testing.expectEqualStrings("foo", trim("foo \n\t")); +} + // https://mimesniff.spec.whatwg.org/#http-token-code-point fn isHTTPCodePoint(c: u8) bool { return switch (c) { @@ -23,83 +108,43 @@ fn isHTTPCodePoint(c: u8) bool { }; } +fn valid(s: []const u8) bool { + const ln = s.len; + var i: usize = 0; + while (i < ln) { + if (!isHTTPCodePoint(s[i])) return false; + i += 1; + } + return true; +} + // https://mimesniff.spec.whatwg.org/#parsing-a-mime-type -// The parser disallows trailing spaces. pub fn parse(s: []const u8) Self.MimeError!Self { const ln = s.len; if (ln == 0) return MimeError.Empty; // limit input size if (ln > 255) return MimeError.TooBig; - const states = enum { startmtype, mtype, startmsubtype, msubtype, startparams, params }; - var state: states = .startmtype; - var res = Self{ .mtype = "", .msubtype = "", .params = "", }; - var i: usize = 0; - var start: usize = 0; - while (i < ln) { - defer i += 1; - const c = s[i]; - switch (state) { - .startmtype => { - // ignore leading spaces - if (std.ascii.isWhitespace(c)) continue; - if (!isHTTPCodePoint(c)) return MimeError.InvalidChar; - state = .mtype; - start = i; - }, - .mtype => { - if (c == '/') { - if (start == i - 1) return MimeError.Empty; - res.mtype = s[start..i]; - state = .startmsubtype; - continue; - } - if (!isHTTPCodePoint(c)) return MimeError.InvalidChar; - }, - .startmsubtype => { - // ignore leading spaces - if (std.ascii.isWhitespace(c)) continue; - if (!isHTTPCodePoint(c)) return MimeError.InvalidChar; - state = .msubtype; - start = i; - }, - .msubtype => { - if (c == ';') { - if (start == i - 1) return MimeError.Empty; - res.msubtype = s[start..i]; - state = .startparams; - continue; - } - }, - .startparams => { - // ignore leading spaces - if (std.ascii.isWhitespace(c)) continue; - if (!isHTTPCodePoint(c)) return MimeError.InvalidChar; - state = .msubtype; - start = i; - }, - .params => { - if (start == i - 1) return MimeError.Empty; - //TODO parse params - res.params = s[i..]; - }, - } - } + var r = reader{ .s = s }; - if (state != .msubtype and state != .params) { - return MimeError.Invalid; - } + res.mtype = trim(r.until('/')); + if (res.mtype.len == 0) return MimeError.Invalid; + if (!valid(res.mtype)) return MimeError.InvalidChar; - if (state == .msubtype) { - if (start == i - 1) return MimeError.Invalid; - res.msubtype = s[start..i]; - } + if (!r.skip()) return MimeError.Invalid; + res.msubtype = trim(r.until(';')); + if (res.msubtype.len == 0) return MimeError.Invalid; + if (!valid(res.msubtype)) return MimeError.InvalidChar; + + if (!r.skip()) return res; + res.params = trim(r.tail()); + if (res.params.len == 0) return MimeError.Invalid; return res; } @@ -107,25 +152,35 @@ pub fn parse(s: []const u8) Self.MimeError!Self { test "parse valid" { for ([_][]const u8{ "text/html", - "text/javascript1.1", - "text/plain; charset=UTF-8", " \ttext/html", + "text \t/html", "text/ \thtml", + "text/html \t", }) |tc| { - std.debug.print("case {s}\n", .{tc}); const m = try Self.parse(tc); - std.debug.print("res: {s}/{s}\n", .{ m.mtype, m.msubtype }); + try testing.expectEqualStrings("text", m.mtype); + try testing.expectEqualStrings("html", m.msubtype); } + const m2 = try Self.parse("text/javascript1.5"); + try testing.expectEqualStrings("text", m2.mtype); + try testing.expectEqualStrings("javascript1.5", m2.msubtype); + + const m3 = try Self.parse("text/html; charset=UTF-8"); + try testing.expectEqualStrings("text", m3.mtype); + try testing.expectEqualStrings("html", m3.msubtype); + try testing.expectEqualStrings("charset=UTF-8", m3.params); } test "parse invalid" { for ([_][]const u8{ "", + "te xt/html;", + "te@xt/html;", + "text/ht@ml;", "text/html;", "/text/html", "/html", }) |tc| { - std.debug.print("case {s}\n", .{tc}); _ = Self.parse(tc) catch continue; try testing.expect(false); } From 20b2bfa00e227f1208faceb278f56d74910b7675 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Fri, 22 Dec 2023 16:21:39 +0100 Subject: [PATCH 18/46] browser: parse http content-type --- src/browser/browser.zig | 17 ++++++++++++++--- src/browser/mime.zig | 16 ++++++++++------ 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/src/browser/browser.zig b/src/browser/browser.zig index 690ec1d0..d52845fa 100644 --- a/src/browser/browser.zig +++ b/src/browser/browser.zig @@ -2,6 +2,7 @@ const std = @import("std"); const parser = @import("../netsurf.zig"); const Loader = @import("loader.zig").Loader; +const Mime = @import("mime.zig"); const jsruntime = @import("jsruntime"); const Loop = jsruntime.Loop; @@ -166,9 +167,19 @@ pub const Page = struct { // TODO handle charset // https://html.spec.whatwg.org/#content-type - - // TODO check content-type - try self.loadHTMLDoc(&result); + const ct = result.headers.getFirstValue("Content-Type") orelse { + // no content type in HTTP headers. + // TODO try to sniff mime type from the body. + log.info("no content-type HTTP header", .{}); + return; + }; + const mime = try Mime.parse(ct); + if (mime.eql(Mime.HTML)) { + // TODO check content-type + try self.loadHTMLDoc(&result); + } else { + log.info("none HTML document: {s}", .{ct}); + } } // https://html.spec.whatwg.org/#read-html diff --git a/src/browser/mime.zig b/src/browser/mime.zig index 16518e46..da99e1e4 100644 --- a/src/browser/mime.zig +++ b/src/browser/mime.zig @@ -14,6 +14,9 @@ mtype: []const u8, msubtype: []const u8, params: []const u8, +pub const HTML = Self{ .mtype = "text", .msubtype = "html", .params = "" }; +pub const Javascript = Self{ .mtype = "application", .msubtype = "javascript", .params = "" }; + const reader = struct { s: []const u8, i: usize = 0, @@ -125,12 +128,7 @@ pub fn parse(s: []const u8) Self.MimeError!Self { // limit input size if (ln > 255) return MimeError.TooBig; - var res = Self{ - .mtype = "", - .msubtype = "", - .params = "", - }; - + var res = Self{ .mtype = "", .msubtype = "", .params = "" }; var r = reader{ .s = s }; res.mtype = trim(r.until('/')); @@ -185,3 +183,9 @@ test "parse invalid" { try testing.expect(false); } } + +// Compare type and subtype. +pub fn eql(self: Self, b: Self) bool { + if (!std.mem.eql(u8, self.mtype, b.mtype)) return false; + return std.mem.eql(u8, self.msubtype, b.msubtype); +} From c2ade9061b12e841d3a27b29acfaa51452bb780a Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Fri, 5 Jan 2024 11:04:17 +0100 Subject: [PATCH 19/46] move window from nav to html --- src/apiweb.zig | 2 -- src/browser/browser.zig | 2 +- src/html/html.zig | 2 ++ src/{nav => html}/window.zig | 0 src/main_wpt.zig | 1 - src/nav/nav.zig | 7 ------- 6 files changed, 3 insertions(+), 11 deletions(-) rename src/{nav => html}/window.zig (100%) delete mode 100644 src/nav/nav.zig diff --git a/src/apiweb.zig b/src/apiweb.zig index 32831909..08f4c3f5 100644 --- a/src/apiweb.zig +++ b/src/apiweb.zig @@ -4,7 +4,6 @@ const Console = @import("jsruntime").Console; const DOM = @import("dom/dom.zig"); const HTML = @import("html/html.zig"); -const nav = @import("nav/nav.zig"); pub const HTMLDocument = @import("html/document.zig").HTMLDocument; @@ -13,5 +12,4 @@ pub const Interfaces = generate.Tuple(.{ Console, DOM.Interfaces, HTML.Interfaces, - nav.Interfaces, }); diff --git a/src/browser/browser.zig b/src/browser/browser.zig index d52845fa..2111e290 100644 --- a/src/browser/browser.zig +++ b/src/browser/browser.zig @@ -12,7 +12,7 @@ const TPL = jsruntime.TPL; const apiweb = @import("../apiweb.zig"); const apis = jsruntime.compile(apiweb.Interfaces); -const Window = @import("../nav/window.zig").Window; +const Window = @import("../html/window.zig").Window; const Walker = @import("../dom/html_collection.zig").WalkerDepthFirst; const FetchResult = std.http.Client.FetchResult; diff --git a/src/html/html.zig b/src/html/html.zig index a77e2cea..90d96f2b 100644 --- a/src/html/html.zig +++ b/src/html/html.zig @@ -2,10 +2,12 @@ const generate = @import("../generate.zig"); const HTMLDocument = @import("document.zig").HTMLDocument; const HTMLElem = @import("elements.zig"); +const Window = @import("window.zig").Window; pub const Interfaces = generate.Tuple(.{ HTMLDocument, HTMLElem.HTMLElement, HTMLElem.HTMLMediaElement, HTMLElem.Interfaces, + Window, }); diff --git a/src/nav/window.zig b/src/html/window.zig similarity index 100% rename from src/nav/window.zig rename to src/html/window.zig diff --git a/src/main_wpt.zig b/src/main_wpt.zig index e252ad52..4d4dde57 100644 --- a/src/main_wpt.zig +++ b/src/main_wpt.zig @@ -7,7 +7,6 @@ const FileLoader = @import("wpt/fileloader.zig").FileLoader; const wpt = @import("wpt/run.zig"); const apiweb = @import("apiweb.zig"); -const nav = @import("nav/nav.zig"); const HTMLElem = @import("html/elements.zig"); const wpt_dir = "tests/wpt"; diff --git a/src/nav/nav.zig b/src/nav/nav.zig deleted file mode 100644 index 9aa96f3a..00000000 --- a/src/nav/nav.zig +++ /dev/null @@ -1,7 +0,0 @@ -const generate = @import("../generate.zig"); - -const Window = @import("window.zig").Window; - -pub const Interfaces = generate.Tuple(.{ - Window, -}); From b96a251c37bee7da30903b747d5f80cd86e22990 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Fri, 5 Jan 2024 11:44:50 +0100 Subject: [PATCH 20/46] main: global document must be the html doc --- src/browser/browser.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/browser/browser.zig b/src/browser/browser.zig index 2111e290..92e34f19 100644 --- a/src/browser/browser.zig +++ b/src/browser/browser.zig @@ -206,7 +206,7 @@ pub const Page = struct { log.debug("setup global env", .{}); try self.env.addObject(apis, self.window, "window"); try self.env.addObject(apis, self.window, "self"); - try self.env.addObject(apis, doc, "document"); + try self.env.addObject(apis, html_doc, "document"); // browse the DOM tree to retrieve scripts var sasync = std.ArrayList(*parser.Element).init(self.allocator); From b778749511b6105f97eba7a143ea55d0e6c2186a Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Fri, 5 Jan 2024 11:45:57 +0100 Subject: [PATCH 21/46] browser: remove useless switch --- src/browser/browser.zig | 97 ++++++++++++++++++++--------------------- 1 file changed, 48 insertions(+), 49 deletions(-) diff --git a/src/browser/browser.zig b/src/browser/browser.zig index 92e34f19..7060b8c7 100644 --- a/src/browser/browser.zig +++ b/src/browser/browser.zig @@ -225,58 +225,57 @@ pub const Page = struct { const e = parser.nodeToElement(next.?); const tag = try parser.elementHTMLGetTagType(@as(*parser.ElementHTML, @ptrCast(e))); - switch (tag) { - .script => { - // ignore non-js script. - // > type - // > Attribute is not set (default), an empty string, or a JavaScript MIME - // > type indicates that the script is a "classic script", containing - // > JavaScript code. - // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/script#attribute_is_not_set_default_an_empty_string_or_a_javascript_mime_type - const stype = try parser.elementGetAttribute(e, "type"); - if (!isJS(stype)) { - continue; - } - // Ignore the defer attribute b/c we analyze all script - // after the document has been parsed. - // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/script#defer + // ignore non-script tags + if (tag != .script) continue; - // TODO use fetchpriority - // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/script#fetchpriority - - // > async - // > For classic scripts, if the async attribute is present, - // > then the classic script will be fetched in parallel to - // > parsing and evaluated as soon as it is available. - // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/script#async - if (try parser.elementGetAttribute(e, "async") != null) { - try sasync.append(e); - continue; - } - - // TODO handle for attribute - // TODO handle event attribute - - // TODO defer - // > This Boolean attribute is set to indicate to a browser - // > that the script is meant to be executed after the - // > document has been parsed, but before firing - // > DOMContentLoaded. - // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/script#defer - // defer allow us to load a script w/o blocking the rest of - // evaluations. - - // > Scripts without async, defer or type="module" - // > attributes, as well as inline scripts without the - // > type="module" attribute, are fetched and executed - // > immediately before the browser continues to parse the - // > page. - // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/script#notes - self.evalScript(e) catch |err| log.warn("evaljs: {any}", .{err}); - }, - else => continue, + // ignore non-js script. + // > type + // > Attribute is not set (default), an empty string, or a JavaScript MIME + // > type indicates that the script is a "classic script", containing + // > JavaScript code. + // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/script#attribute_is_not_set_default_an_empty_string_or_a_javascript_mime_type + const stype = try parser.elementGetAttribute(e, "type"); + if (!isJS(stype)) { + continue; } + + // Ignore the defer attribute b/c we analyze all script + // after the document has been parsed. + // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/script#defer + + // TODO use fetchpriority + // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/script#fetchpriority + + // > async + // > For classic scripts, if the async attribute is present, + // > then the classic script will be fetched in parallel to + // > parsing and evaluated as soon as it is available. + // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/script#async + if (try parser.elementGetAttribute(e, "async") != null) { + try sasync.append(e); + continue; + } + + // TODO handle for attribute + // TODO handle event attribute + + // TODO defer + // > This Boolean attribute is set to indicate to a browser + // > that the script is meant to be executed after the + // > document has been parsed, but before firing + // > DOMContentLoaded. + // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/script#defer + // defer allow us to load a script w/o blocking the rest of + // evaluations. + + // > Scripts without async, defer or type="module" + // > attributes, as well as inline scripts without the + // > type="module" attribute, are fetched and executed + // > immediately before the browser continues to parse the + // > page. + // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/script#notes + self.evalScript(e) catch |err| log.warn("evaljs: {any}", .{err}); } // TODO wait for deferred scripts From 2aefd3736c0d96ac4764a4d7b727122d624203f4 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Fri, 5 Jan 2024 11:54:19 +0100 Subject: [PATCH 22/46] typo fix --- src/browser/browser.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/browser/browser.zig b/src/browser/browser.zig index 7060b8c7..a70969d9 100644 --- a/src/browser/browser.zig +++ b/src/browser/browser.zig @@ -178,7 +178,7 @@ pub const Page = struct { // TODO check content-type try self.loadHTMLDoc(&result); } else { - log.info("none HTML document: {s}", .{ct}); + log.info("non-HTML document: {s}", .{ct}); } } From 2cdbf68526b9dbaeb8396cf6a5f734c95f354c86 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Fri, 5 Jan 2024 16:28:33 +0100 Subject: [PATCH 23/46] browser: add a func to dump HTML --- src/browser/dump.zig | 94 ++++++++++++++++++++++++++++++++++++++++++++ src/run_tests.zig | 9 +++++ 2 files changed, 103 insertions(+) create mode 100644 src/browser/dump.zig diff --git a/src/browser/dump.zig b/src/browser/dump.zig new file mode 100644 index 00000000..d40cc2f0 --- /dev/null +++ b/src/browser/dump.zig @@ -0,0 +1,94 @@ +const std = @import("std"); +const File = std.fs.File; + +const parser = @import("../netsurf.zig"); +const Walker = @import("../dom/html_collection.zig").WalkerChildren; + +pub fn htmlFile(root: *parser.Element, out: File) !void { + try out.writeAll("\n"); + try nodeFile(root, out); + try out.writeAll("\n"); +} + +fn nodeFile(root: *parser.Element, out: File) !void { + const walker = Walker{}; + var next: ?*parser.Node = null; + while (true) { + next = try walker.get_next(parser.elementToNode(root), next) orelse break; + switch (try parser.nodeType(next.?)) { + .element => { + // open the tag + const tag = try parser.nodeLocalName(next.?); + try out.writeAll("<"); + try out.writeAll(tag); + + // write the attributes + const map = try parser.nodeGetAttributes(next.?); + const ln = try parser.namedNodeMapGetLength(map); + var i: u32 = 0; + while (i < ln) { + const attr = try parser.namedNodeMapItem(map, i) orelse break; + try out.writeAll(" "); + try out.writeAll(try parser.attributeGetName(attr)); + try out.writeAll("=\""); + try out.writeAll(try parser.attributeGetValue(attr)); + try out.writeAll("\""); + i += 1; + } + + try out.writeAll(">"); + + // write the children + // TODO avoid recursion + try nodeFile(parser.nodeToElement(next.?), out); + + // close the tag + try out.writeAll(""); + }, + .text => { + const v = try parser.nodeValue(next.?) orelse continue; + try out.writeAll(v); + }, + .cdata_section => { + const v = try parser.nodeValue(next.?) orelse continue; + try out.writeAll(""); + }, + .comment => { + const v = try parser.nodeValue(next.?) orelse continue; + try out.writeAll(""); + }, + // TODO handle processing instruction dump + .processing_instruction => continue, + // document fragment is outside of the main document DOM, so we + // don't output it. + .document_fragment => continue, + // document will never be called, but required for completeness. + .document => continue, + // done globally instead, but required for completeness. + .document_type => continue, + // deprecated + .attribute => continue, + .entity_reference => continue, + .entity => continue, + .notation => continue, + } + } +} + +// HTMLFileTestFn is run by run_tests.zig +pub fn HTMLFileTestFn(out: File) !void { + const doc_html = try parser.documentHTMLParseFromFileAlloc(std.testing.allocator, "test.html"); + // ignore close error + defer parser.documentHTMLClose(doc_html) catch {}; + + const doc = parser.documentHTMLToDocument(doc_html); + const root = try parser.documentGetDocumentElement(doc) orelse return error.DocumentNullRoot; + + try htmlFile(root, out); +} diff --git a/src/run_tests.zig b/src/run_tests.zig index 08eaea3b..3a8fc499 100644 --- a/src/run_tests.zig +++ b/src/run_tests.zig @@ -122,3 +122,12 @@ test "bug document html parsing #4" { doc = try parser.documentHTMLParse(file.reader(), "UTF-8"); parser.documentHTMLClose(doc) catch {}; } + +const dump = @import("browser/dump.zig"); +test "run browser tests" { + // const out = std.io.getStdOut(); + const out = try std.fs.openFileAbsolute("/dev/null", .{ .mode = .write_only }); + defer out.close(); + + try dump.HTMLFileTestFn(out); +} From cfacfc8db78bf0012ad17d95b0d2e151c1442f9c Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Fri, 5 Jan 2024 16:30:51 +0100 Subject: [PATCH 24/46] get: dump the HTML --- src/browser/browser.zig | 30 ++++++++++++++++++++++++++++-- src/browser/dump.zig | 2 +- src/main_get.zig | 1 + 3 files changed, 30 insertions(+), 3 deletions(-) diff --git a/src/browser/browser.zig b/src/browser/browser.zig index a70969d9..4ee6fb6e 100644 --- a/src/browser/browser.zig +++ b/src/browser/browser.zig @@ -2,6 +2,7 @@ const std = @import("std"); const parser = @import("../netsurf.zig"); const Loader = @import("loader.zig").Loader; +const Dump = @import("dump.zig"); const Mime = @import("mime.zig"); const jsruntime = @import("jsruntime"); @@ -113,11 +114,14 @@ pub const Page = struct { loader: *Loader, env: *Env, window: *Window, + doc: ?*parser.Document = null, // handle url rawuri: ?[]const u8 = null, uri: std.Uri = undefined, + raw_data: ?[]const u8 = null, + fn init( allocator: std.mem.Allocator, loader: *Loader, @@ -138,9 +142,25 @@ pub const Page = struct { } pub fn deinit(self: *Page) void { - if (self.url != null) { - self.allocator.free(self.url); + if (self.raw_data) |s| { + self.allocator.free(s); } + if (self.raw_data) |s| { + self.allocator.free(s); + } + } + + // dump writes the page content into the given file. + pub fn dump(self: *Page, out: std.fs.File) !void { + // no data loaded, nothin to do. + if (self.raw_data == null) return; + + // if no HTML document pointer available, dump the data content only. + if (self.doc == null) return try out.writeAll(self.raw_data.?); + + // if the page has a pointer to a document, dumps the HTML. + const root = try parser.documentGetDocumentElement(self.doc.?) orelse return; + try Dump.htmlFile(root, out); } // spec reference: https://html.spec.whatwg.org/#document-lifecycle @@ -165,6 +185,9 @@ pub const Page = struct { if (result.body == null) return error.NoBody; + // save the body into the page. + self.raw_data = try self.allocator.dupe(u8, result.body.?); + // TODO handle charset // https://html.spec.whatwg.org/#content-type const ct = result.headers.getFirstValue("Content-Type") orelse { @@ -188,6 +211,9 @@ pub const Page = struct { const html_doc = try parser.documentHTMLParseFromStrAlloc(self.allocator, result.body.?); const doc = parser.documentHTMLToDocument(html_doc); + // save a document's pointer in the page. + self.doc = doc; + // TODO set document.readyState to interactive // https://html.spec.whatwg.org/#reporting-document-loading-status diff --git a/src/browser/dump.zig b/src/browser/dump.zig index d40cc2f0..a7a52366 100644 --- a/src/browser/dump.zig +++ b/src/browser/dump.zig @@ -31,7 +31,7 @@ fn nodeFile(root: *parser.Element, out: File) !void { try out.writeAll(" "); try out.writeAll(try parser.attributeGetName(attr)); try out.writeAll("=\""); - try out.writeAll(try parser.attributeGetValue(attr)); + try out.writeAll(try parser.attributeGetValue(attr) orelse ""); try out.writeAll("\""); i += 1; } diff --git a/src/main_get.zig b/src/main_get.zig index 4c66a3a7..24b53695 100644 --- a/src/main_get.zig +++ b/src/main_get.zig @@ -56,4 +56,5 @@ pub fn main() !void { var page = try browser.currentSession().createPage(); defer page.end(); try page.navigate(url); + try page.dump(std.io.getStdOut()); } From e625e17597bf8dc7af62e3484820f9ffa609f21a Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Mon, 8 Jan 2024 11:48:33 +0100 Subject: [PATCH 25/46] get: add --dump option --- src/main_get.zig | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/main_get.zig b/src/main_get.zig index 24b53695..0c6e926b 100644 --- a/src/main_get.zig +++ b/src/main_get.zig @@ -9,7 +9,8 @@ const usage = \\usage: {s} [options] \\ request the url with the browser \\ - \\ -h, --help Print this help message and exit. + \\ -h, --help Print this help message and exit. + \\ --dump Dump document in stdout \\ ; @@ -28,12 +29,17 @@ pub fn main() !void { const execname = args.next().?; var url: []const u8 = ""; + var dump: bool = false; while (args.next()) |arg| { if (std.mem.eql(u8, "-h", arg) or std.mem.eql(u8, "--help", arg)) { try std.io.getStdErr().writer().print(usage, .{execname}); std.os.exit(0); } + if (std.mem.eql(u8, "--dump", arg)) { + dump = true; + continue; + } // allow only one url if (url.len != 0) { try std.io.getStdErr().writer().print(usage, .{execname}); @@ -56,5 +62,8 @@ pub fn main() !void { var page = try browser.currentSession().createPage(); defer page.end(); try page.navigate(url); - try page.dump(std.io.getStdOut()); + + if (dump) { + try page.dump(std.io.getStdOut()); + } } From 647fbc6cedad307a735d87123c140b629200cfb3 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Tue, 9 Jan 2024 11:14:54 +0100 Subject: [PATCH 26/46] browser: update document parse signature --- src/browser/browser.zig | 2 +- src/browser/dump.zig | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/browser/browser.zig b/src/browser/browser.zig index 4ee6fb6e..62d4d4fd 100644 --- a/src/browser/browser.zig +++ b/src/browser/browser.zig @@ -208,7 +208,7 @@ pub const Page = struct { // https://html.spec.whatwg.org/#read-html fn loadHTMLDoc(self: *Page, result: *FetchResult) !void { log.debug("parse html", .{}); - const html_doc = try parser.documentHTMLParseFromStrAlloc(self.allocator, result.body.?); + const html_doc = try parser.documentHTMLParseFromStr(result.body.?); const doc = parser.documentHTMLToDocument(html_doc); // save a document's pointer in the page. diff --git a/src/browser/dump.zig b/src/browser/dump.zig index a7a52366..a23bd5b9 100644 --- a/src/browser/dump.zig +++ b/src/browser/dump.zig @@ -83,7 +83,10 @@ fn nodeFile(root: *parser.Element, out: File) !void { // HTMLFileTestFn is run by run_tests.zig pub fn HTMLFileTestFn(out: File) !void { - const doc_html = try parser.documentHTMLParseFromFileAlloc(std.testing.allocator, "test.html"); + const file = try std.fs.cwd().openFile("test.html", .{}); + defer file.close(); + + const doc_html = try parser.documentHTMLParse(file.reader()); // ignore close error defer parser.documentHTMLClose(doc_html) catch {}; From 3a3da494dc155923d9fd5dd8923dd91016657d9e Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Wed, 10 Jan 2024 18:09:02 +0100 Subject: [PATCH 27/46] get: upgrade jsruntime API changes --- src/browser/browser.zig | 16 ++++++++-------- src/main_get.zig | 4 ++++ 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/browser/browser.zig b/src/browser/browser.zig index 62d4d4fd..ea2f056c 100644 --- a/src/browser/browser.zig +++ b/src/browser/browser.zig @@ -1,5 +1,7 @@ const std = @import("std"); +const Types = @import("root").Types; + const parser = @import("../netsurf.zig"); const Loader = @import("loader.zig").Loader; const Dump = @import("dump.zig"); @@ -8,10 +10,8 @@ const Mime = @import("mime.zig"); const jsruntime = @import("jsruntime"); const Loop = jsruntime.Loop; const Env = jsruntime.Env; -const TPL = jsruntime.TPL; const apiweb = @import("../apiweb.zig"); -const apis = jsruntime.compile(apiweb.Interfaces); const Window = @import("../html/window.zig").Window; const Walker = @import("../dom/html_collection.zig").WalkerDepthFirst; @@ -61,12 +61,12 @@ pub const Browser = struct { pub const Session = struct { arena: std.heap.ArenaAllocator, uri: []const u8, - tpls: [apis.len]TPL = undefined, // TODO handle proxy loader: Loader = undefined, env: Env = undefined, loop: Loop = undefined, + jstypes: [Types.len]usize = undefined, window: Window, @@ -84,7 +84,7 @@ pub const Session = struct { self.loop = try Loop.init(aallocator); self.env = try Env.init(aallocator, &self.loop); - try self.env.load(apis, &self.tpls); + try self.env.load(&self.jstypes); return self; } @@ -226,13 +226,13 @@ pub const Page = struct { // start JS env log.debug("start js env", .{}); - try self.env.start(self.allocator, apis); + try self.env.start(self.allocator); // add global objects log.debug("setup global env", .{}); - try self.env.addObject(apis, self.window, "window"); - try self.env.addObject(apis, self.window, "self"); - try self.env.addObject(apis, html_doc, "document"); + try self.env.addObject(self.window, "window"); + try self.env.addObject(self.window, "self"); + try self.env.addObject(html_doc, "document"); // browse the DOM tree to retrieve scripts var sasync = std.ArrayList(*parser.Element).init(self.allocator); diff --git a/src/main_get.zig b/src/main_get.zig index 0c6e926b..a387df47 100644 --- a/src/main_get.zig +++ b/src/main_get.zig @@ -1,6 +1,10 @@ const std = @import("std"); const Browser = @import("browser/browser.zig").Browser; +const jsruntime = @import("jsruntime"); +const apiweb = @import("apiweb.zig"); +pub const Types = jsruntime.reflect(apiweb.Interfaces); + pub const std_options = struct { pub const log_level = .debug; }; From 5d262fc026781626ed62225eb2869f1fa7f04b5e Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Wed, 10 Jan 2024 12:38:56 +0100 Subject: [PATCH 28/46] loader: parse html per chunk --- src/browser/browser.zig | 34 ++++++++++++++++++---------------- src/browser/loader.zig | 34 +++++++++++++++++++++++++++++++++- 2 files changed, 51 insertions(+), 17 deletions(-) diff --git a/src/browser/browser.zig b/src/browser/browser.zig index ea2f056c..5f08b607 100644 --- a/src/browser/browser.zig +++ b/src/browser/browser.zig @@ -152,11 +152,13 @@ pub const Page = struct { // dump writes the page content into the given file. pub fn dump(self: *Page, out: std.fs.File) !void { - // no data loaded, nothin to do. - if (self.raw_data == null) return; // if no HTML document pointer available, dump the data content only. - if (self.doc == null) return try out.writeAll(self.raw_data.?); + if (self.doc == null) { + // no data loaded, nothing to do. + if (self.raw_data == null) return; + return try out.writeAll(self.raw_data.?); + } // if the page has a pointer to a document, dumps the HTML. const root = try parser.documentGetDocumentElement(self.doc.?) orelse return; @@ -175,22 +177,19 @@ pub const Page = struct { // TODO handle fragment in url. // load the data - var result = try self.loader.fetch(self.allocator, self.uri); - defer result.deinit(); + var resp = try self.loader.get(self.allocator, self.uri); + defer resp.deinit(); - log.info("GET {any} {d}", .{ self.uri, result.status }); + const req = resp.req; + + log.info("GET {any} {d}", .{ self.uri, req.response.status }); // TODO handle redirection - if (result.status != .ok) return error.BadStatusCode; - - if (result.body == null) return error.NoBody; - - // save the body into the page. - self.raw_data = try self.allocator.dupe(u8, result.body.?); + if (req.response.status != .ok) return error.BadStatusCode; // TODO handle charset // https://html.spec.whatwg.org/#content-type - const ct = result.headers.getFirstValue("Content-Type") orelse { + const ct = req.response.headers.getFirstValue("Content-Type") orelse { // no content type in HTTP headers. // TODO try to sniff mime type from the body. log.info("no content-type HTTP header", .{}); @@ -199,16 +198,19 @@ pub const Page = struct { const mime = try Mime.parse(ct); if (mime.eql(Mime.HTML)) { // TODO check content-type - try self.loadHTMLDoc(&result); + try self.loadHTMLDoc(req.reader()); } else { log.info("non-HTML document: {s}", .{ct}); + + // save the body into the page. + self.raw_data = try req.reader().readAllAlloc(self.allocator, 16 * 1024 * 1024); } } // https://html.spec.whatwg.org/#read-html - fn loadHTMLDoc(self: *Page, result: *FetchResult) !void { + fn loadHTMLDoc(self: *Page, reader: anytype) !void { log.debug("parse html", .{}); - const html_doc = try parser.documentHTMLParseFromStr(result.body.?); + const html_doc = try parser.documentHTMLParse(reader); const doc = parser.documentHTMLToDocument(html_doc); // save a document's pointer in the page. diff --git a/src/browser/loader.zig b/src/browser/loader.zig index 70bcd0aa..dddd6f01 100644 --- a/src/browser/loader.zig +++ b/src/browser/loader.zig @@ -6,10 +6,12 @@ pub const Loader = struct { client: std.http.Client, pub const Response = struct { - req: std.http.Request, + allocator: std.mem.Allocator, + req: *std.http.Client.Request, pub fn deinit(self: *Response) void { self.req.deinit(); + self.allocator.destroy(self.req); } }; @@ -40,6 +42,36 @@ pub const Loader = struct { .payload = .none, }); } + + // see + // https://ziglang.org/documentation/master/std/#A;std:http.Client.fetch + // for reference. + // The caller is responsible for calling `deinit()` on the `Response`. + pub fn get(self: *Loader, allocator: std.mem.Allocator, uri: std.Uri) !Response { + var headers = try std.http.Headers.initList(allocator, &[_]std.http.Field{ + .{ .name = "User-Agent", .value = user_agent }, + .{ .name = "Accept", .value = "*/*" }, + .{ .name = "Accept-Language", .value = "en-US,en;q=0.5" }, + }); + defer headers.deinit(); + + var resp = Response{ + .allocator = allocator, + .req = try allocator.create(std.http.Client.Request), + }; + errdefer allocator.destroy(resp.req); + + resp.req.* = try self.client.open(.GET, uri, headers, .{ + .handle_redirects = true, // TODO handle redirects manually + }); + errdefer resp.req.deinit(); + + try resp.req.send(.{}); + try resp.req.finish(); + try resp.req.wait(); + + return resp; + } }; test "basic url fetch" { From fb5fd6c7422bf28148d85dbfee64ed3bfb47ea69 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Thu, 11 Jan 2024 11:00:07 +0100 Subject: [PATCH 29/46] browser: add async script comment --- src/browser/browser.zig | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/browser/browser.zig b/src/browser/browser.zig index 5f08b607..10006fec 100644 --- a/src/browser/browser.zig +++ b/src/browser/browser.zig @@ -237,6 +237,10 @@ pub const Page = struct { try self.env.addObject(html_doc, "document"); // browse the DOM tree to retrieve scripts + + // sasync stores scripts which can be run asynchronously. + // for now they are just run after the non-async one in order to + // dispatch DOMContentLoaded the sooner as possible. var sasync = std.ArrayList(*parser.Element).init(self.allocator); defer sasync.deinit(); From a7fe94db8851592200bac4a8bff9011903997324 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Thu, 11 Jan 2024 11:39:49 +0100 Subject: [PATCH 30/46] browser: use directly the document as node --- src/browser/browser.zig | 7 +++---- src/browser/dump.zig | 17 ++++++++--------- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/src/browser/browser.zig b/src/browser/browser.zig index 10006fec..abe7cb33 100644 --- a/src/browser/browser.zig +++ b/src/browser/browser.zig @@ -161,8 +161,7 @@ pub const Page = struct { } // if the page has a pointer to a document, dumps the HTML. - const root = try parser.documentGetDocumentElement(self.doc.?) orelse return; - try Dump.htmlFile(root, out); + try Dump.htmlFile(self.doc.?, out); } // spec reference: https://html.spec.whatwg.org/#document-lifecycle @@ -244,11 +243,11 @@ pub const Page = struct { var sasync = std.ArrayList(*parser.Element).init(self.allocator); defer sasync.deinit(); - const root = try parser.documentGetDocumentElement(doc) orelse return; // TODO send loaded event in this case? + const root = parser.documentToNode(doc); const walker = Walker{}; var next: ?*parser.Node = null; while (true) { - next = try walker.get_next(parser.elementToNode(root), next) orelse break; + next = try walker.get_next(root, next) orelse break; // ignore non-elements nodes. if (try parser.nodeType(next.?) != .element) { diff --git a/src/browser/dump.zig b/src/browser/dump.zig index a23bd5b9..cbf7e3f4 100644 --- a/src/browser/dump.zig +++ b/src/browser/dump.zig @@ -4,17 +4,17 @@ const File = std.fs.File; const parser = @import("../netsurf.zig"); const Walker = @import("../dom/html_collection.zig").WalkerChildren; -pub fn htmlFile(root: *parser.Element, out: File) !void { - try out.writeAll("\n"); - try nodeFile(root, out); - try out.writeAll("\n"); +pub fn htmlFile(doc: *parser.Document, out: File) !void { + try out.writeAll("\n"); + try nodeFile(parser.documentToNode(doc), out); + try out.writeAll("\n"); } -fn nodeFile(root: *parser.Element, out: File) !void { +fn nodeFile(root: *parser.Node, out: File) !void { const walker = Walker{}; var next: ?*parser.Node = null; while (true) { - next = try walker.get_next(parser.elementToNode(root), next) orelse break; + next = try walker.get_next(root, next) orelse break; switch (try parser.nodeType(next.?)) { .element => { // open the tag @@ -40,7 +40,7 @@ fn nodeFile(root: *parser.Element, out: File) !void { // write the children // TODO avoid recursion - try nodeFile(parser.nodeToElement(next.?), out); + try nodeFile(next.?, out); // close the tag try out.writeAll(" Date: Fri, 12 Jan 2024 16:12:14 +0100 Subject: [PATCH 31/46] browser: force UTF-8 by default for page encoding --- src/browser/browser.zig | 3 ++- src/browser/dump.zig | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/browser/browser.zig b/src/browser/browser.zig index abe7cb33..70edcd62 100644 --- a/src/browser/browser.zig +++ b/src/browser/browser.zig @@ -209,7 +209,8 @@ pub const Page = struct { // https://html.spec.whatwg.org/#read-html fn loadHTMLDoc(self: *Page, reader: anytype) !void { log.debug("parse html", .{}); - const html_doc = try parser.documentHTMLParse(reader); + // TODO pass an encoding detected from HTTP headers. + const html_doc = try parser.documentHTMLParse(reader, "UTF-8"); const doc = parser.documentHTMLToDocument(html_doc); // save a document's pointer in the page. diff --git a/src/browser/dump.zig b/src/browser/dump.zig index cbf7e3f4..3ed3de98 100644 --- a/src/browser/dump.zig +++ b/src/browser/dump.zig @@ -86,7 +86,7 @@ pub fn HTMLFileTestFn(out: File) !void { const file = try std.fs.cwd().openFile("test.html", .{}); defer file.close(); - const doc_html = try parser.documentHTMLParse(file.reader()); + const doc_html = try parser.documentHTMLParse(file.reader(), "UTF-8"); // ignore close error defer parser.documentHTMLClose(doc_html) catch {}; From 4408c3dc5da831110ae2656d5f1f477895954be2 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Mon, 15 Jan 2024 16:49:37 +0100 Subject: [PATCH 32/46] browser: pass vm as argument --- src/browser/browser.zig | 16 ++++++---------- src/main_get.zig | 6 +++--- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/src/browser/browser.zig b/src/browser/browser.zig index 70edcd62..71bcd434 100644 --- a/src/browser/browser.zig +++ b/src/browser/browser.zig @@ -22,21 +22,17 @@ const log = std.log.scoped(.browser); // Browser is an instance of the browser. // You can create multiple browser instances. -// It contains only one session but initVM() and deinitVM() must be called only -// once per main. +// A browser contains only one session. +// TODO allow multiple sessions per browser. pub const Browser = struct { allocator: std.mem.Allocator, session: *Session = undefined, - var vm: jsruntime.VM = undefined; - pub fn initVM() void { - vm = jsruntime.VM.init(); - } - pub fn deinitVM() void { - vm.deinit(); - } + pub fn init(allocator: std.mem.Allocator, vm: jsruntime.VM) !Browser { + // We want to ensure the caller initialised a VM, but the browser + // doesn't use it directly... + _ = vm; - pub fn init(allocator: std.mem.Allocator) !Browser { return Browser{ .allocator = allocator, .session = try Session.init(allocator, "about:blank"), diff --git a/src/main_get.zig b/src/main_get.zig index a387df47..bb84b0d8 100644 --- a/src/main_get.zig +++ b/src/main_get.zig @@ -57,10 +57,10 @@ pub fn main() !void { std.os.exit(1); } - Browser.initVM(); - defer Browser.deinitVM(); + const vm = jsruntime.VM.init(); + defer vm.deinit(); - var browser = try Browser.init(allocator); + var browser = try Browser.init(allocator, vm); defer browser.deinit(); var page = try browser.currentSession().createPage(); From 25f290283bbd8c16c3e70c5e38ce7cbe9efd6b42 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Mon, 15 Jan 2024 16:54:01 +0100 Subject: [PATCH 33/46] browser: rename allocator to alloc --- src/browser/browser.zig | 50 ++++++++++++++++++++--------------------- src/browser/loader.zig | 24 ++++++++++---------- 2 files changed, 37 insertions(+), 37 deletions(-) diff --git a/src/browser/browser.zig b/src/browser/browser.zig index 71bcd434..bf7b4eaa 100644 --- a/src/browser/browser.zig +++ b/src/browser/browser.zig @@ -25,23 +25,23 @@ const log = std.log.scoped(.browser); // A browser contains only one session. // TODO allow multiple sessions per browser. pub const Browser = struct { - allocator: std.mem.Allocator, + alloc: std.mem.Allocator, session: *Session = undefined, - pub fn init(allocator: std.mem.Allocator, vm: jsruntime.VM) !Browser { + pub fn init(alloc: std.mem.Allocator, vm: jsruntime.VM) !Browser { // We want to ensure the caller initialised a VM, but the browser // doesn't use it directly... _ = vm; return Browser{ - .allocator = allocator, - .session = try Session.init(allocator, "about:blank"), + .alloc = alloc, + .session = try Session.init(alloc, "about:blank"), }; } pub fn deinit(self: *Browser) void { self.session.deinit(); - self.allocator.destroy(self.session); + self.alloc.destroy(self.session); } pub fn currentSession(self: *Browser) *Session { @@ -66,11 +66,11 @@ pub const Session = struct { window: Window, - fn init(allocator: std.mem.Allocator, uri: []const u8) !*Session { - var self = try allocator.create(Session); + fn init(alloc: std.mem.Allocator, uri: []const u8) !*Session { + var self = try alloc.create(Session); self.* = Session{ .uri = uri, - .arena = std.heap.ArenaAllocator.init(allocator), + .arena = std.heap.ArenaAllocator.init(alloc), .window = Window.create(null), }; @@ -106,7 +106,7 @@ pub const Session = struct { // You can navigates multiple urls with the same page, but you have to call // end() to stop the previous navigation before starting a new one. pub const Page = struct { - allocator: std.mem.Allocator, + alloc: std.mem.Allocator, loader: *Loader, env: *Env, window: *Window, @@ -119,13 +119,13 @@ pub const Page = struct { raw_data: ?[]const u8 = null, fn init( - allocator: std.mem.Allocator, + alloc: std.mem.Allocator, loader: *Loader, env: *Env, window: *Window, ) Page { return Page{ - .allocator = allocator, + .alloc = alloc, .loader = loader, .env = env, .window = window, @@ -139,10 +139,10 @@ pub const Page = struct { pub fn deinit(self: *Page) void { if (self.raw_data) |s| { - self.allocator.free(s); + self.alloc.free(s); } if (self.raw_data) |s| { - self.allocator.free(s); + self.alloc.free(s); } } @@ -165,14 +165,14 @@ pub const Page = struct { log.debug("starting GET {s}", .{uri}); // own the url - if (self.rawuri) |prev| self.allocator.free(prev); - self.rawuri = try self.allocator.dupe(u8, uri); + if (self.rawuri) |prev| self.alloc.free(prev); + self.rawuri = try self.alloc.dupe(u8, uri); self.uri = std.Uri.parse(self.rawuri.?) catch try std.Uri.parseWithoutScheme(self.rawuri.?); // TODO handle fragment in url. // load the data - var resp = try self.loader.get(self.allocator, self.uri); + var resp = try self.loader.get(self.alloc, self.uri); defer resp.deinit(); const req = resp.req; @@ -198,7 +198,7 @@ pub const Page = struct { log.info("non-HTML document: {s}", .{ct}); // save the body into the page. - self.raw_data = try req.reader().readAllAlloc(self.allocator, 16 * 1024 * 1024); + self.raw_data = try req.reader().readAllAlloc(self.alloc, 16 * 1024 * 1024); } } @@ -224,7 +224,7 @@ pub const Page = struct { // start JS env log.debug("start js env", .{}); - try self.env.start(self.allocator); + try self.env.start(self.alloc); // add global objects log.debug("setup global env", .{}); @@ -237,7 +237,7 @@ pub const Page = struct { // sasync stores scripts which can be run asynchronously. // for now they are just run after the non-async one in order to // dispatch DOMContentLoaded the sooner as possible. - var sasync = std.ArrayList(*parser.Element).init(self.allocator); + var sasync = std.ArrayList(*parser.Element).init(self.alloc); defer sasync.deinit(); const root = parser.documentToNode(doc); @@ -333,9 +333,9 @@ pub const Page = struct { log.debug("starting GET {s}", .{src}); const u = std.Uri.parse(src) catch try std.Uri.parseWithoutScheme(src); - const ru = try std.Uri.resolve(self.uri, u, false, self.allocator); + const ru = try std.Uri.resolve(self.uri, u, false, self.alloc); - var fetchres = try self.loader.fetch(self.allocator, ru); + var fetchres = try self.loader.fetch(self.alloc, ru); defer fetchres.deinit(); log.info("GET {any}: {d}", .{ ru, fetchres.status }); @@ -352,8 +352,8 @@ pub const Page = struct { if (fetchres.body == null) return; var res = jsruntime.JSResult{}; - try self.env.run(self.allocator, fetchres.body.?, src, &res, null); - defer res.deinit(self.allocator); + try self.env.run(self.alloc, fetchres.body.?, src, &res, null); + defer res.deinit(self.alloc); if (res.success) { log.debug("eval remote {s}: {s}", .{ src, res.result }); @@ -371,8 +371,8 @@ pub const Page = struct { if (opt_text) |text| { // TODO handle charset attribute var res = jsruntime.JSResult{}; - try self.env.run(self.allocator, text, "", &res, null); - defer res.deinit(self.allocator); + try self.env.run(self.alloc, text, "", &res, null); + defer res.deinit(self.alloc); if (res.success) { log.debug("eval inline: {s}", .{res.result}); diff --git a/src/browser/loader.zig b/src/browser/loader.zig index dddd6f01..4cfbdd9f 100644 --- a/src/browser/loader.zig +++ b/src/browser/loader.zig @@ -6,19 +6,19 @@ pub const Loader = struct { client: std.http.Client, pub const Response = struct { - allocator: std.mem.Allocator, + alloc: std.mem.Allocator, req: *std.http.Client.Request, pub fn deinit(self: *Response) void { self.req.deinit(); - self.allocator.destroy(self.req); + self.alloc.destroy(self.req); } }; - pub fn init(allocator: std.mem.Allocator) Loader { + pub fn init(alloc: std.mem.Allocator) Loader { return Loader{ .client = std.http.Client{ - .allocator = allocator, + .allocator = alloc, }, }; } @@ -28,15 +28,15 @@ pub const Loader = struct { } // the caller must deinit the FetchResult. - pub fn fetch(self: *Loader, allocator: std.mem.Allocator, uri: std.Uri) !std.http.Client.FetchResult { - var headers = try std.http.Headers.initList(allocator, &[_]std.http.Field{ + pub fn fetch(self: *Loader, alloc: std.mem.Allocator, uri: std.Uri) !std.http.Client.FetchResult { + var headers = try std.http.Headers.initList(alloc, &[_]std.http.Field{ .{ .name = "User-Agent", .value = user_agent }, .{ .name = "Accept", .value = "*/*" }, .{ .name = "Accept-Language", .value = "en-US,en;q=0.5" }, }); defer headers.deinit(); - return try self.client.fetch(allocator, .{ + return try self.client.fetch(alloc, .{ .location = .{ .uri = uri }, .headers = headers, .payload = .none, @@ -47,8 +47,8 @@ pub const Loader = struct { // https://ziglang.org/documentation/master/std/#A;std:http.Client.fetch // for reference. // The caller is responsible for calling `deinit()` on the `Response`. - pub fn get(self: *Loader, allocator: std.mem.Allocator, uri: std.Uri) !Response { - var headers = try std.http.Headers.initList(allocator, &[_]std.http.Field{ + pub fn get(self: *Loader, alloc: std.mem.Allocator, uri: std.Uri) !Response { + var headers = try std.http.Headers.initList(alloc, &[_]std.http.Field{ .{ .name = "User-Agent", .value = user_agent }, .{ .name = "Accept", .value = "*/*" }, .{ .name = "Accept-Language", .value = "en-US,en;q=0.5" }, @@ -56,10 +56,10 @@ pub const Loader = struct { defer headers.deinit(); var resp = Response{ - .allocator = allocator, - .req = try allocator.create(std.http.Client.Request), + .alloc = alloc, + .req = try alloc.create(std.http.Client.Request), }; - errdefer allocator.destroy(resp.req); + errdefer alloc.destroy(resp.req); resp.req.* = try self.client.open(.GET, uri, headers, .{ .handle_redirects = true, // TODO handle redirects manually From 07be51cd1d8a9dc4b5094c52819379fa3ea661b9 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Mon, 15 Jan 2024 17:55:21 +0100 Subject: [PATCH 34/46] session: self destroy --- src/browser/browser.zig | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/browser/browser.zig b/src/browser/browser.zig index bf7b4eaa..0fa1285e 100644 --- a/src/browser/browser.zig +++ b/src/browser/browser.zig @@ -41,7 +41,6 @@ pub const Browser = struct { pub fn deinit(self: *Browser) void { self.session.deinit(); - self.alloc.destroy(self.session); } pub fn currentSession(self: *Browser) *Session { @@ -55,6 +54,7 @@ pub const Browser = struct { // You can create successively multiple pages for a session, but you must // deinit a page before running another one. pub const Session = struct { + alloc: std.mem.Allocator, arena: std.heap.ArenaAllocator, uri: []const u8, @@ -70,6 +70,7 @@ pub const Session = struct { var self = try alloc.create(Session); self.* = Session{ .uri = uri, + .alloc = alloc, .arena = std.heap.ArenaAllocator.init(alloc), .window = Window.create(null), }; @@ -90,6 +91,7 @@ pub const Session = struct { self.loop.deinit(); self.env.deinit(); self.arena.deinit(); + self.alloc.destroy(self); } pub fn createPage(self: *Session) !Page { From 2efda8f452f26c3ce9ba103b9a4da8d9e5ba487b Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Mon, 15 Jan 2024 17:55:35 +0100 Subject: [PATCH 35/46] browser: no need for allocator in the struct --- src/browser/browser.zig | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/browser/browser.zig b/src/browser/browser.zig index 0fa1285e..1c1312c5 100644 --- a/src/browser/browser.zig +++ b/src/browser/browser.zig @@ -25,7 +25,6 @@ const log = std.log.scoped(.browser); // A browser contains only one session. // TODO allow multiple sessions per browser. pub const Browser = struct { - alloc: std.mem.Allocator, session: *Session = undefined, pub fn init(alloc: std.mem.Allocator, vm: jsruntime.VM) !Browser { @@ -34,7 +33,6 @@ pub const Browser = struct { _ = vm; return Browser{ - .alloc = alloc, .session = try Session.init(alloc, "about:blank"), }; } From 55f69b3ae7a0b660ffd053b7cec814879f9d7702 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Mon, 15 Jan 2024 18:15:26 +0100 Subject: [PATCH 36/46] browser: create a fetchResult func --- src/browser/browser.zig | 74 ++++++++++++++++++++++++++--------------- 1 file changed, 48 insertions(+), 26 deletions(-) diff --git a/src/browser/browser.zig b/src/browser/browser.zig index 1c1312c5..f8472640 100644 --- a/src/browser/browser.zig +++ b/src/browser/browser.zig @@ -332,34 +332,18 @@ pub const Page = struct { if (opt_src) |src| { log.debug("starting GET {s}", .{src}); - const u = std.Uri.parse(src) catch try std.Uri.parseWithoutScheme(src); - const ru = try std.Uri.resolve(self.uri, u, false, self.alloc); + self.fetchScript(src) catch |err| { + switch (err) { + FetchError.BadStatusCode => return err, - var fetchres = try self.loader.fetch(self.alloc, ru); - defer fetchres.deinit(); + // TODO If el's result is null, then fire an event named error at + // el, and return. + FetchError.NoBody => return, - log.info("GET {any}: {d}", .{ ru, fetchres.status }); - - if (fetchres.status != .ok) { - return error.BadStatusCode; - } - - // TODO check content-type - - // check no body - // TODO If el's result is null, then fire an event named error at - // el, and return. - if (fetchres.body == null) return; - - var res = jsruntime.JSResult{}; - try self.env.run(self.alloc, fetchres.body.?, src, &res, null); - defer res.deinit(self.alloc); - - if (res.success) { - log.debug("eval remote {s}: {s}", .{ src, res.result }); - } else { - log.info("eval remote {s}: {s}", .{ src, res.result }); - } + FetchError.JsErr => {}, // nothing to do here. + else => return err, + } + }; // TODO If el's from an external file is true, then fire an event // named load at el. @@ -388,6 +372,44 @@ pub const Page = struct { // el, and return. } + const FetchError = error{ + BadStatusCode, + NoBody, + JsErr, + }; + + // fetchScript senf a GET request to the src and execute the script + // received. + fn fetchScript(self: *Page, src: []const u8) !void { + log.debug("starting fetch script {s}", .{src}); + + const u = std.Uri.parse(src) catch try std.Uri.parseWithoutScheme(src); + const ru = try std.Uri.resolve(self.uri, u, false, self.alloc); + + var fetchres = try self.loader.fetch(self.alloc, ru); + defer fetchres.deinit(); + + log.info("fech script {any}: {d}", .{ ru, fetchres.status }); + + if (fetchres.status != .ok) return FetchError.BadStatusCode; + + // TODO check content-type + + // check no body + if (fetchres.body == null) return FetchError.NoBody; + + var res = jsruntime.JSResult{}; + try self.env.run(self.alloc, fetchres.body.?, src, &res, null); + defer res.deinit(self.alloc); + + if (res.success) { + log.debug("eval remote {s}: {s}", .{ src, res.result }); + } else { + log.info("eval remote {s}: {s}", .{ src, res.result }); + return FetchError.JsErr; + } + } + // > type // > Attribute is not set (default), an empty string, or a JavaScript MIME // > type indicates that the script is a "classic script", containing From ea8c259c25eb0e0af52d3de378da215509153aea Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Mon, 15 Jan 2024 18:17:51 +0100 Subject: [PATCH 37/46] browser: refacto isJS func --- src/browser/browser.zig | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/browser/browser.zig b/src/browser/browser.zig index f8472640..e5c86c77 100644 --- a/src/browser/browser.zig +++ b/src/browser/browser.zig @@ -416,6 +416,10 @@ pub const Page = struct { // > JavaScript code. // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/script#attribute_is_not_set_default_an_empty_string_or_a_javascript_mime_type fn isJS(stype: ?[]const u8) bool { - return stype == null or stype.?.len == 0 or std.mem.eql(u8, stype.?, "application/javascript") or !std.mem.eql(u8, stype.?, "module"); + if (stype == null or stype.?.len == 0) return true; + if (std.mem.eql(u8, stype.?, "application/javascript")) return true; + if (!std.mem.eql(u8, stype.?, "module")) return true; + + return false; } }; From e8879cd6db4d692ca0ab9c81cde305a740792136 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Mon, 15 Jan 2024 18:20:45 +0100 Subject: [PATCH 38/46] browser: add TODO for concurrency --- src/browser/browser.zig | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/browser/browser.zig b/src/browser/browser.zig index e5c86c77..369bf76b 100644 --- a/src/browser/browser.zig +++ b/src/browser/browser.zig @@ -223,6 +223,7 @@ pub const Page = struct { // https://html.spec.whatwg.org/#read-html // start JS env + // TODO load the js env concurrently with the HTML parsing. log.debug("start js env", .{}); try self.env.start(self.alloc); @@ -233,6 +234,9 @@ pub const Page = struct { try self.env.addObject(html_doc, "document"); // browse the DOM tree to retrieve scripts + // TODO execute the synchronous scripts during the HTL parsing. + // TODO fetch the script resources concurrently but execute them in the + // declaration order for synchronous ones. // sasync stores scripts which can be run asynchronously. // for now they are just run after the non-async one in order to From c2a842b80eab80a9d6c48b99b11f1a42d0956a7b Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Mon, 15 Jan 2024 18:33:23 +0100 Subject: [PATCH 39/46] browser: remove undefined for session attr --- src/browser/browser.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/browser/browser.zig b/src/browser/browser.zig index 369bf76b..3b9aebc0 100644 --- a/src/browser/browser.zig +++ b/src/browser/browser.zig @@ -25,7 +25,7 @@ const log = std.log.scoped(.browser); // A browser contains only one session. // TODO allow multiple sessions per browser. pub const Browser = struct { - session: *Session = undefined, + session: *Session, pub fn init(alloc: std.mem.Allocator, vm: jsruntime.VM) !Browser { // We want to ensure the caller initialised a VM, but the browser From 69bbd62edf5b661b89d6a499522e414995e2f087 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Tue, 16 Jan 2024 09:26:30 +0100 Subject: [PATCH 40/46] browser: pass the session to the page --- src/browser/browser.zig | 36 ++++++++++++++---------------------- 1 file changed, 14 insertions(+), 22 deletions(-) diff --git a/src/browser/browser.zig b/src/browser/browser.zig index 3b9aebc0..3efd2c43 100644 --- a/src/browser/browser.zig +++ b/src/browser/browser.zig @@ -95,9 +95,7 @@ pub const Session = struct { pub fn createPage(self: *Session) !Page { return Page.init( self.arena.allocator(), - &self.loader, - &self.env, - &self.window, + self, ); } }; @@ -107,9 +105,7 @@ pub const Session = struct { // end() to stop the previous navigation before starting a new one. pub const Page = struct { alloc: std.mem.Allocator, - loader: *Loader, - env: *Env, - window: *Window, + session: *Session, doc: ?*parser.Document = null, // handle url @@ -120,20 +116,16 @@ pub const Page = struct { fn init( alloc: std.mem.Allocator, - loader: *Loader, - env: *Env, - window: *Window, + session: *Session, ) Page { return Page{ .alloc = alloc, - .loader = loader, - .env = env, - .window = window, + .session = session, }; } pub fn end(self: *Page) void { - self.env.stop(); + self.session.env.stop(); // TODO unload document: https://html.spec.whatwg.org/#unloading-documents } @@ -172,7 +164,7 @@ pub const Page = struct { // TODO handle fragment in url. // load the data - var resp = try self.loader.get(self.alloc, self.uri); + var resp = try self.session.loader.get(self.alloc, self.uri); defer resp.deinit(); const req = resp.req; @@ -218,20 +210,20 @@ pub const Page = struct { // TODO inject the URL to the document including the fragment. // TODO set the referrer to the document. - self.window.replaceDocument(doc); + self.session.window.replaceDocument(doc); // https://html.spec.whatwg.org/#read-html // start JS env // TODO load the js env concurrently with the HTML parsing. log.debug("start js env", .{}); - try self.env.start(self.alloc); + try self.session.env.start(self.alloc); // add global objects log.debug("setup global env", .{}); - try self.env.addObject(self.window, "window"); - try self.env.addObject(self.window, "self"); - try self.env.addObject(html_doc, "document"); + try self.session.env.addObject(self.session.window, "window"); + try self.session.env.addObject(self.session.window, "self"); + try self.session.env.addObject(html_doc, "document"); // browse the DOM tree to retrieve scripts // TODO execute the synchronous scripts during the HTL parsing. @@ -359,7 +351,7 @@ pub const Page = struct { if (opt_text) |text| { // TODO handle charset attribute var res = jsruntime.JSResult{}; - try self.env.run(self.alloc, text, "", &res, null); + try self.session.env.run(self.alloc, text, "", &res, null); defer res.deinit(self.alloc); if (res.success) { @@ -390,7 +382,7 @@ pub const Page = struct { const u = std.Uri.parse(src) catch try std.Uri.parseWithoutScheme(src); const ru = try std.Uri.resolve(self.uri, u, false, self.alloc); - var fetchres = try self.loader.fetch(self.alloc, ru); + var fetchres = try self.session.loader.fetch(self.alloc, ru); defer fetchres.deinit(); log.info("fech script {any}: {d}", .{ ru, fetchres.status }); @@ -403,7 +395,7 @@ pub const Page = struct { if (fetchres.body == null) return FetchError.NoBody; var res = jsruntime.JSResult{}; - try self.env.run(self.alloc, fetchres.body.?, src, &res, null); + try self.session.env.run(self.alloc, fetchres.body.?, src, &res, null); defer res.deinit(self.alloc); if (res.success) { From 01a894b7d0f8b7804326975b7c6d7306853e2e08 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Tue, 16 Jan 2024 11:46:14 +0100 Subject: [PATCH 41/46] mime: parse charset and boundary from content-type --- src/browser/mime.zig | 40 ++++++++++++++++++++++++++++++++++------ 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/src/browser/mime.zig b/src/browser/mime.zig index da99e1e4..a880daee 100644 --- a/src/browser/mime.zig +++ b/src/browser/mime.zig @@ -12,10 +12,13 @@ const MimeError = error{ mtype: []const u8, msubtype: []const u8, -params: []const u8, +params: []const u8 = "", -pub const HTML = Self{ .mtype = "text", .msubtype = "html", .params = "" }; -pub const Javascript = Self{ .mtype = "application", .msubtype = "javascript", .params = "" }; +charset: ?[]const u8 = null, +boundary: ?[]const u8 = null, + +pub const HTML = Self{ .mtype = "text", .msubtype = "html" }; +pub const Javascript = Self{ .mtype = "application", .msubtype = "javascript" }; const reader = struct { s: []const u8, @@ -128,7 +131,7 @@ pub fn parse(s: []const u8) Self.MimeError!Self { // limit input size if (ln > 255) return MimeError.TooBig; - var res = Self{ .mtype = "", .msubtype = "", .params = "" }; + var res = Self{ .mtype = "", .msubtype = "" }; var r = reader{ .s = s }; res.mtype = trim(r.until('/')); @@ -144,6 +147,24 @@ pub fn parse(s: []const u8) Self.MimeError!Self { res.params = trim(r.tail()); if (res.params.len == 0) return MimeError.Invalid; + // parse well known parameters. + // don't check invalid parameter format. + var rp = reader{ .s = res.params }; + while (true) { + const name = trim(rp.until('=')); + if (!rp.skip()) return res; + const value = trim(rp.until(';')); + + if (std.ascii.eqlIgnoreCase(name, "charset")) { + res.charset = value; + } + if (std.ascii.eqlIgnoreCase(name, "boundary")) { + res.boundary = value; + } + + if (!rp.skip()) return res; + } + return res; } @@ -163,10 +184,17 @@ test "parse valid" { try testing.expectEqualStrings("text", m2.mtype); try testing.expectEqualStrings("javascript1.5", m2.msubtype); - const m3 = try Self.parse("text/html; charset=UTF-8"); + const m3 = try Self.parse("text/html; charset=utf-8"); try testing.expectEqualStrings("text", m3.mtype); try testing.expectEqualStrings("html", m3.msubtype); - try testing.expectEqualStrings("charset=UTF-8", m3.params); + try testing.expectEqualStrings("charset=utf-8", m3.params); + try testing.expectEqualStrings("utf-8", m3.charset.?); + + const m4 = try Self.parse("text/html; boundary=----"); + try testing.expectEqualStrings("text", m4.mtype); + try testing.expectEqualStrings("html", m4.msubtype); + try testing.expectEqualStrings("boundary=----", m4.params); + try testing.expectEqualStrings("----", m4.boundary.?); } test "parse invalid" { From 5362fcc7b45dc3070bc0e1fa2244369aeff6317a Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Tue, 16 Jan 2024 11:46:54 +0100 Subject: [PATCH 42/46] browser: use charset from headers to parse doc --- src/browser/browser.zig | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/browser/browser.zig b/src/browser/browser.zig index 3efd2c43..94ae256c 100644 --- a/src/browser/browser.zig +++ b/src/browser/browser.zig @@ -182,10 +182,10 @@ pub const Page = struct { log.info("no content-type HTTP header", .{}); return; }; + log.debug("header content-type: {s}", .{ct}); const mime = try Mime.parse(ct); if (mime.eql(Mime.HTML)) { - // TODO check content-type - try self.loadHTMLDoc(req.reader()); + try self.loadHTMLDoc(req.reader(), mime.charset orelse "utf-8"); } else { log.info("non-HTML document: {s}", .{ct}); @@ -195,10 +195,13 @@ pub const Page = struct { } // https://html.spec.whatwg.org/#read-html - fn loadHTMLDoc(self: *Page, reader: anytype) !void { - log.debug("parse html", .{}); - // TODO pass an encoding detected from HTTP headers. - const html_doc = try parser.documentHTMLParse(reader, "UTF-8"); + fn loadHTMLDoc(self: *Page, reader: anytype, charset: []const u8) !void { + log.debug("parse html with charset {s}", .{charset}); + + const ccharset = try self.alloc.dupeZ(u8, charset); + defer self.alloc.free(ccharset); + + const html_doc = try parser.documentHTMLParse(reader, ccharset); const doc = parser.documentHTMLToDocument(html_doc); // save a document's pointer in the page. From 2bbaa77891d1094bda2a42c0cc3663245c634c2b Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Tue, 16 Jan 2024 11:47:23 +0100 Subject: [PATCH 43/46] window: document is null by default --- src/html/window.zig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/html/window.zig b/src/html/window.zig index 346e8e06..2ff7557c 100644 --- a/src/html/window.zig +++ b/src/html/window.zig @@ -7,7 +7,7 @@ const parser = @import("../netsurf.zig"); pub const Window = struct { pub const mem_guarantied = true; - document: *parser.Document = undefined, + document: ?*parser.Document = null, target: []const u8, pub fn create(target: ?[]const u8) Window { @@ -32,7 +32,7 @@ pub const Window = struct { return self; } - pub fn get_document(self: *Window) *parser.Document { + pub fn get_document(self: *Window) ?*parser.Document { return self.document; } From 40dac207f9048440cbbbfb3107b5d3a0eeeebfd9 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Tue, 16 Jan 2024 11:48:12 +0100 Subject: [PATCH 44/46] window: add event target prototype --- src/html/window.zig | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/html/window.zig b/src/html/window.zig index 2ff7557c..5c844a49 100644 --- a/src/html/window.zig +++ b/src/html/window.zig @@ -2,9 +2,12 @@ const std = @import("std"); const parser = @import("../netsurf.zig"); +const EventTarget = @import("../dom/event_target.zig").EventTarget; + // https://dom.spec.whatwg.org/#interface-window-extensions // https://html.spec.whatwg.org/multipage/nav-history-apis.html#window pub const Window = struct { + pub const prototype = *EventTarget; pub const mem_guarantied = true; document: ?*parser.Document = null, From d22b7a6d29d241a46f17eb4c6d2302936ce70a23 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Tue, 16 Jan 2024 15:21:56 +0100 Subject: [PATCH 45/46] browser: use an arena for the page --- src/browser/browser.zig | 90 +++++++++++++++++++++++------------------ 1 file changed, 50 insertions(+), 40 deletions(-) diff --git a/src/browser/browser.zig b/src/browser/browser.zig index 94ae256c..62143139 100644 --- a/src/browser/browser.zig +++ b/src/browser/browser.zig @@ -47,23 +47,30 @@ pub const Browser = struct { }; // Session is like a browser's tab. -// It owns the js env and the loader and an allocator arena for all the pages -// of the session. +// It owns the js env and the loader for all the pages of the session. // You can create successively multiple pages for a session, but you must // deinit a page before running another one. pub const Session = struct { + // allocator used to init the arena. alloc: std.mem.Allocator, + + // The arena is used only to bound the js env init b/c it leaks memory. + // see https://github.com/lightpanda-io/jsruntime-lib/issues/181 + // + // The arena is initialised with self.alloc allocator. + // all others Session deps use directly self.alloc and not the arena. arena: std.heap.ArenaAllocator, + uri: []const u8, // TODO handle proxy - loader: Loader = undefined, + loader: Loader, env: Env = undefined, - loop: Loop = undefined, - jstypes: [Types.len]usize = undefined, - + loop: Loop, window: Window, + jstypes: [Types.len]usize = undefined, + fn init(alloc: std.mem.Allocator, uri: []const u8) !*Session { var self = try alloc.create(Session); self.* = Session{ @@ -71,40 +78,37 @@ pub const Session = struct { .alloc = alloc, .arena = std.heap.ArenaAllocator.init(alloc), .window = Window.create(null), + .loader = Loader.init(alloc), + .loop = try Loop.init(alloc), }; - const aallocator = self.arena.allocator(); - - self.loader = Loader.init(aallocator); - self.loop = try Loop.init(aallocator); - self.env = try Env.init(aallocator, &self.loop); - + self.env = try Env.init(self.arena.allocator(), &self.loop); try self.env.load(&self.jstypes); return self; } fn deinit(self: *Session) void { - self.loader.deinit(); - self.loop.deinit(); self.env.deinit(); self.arena.deinit(); + + self.loader.deinit(); + self.loop.deinit(); self.alloc.destroy(self); } pub fn createPage(self: *Session) !Page { - return Page.init( - self.arena.allocator(), - self, - ); + return Page.init(self.alloc, self); } }; // Page navigates to an url. // You can navigates multiple urls with the same page, but you have to call // end() to stop the previous navigation before starting a new one. +// The page handle all its memory in an arena allocator. The arena is reseted +// when end() is called. pub const Page = struct { - alloc: std.mem.Allocator, + arena: std.heap.ArenaAllocator, session: *Session, doc: ?*parser.Document = null, @@ -119,23 +123,21 @@ pub const Page = struct { session: *Session, ) Page { return Page{ - .alloc = alloc, + .arena = std.heap.ArenaAllocator.init(alloc), .session = session, }; } + // reset js env and mem arena. pub fn end(self: *Page) void { self.session.env.stop(); // TODO unload document: https://html.spec.whatwg.org/#unloading-documents + + _ = self.arena.reset(.free_all); } pub fn deinit(self: *Page) void { - if (self.raw_data) |s| { - self.alloc.free(s); - } - if (self.raw_data) |s| { - self.alloc.free(s); - } + self.arena.deinit(); } // dump writes the page content into the given file. @@ -154,17 +156,19 @@ pub const Page = struct { // spec reference: https://html.spec.whatwg.org/#document-lifecycle pub fn navigate(self: *Page, uri: []const u8) !void { + const alloc = self.arena.allocator(); + log.debug("starting GET {s}", .{uri}); // own the url - if (self.rawuri) |prev| self.alloc.free(prev); - self.rawuri = try self.alloc.dupe(u8, uri); + if (self.rawuri) |prev| alloc.free(prev); + self.rawuri = try alloc.dupe(u8, uri); self.uri = std.Uri.parse(self.rawuri.?) catch try std.Uri.parseWithoutScheme(self.rawuri.?); // TODO handle fragment in url. // load the data - var resp = try self.session.loader.get(self.alloc, self.uri); + var resp = try self.session.loader.get(alloc, self.uri); defer resp.deinit(); const req = resp.req; @@ -190,16 +194,18 @@ pub const Page = struct { log.info("non-HTML document: {s}", .{ct}); // save the body into the page. - self.raw_data = try req.reader().readAllAlloc(self.alloc, 16 * 1024 * 1024); + self.raw_data = try req.reader().readAllAlloc(alloc, 16 * 1024 * 1024); } } // https://html.spec.whatwg.org/#read-html fn loadHTMLDoc(self: *Page, reader: anytype, charset: []const u8) !void { + const alloc = self.arena.allocator(); + log.debug("parse html with charset {s}", .{charset}); - const ccharset = try self.alloc.dupeZ(u8, charset); - defer self.alloc.free(ccharset); + const ccharset = try alloc.dupeZ(u8, charset); + defer alloc.free(ccharset); const html_doc = try parser.documentHTMLParse(reader, ccharset); const doc = parser.documentHTMLToDocument(html_doc); @@ -220,7 +226,7 @@ pub const Page = struct { // start JS env // TODO load the js env concurrently with the HTML parsing. log.debug("start js env", .{}); - try self.session.env.start(self.alloc); + try self.session.env.start(alloc); // add global objects log.debug("setup global env", .{}); @@ -236,7 +242,7 @@ pub const Page = struct { // sasync stores scripts which can be run asynchronously. // for now they are just run after the non-async one in order to // dispatch DOMContentLoaded the sooner as possible. - var sasync = std.ArrayList(*parser.Element).init(self.alloc); + var sasync = std.ArrayList(*parser.Element).init(alloc); defer sasync.deinit(); const root = parser.documentToNode(doc); @@ -326,6 +332,8 @@ pub const Page = struct { // if no src is present, we evaluate the text source. // https://html.spec.whatwg.org/multipage/scripting.html#script-processing-model fn evalScript(self: *Page, e: *parser.Element) !void { + const alloc = self.arena.allocator(); + // https://html.spec.whatwg.org/multipage/webappapis.html#fetch-a-classic-script const opt_src = try parser.elementGetAttribute(e, "src"); if (opt_src) |src| { @@ -354,8 +362,8 @@ pub const Page = struct { if (opt_text) |text| { // TODO handle charset attribute var res = jsruntime.JSResult{}; - try self.session.env.run(self.alloc, text, "", &res, null); - defer res.deinit(self.alloc); + try self.session.env.run(alloc, text, "", &res, null); + defer res.deinit(alloc); if (res.success) { log.debug("eval inline: {s}", .{res.result}); @@ -380,12 +388,14 @@ pub const Page = struct { // fetchScript senf a GET request to the src and execute the script // received. fn fetchScript(self: *Page, src: []const u8) !void { + const alloc = self.arena.allocator(); + log.debug("starting fetch script {s}", .{src}); const u = std.Uri.parse(src) catch try std.Uri.parseWithoutScheme(src); - const ru = try std.Uri.resolve(self.uri, u, false, self.alloc); + const ru = try std.Uri.resolve(self.uri, u, false, alloc); - var fetchres = try self.session.loader.fetch(self.alloc, ru); + var fetchres = try self.session.loader.fetch(alloc, ru); defer fetchres.deinit(); log.info("fech script {any}: {d}", .{ ru, fetchres.status }); @@ -398,8 +408,8 @@ pub const Page = struct { if (fetchres.body == null) return FetchError.NoBody; var res = jsruntime.JSResult{}; - try self.session.env.run(self.alloc, fetchres.body.?, src, &res, null); - defer res.deinit(self.alloc); + try self.session.env.run(alloc, fetchres.body.?, src, &res, null); + defer res.deinit(alloc); if (res.success) { log.debug("eval remote {s}: {s}", .{ src, res.result }); From a2e266514f2d36e566f3238a96f29ec762e3fe27 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Tue, 16 Jan 2024 15:39:35 +0100 Subject: [PATCH 46/46] dom: extract walker from html_collection to its own file --- src/browser/browser.zig | 2 +- src/browser/dump.zig | 2 +- src/dom/document.zig | 2 +- src/dom/element.zig | 2 +- src/dom/html_collection.zig | 88 +++---------------------------------- src/dom/walker.zig | 86 ++++++++++++++++++++++++++++++++++++ src/html/document.zig | 2 +- 7 files changed, 96 insertions(+), 88 deletions(-) create mode 100644 src/dom/walker.zig diff --git a/src/browser/browser.zig b/src/browser/browser.zig index 62143139..18e37cec 100644 --- a/src/browser/browser.zig +++ b/src/browser/browser.zig @@ -14,7 +14,7 @@ const Env = jsruntime.Env; const apiweb = @import("../apiweb.zig"); const Window = @import("../html/window.zig").Window; -const Walker = @import("../dom/html_collection.zig").WalkerDepthFirst; +const Walker = @import("../dom/walker.zig").WalkerDepthFirst; const FetchResult = std.http.Client.FetchResult; diff --git a/src/browser/dump.zig b/src/browser/dump.zig index 3ed3de98..a00bd3c7 100644 --- a/src/browser/dump.zig +++ b/src/browser/dump.zig @@ -2,7 +2,7 @@ const std = @import("std"); const File = std.fs.File; const parser = @import("../netsurf.zig"); -const Walker = @import("../dom/html_collection.zig").WalkerChildren; +const Walker = @import("../dom/walker.zig").WalkerChildren; pub fn htmlFile(doc: *parser.Document, out: File) !void { try out.writeAll("\n"); diff --git a/src/dom/document.zig b/src/dom/document.zig index ee7f9454..9c878871 100644 --- a/src/dom/document.zig +++ b/src/dom/document.zig @@ -10,7 +10,7 @@ const Node = @import("node.zig").Node; const NodeList = @import("nodelist.zig").NodeList; const NodeUnion = @import("node.zig").Union; -const Walker = @import("html_collection.zig").WalkerDepthFirst; +const Walker = @import("walker.zig").WalkerDepthFirst; const collection = @import("html_collection.zig"); const Element = @import("element.zig").Element; diff --git a/src/dom/element.zig b/src/dom/element.zig index e64faf08..cd12f625 100644 --- a/src/dom/element.zig +++ b/src/dom/element.zig @@ -9,7 +9,7 @@ const checkCases = jsruntime.test_utils.checkCases; const collection = @import("html_collection.zig"); const Node = @import("node.zig").Node; -const Walker = @import("html_collection.zig").WalkerDepthFirst; +const Walker = @import("walker.zig").WalkerDepthFirst; const NodeList = @import("nodelist.zig").NodeList; const HTMLElem = @import("../html/elements.zig"); pub const Union = @import("../html/elements.zig").Union; diff --git a/src/dom/html_collection.zig b/src/dom/html_collection.zig index f5e3d6d1..5cff8f92 100644 --- a/src/dom/html_collection.zig +++ b/src/dom/html_collection.zig @@ -11,6 +11,11 @@ const utils = @import("utils.z"); const Element = @import("element.zig").Element; const Union = @import("element.zig").Union; +const Walker = @import("walker.zig").Walker; +const WalkerDepthFirst = @import("walker.zig").WalkerDepthFirst; +const WalkerChildren = @import("walker.zig").WalkerChildren; +const WalkerNone = @import("walker.zig").WalkerNone; + const Matcher = union(enum) { matchByName: MatchByName, matchByTagName: MatchByTagName, @@ -255,89 +260,6 @@ pub fn HTMLCollectionByAnchors( }; } -const Walker = union(enum) { - walkerDepthFirst: WalkerDepthFirst, - walkerChildren: WalkerChildren, - walkerNone: WalkerNone, - - pub fn get_next(self: Walker, root: *parser.Node, cur: ?*parser.Node) !?*parser.Node { - switch (self) { - inline else => |case| return case.get_next(root, cur), - } - } -}; - -// WalkerDepthFirst iterates over the DOM tree to return the next following -// node or null at the end. -// -// This implementation is a zig version of Netsurf code. -// http://source.netsurf-browser.org/libdom.git/tree/src/html/html_collection.c#n177 -// -// The iteration is a depth first as required by the specification. -// https://dom.spec.whatwg.org/#htmlcollection -// https://dom.spec.whatwg.org/#concept-tree-order -pub const WalkerDepthFirst = struct { - pub fn get_next(_: WalkerDepthFirst, root: *parser.Node, cur: ?*parser.Node) !?*parser.Node { - var n = cur orelse root; - - // TODO deinit next - if (try parser.nodeFirstChild(n)) |next| { - return next; - } - - // TODO deinit next - if (try parser.nodeNextSibling(n)) |next| { - return next; - } - - // TODO deinit parent - // Back to the parent of cur. - // If cur has no parent, then the iteration is over. - var parent = try parser.nodeParentNode(n) orelse return null; - - // TODO deinit lastchild - var lastchild = try parser.nodeLastChild(parent); - while (n != root and n == lastchild) { - n = parent; - - // TODO deinit parent - // Back to the prev's parent. - // If prev has no parent, then the loop must stop. - parent = try parser.nodeParentNode(n) orelse break; - - // TODO deinit lastchild - lastchild = try parser.nodeLastChild(parent); - } - - if (n == root) { - return null; - } - - return try parser.nodeNextSibling(n); - } -}; - -// WalkerChildren iterates over the root's children only. -pub const WalkerChildren = struct { - pub fn get_next(_: WalkerChildren, root: *parser.Node, cur: ?*parser.Node) !?*parser.Node { - // On walk start, we return the first root's child. - if (cur == null) return try parser.nodeFirstChild(root); - - // If cur is root, then return null. - // This is a special case, if the root is included in the walk, we - // don't want to go further to find children. - if (root == cur.?) return null; - - return try parser.nodeNextSibling(cur.?); - } -}; - -pub const WalkerNone = struct { - pub fn get_next(_: WalkerNone, _: *parser.Node, _: ?*parser.Node) !?*parser.Node { - return null; - } -}; - pub const HTMLCollectionIterator = struct { pub const mem_guarantied = true; diff --git a/src/dom/walker.zig b/src/dom/walker.zig new file mode 100644 index 00000000..205936cb --- /dev/null +++ b/src/dom/walker.zig @@ -0,0 +1,86 @@ +const std = @import("std"); + +const parser = @import("../netsurf.zig"); + +pub const Walker = union(enum) { + walkerDepthFirst: WalkerDepthFirst, + walkerChildren: WalkerChildren, + walkerNone: WalkerNone, + + pub fn get_next(self: Walker, root: *parser.Node, cur: ?*parser.Node) !?*parser.Node { + switch (self) { + inline else => |case| return case.get_next(root, cur), + } + } +}; + +// WalkerDepthFirst iterates over the DOM tree to return the next following +// node or null at the end. +// +// This implementation is a zig version of Netsurf code. +// http://source.netsurf-browser.org/libdom.git/tree/src/html/html_collection.c#n177 +// +// The iteration is a depth first as required by the specification. +// https://dom.spec.whatwg.org/#htmlcollection +// https://dom.spec.whatwg.org/#concept-tree-order +pub const WalkerDepthFirst = struct { + pub fn get_next(_: WalkerDepthFirst, root: *parser.Node, cur: ?*parser.Node) !?*parser.Node { + var n = cur orelse root; + + // TODO deinit next + if (try parser.nodeFirstChild(n)) |next| { + return next; + } + + // TODO deinit next + if (try parser.nodeNextSibling(n)) |next| { + return next; + } + + // TODO deinit parent + // Back to the parent of cur. + // If cur has no parent, then the iteration is over. + var parent = try parser.nodeParentNode(n) orelse return null; + + // TODO deinit lastchild + var lastchild = try parser.nodeLastChild(parent); + while (n != root and n == lastchild) { + n = parent; + + // TODO deinit parent + // Back to the prev's parent. + // If prev has no parent, then the loop must stop. + parent = try parser.nodeParentNode(n) orelse break; + + // TODO deinit lastchild + lastchild = try parser.nodeLastChild(parent); + } + + if (n == root) { + return null; + } + + return try parser.nodeNextSibling(n); + } +}; + +// WalkerChildren iterates over the root's children only. +pub const WalkerChildren = struct { + pub fn get_next(_: WalkerChildren, root: *parser.Node, cur: ?*parser.Node) !?*parser.Node { + // On walk start, we return the first root's child. + if (cur == null) return try parser.nodeFirstChild(root); + + // If cur is root, then return null. + // This is a special case, if the root is included in the walk, we + // don't want to go further to find children. + if (root == cur.?) return null; + + return try parser.nodeNextSibling(cur.?); + } +}; + +pub const WalkerNone = struct { + pub fn get_next(_: WalkerNone, _: *parser.Node, _: ?*parser.Node) !?*parser.Node { + return null; + } +}; diff --git a/src/html/document.zig b/src/html/document.zig index 96dc96cd..d463ab29 100644 --- a/src/html/document.zig +++ b/src/html/document.zig @@ -12,7 +12,7 @@ const NodeList = @import("../dom/nodelist.zig").NodeList; const HTMLElem = @import("elements.zig"); const collection = @import("../dom/html_collection.zig"); -const Walker = collection.WalkerDepthFirst; +const Walker = @import("../dom/walker.zig").WalkerDepthFirst; // WEB IDL https://html.spec.whatwg.org/#the-document-object pub const HTMLDocument = struct {