Merge pull request #137 from lightpanda-io/window

Start of the loader and browser API
2025-12-14 23:38:57 +00:00 · 2024-01-17 18:33:41 +01:00
parent edadc92ec8 a2e266514f
commit ee3b62de56
18 changed files with 1091 additions and 94 deletions
--- a/build.zig
+++ b/build.zig
@@ -115,6 +115,29 @@ pub fn build(b: *std.build.Builder) !void {
    // step
    const wpt_step = b.step("wpt", "WPT tests");
    wpt_step.dependOn(&wpt_cmd.step);
+
+    // get
+    // -----
+
+    // compile and install
+    const get = b.addExecutable(.{
+        .name = "browsercore-get",
+        .root_source_file = .{ .path = "src/main_get.zig" },
+        .target = target,
+        .optimize = mode,
+    });
+    try common(get, options);
+    b.installArtifact(get);
+
+    // run
+    const get_cmd = b.addRunArtifact(get);
+    get_cmd.step.dependOn(b.getInstallStep());
+    if (b.args) |args| {
+        get_cmd.addArgs(args);
+    }
+    // step
+    const get_step = b.step("get", "request URL");
+    get_step.dependOn(&get_cmd.step);
 }

 fn common(
--- a/src/apiweb.zig
+++ b/src/apiweb.zig
--- a/src/browser/browser.zig
+++ b/src/browser/browser.zig
@@ -0,0 +1,434 @@
+const std = @import("std");
+
+const Types = @import("root").Types;
+
+const parser = @import("../netsurf.zig");
+const Loader = @import("loader.zig").Loader;
+const Dump = @import("dump.zig");
+const Mime = @import("mime.zig");
+
+const jsruntime = @import("jsruntime");
+const Loop = jsruntime.Loop;
+const Env = jsruntime.Env;
+
+const apiweb = @import("../apiweb.zig");
+
+const Window = @import("../html/window.zig").Window;
+const Walker = @import("../dom/walker.zig").WalkerDepthFirst;
+
+const FetchResult = std.http.Client.FetchResult;
+
+const log = std.log.scoped(.browser);
+
+// Browser represents one browser instance.
+// Several instances may coexist; each of them currently holds exactly one
+// session.
+// TODO allow multiple sessions per browser.
+pub const Browser = struct {
+    session: *Session,
+
+    // init creates the browser together with its single session, which starts
+    // on "about:blank".
+    // The vm parameter is not used by the browser itself: it only forces the
+    // caller to have initialised a JS VM beforehand.
+    pub fn init(alloc: std.mem.Allocator, vm: jsruntime.VM) !Browser {
+        _ = vm;
+
+        const session = try Session.init(alloc, "about:blank");
+        return .{ .session = session };
+    }
+
+    // deinit releases the session held by the browser.
+    pub fn deinit(self: *Browser) void {
+        self.session.deinit();
+    }
+
+    // currentSession returns the browser's only session.
+    pub fn currentSession(self: *Browser) *Session {
+        return self.session;
+    }
+};
+
+// Session is like a browser's tab.
+// It owns the js env and the loader for all the pages of the session.
+// You can create successively multiple pages for a session, but you must
+// deinit a page before running another one.
+pub const Session = struct {
+    // allocator used to init the arena.
+    alloc: std.mem.Allocator,
+
+    // The arena is used only to bound the js env init b/c it leaks memory.
+    // see https://github.com/lightpanda-io/jsruntime-lib/issues/181
+    //
+    // The arena is initialised with self.alloc allocator.
+    // all others Session deps use directly self.alloc and not the arena.
+    arena: std.heap.ArenaAllocator,
+
+    uri: []const u8,
+
+    // TODO handle proxy
+    loader: Loader,
+    env: Env = undefined,
+    loop: Loop,
+    window: Window,
+
+    jstypes: [Types.len]usize = undefined,
+
+    // init allocates a Session with alloc and sets up its loader, event loop
+    // and js env. The uri slice is stored as is, it is not copied.
+    // The session lives on the heap because the js env keeps a pointer to
+    // self.loop.
+    // On error, everything initialised so far is released again.
+    fn init(alloc: std.mem.Allocator, uri: []const u8) !*Session {
+        const self = try alloc.create(Session);
+        // give the allocation back if anything below fails.
+        errdefer alloc.destroy(self);
+
+        self.* = Session{
+            .uri = uri,
+            .alloc = alloc,
+            .arena = std.heap.ArenaAllocator.init(alloc),
+            .window = Window.create(null),
+            .loader = Loader.init(alloc),
+            .loop = try Loop.init(alloc),
+        };
+        // errdefers run in reverse order, which mirrors the order of deinit().
+        errdefer self.loop.deinit();
+        errdefer self.loader.deinit();
+        errdefer self.arena.deinit();
+
+        self.env = try Env.init(self.arena.allocator(), &self.loop);
+        errdefer self.env.deinit();
+
+        try self.env.load(&self.jstypes);
+
+        return self;
+    }
+
+    // deinit tears down the js env, the arena, the loader and the loop, then
+    // frees the session itself. The pointer must not be used afterwards.
+    fn deinit(self: *Session) void {
+        self.env.deinit();
+        self.arena.deinit();
+
+        self.loader.deinit();
+        self.loop.deinit();
+        self.alloc.destroy(self);
+    }
+
+    // createPage returns a new page bound to this session. The page's arena
+    // is backed by the session allocator.
+    pub fn createPage(self: *Session) !Page {
+        return Page.init(self.alloc, self);
+    }
+};
+
+// Page navigates to a URL.
+// You can navigate to multiple URLs with the same page, but you have to call
+// end() to stop the previous navigation before starting a new one.
+// The page handles all its memory in an arena allocator. The arena is reset
+// when end() is called.
+pub const Page = struct {
+    arena: std.heap.ArenaAllocator,
+    session: *Session,
+    doc: ?*parser.Document = null,
+
+    // handle url
+    rawuri: ?[]const u8 = null,
+    uri: std.Uri = undefined,
+
+    raw_data: ?[]const u8 = null,
+
+    // init builds a page attached to session. The page arena is backed by
+    // alloc; no document is loaded yet.
+    fn init(
+        alloc: std.mem.Allocator,
+        session: *Session,
+    ) Page {
+        const arena = std.heap.ArenaAllocator.init(alloc);
+        return .{ .arena = arena, .session = session };
+    }
+
+    // end stops the js env and frees all the memory of the page arena.
+    // It must be called before navigating to another url with the same page.
+    pub fn end(self: *Page) void {
+        self.session.env.stop();
+        // TODO unload document: https://html.spec.whatwg.org/#unloading-documents
+
+        // rawuri and raw_data are allocated from the page arena. Forget them
+        // before the arena is released so that neither dump() nor the next
+        // navigate() touches freed memory.
+        // NOTE(review): self.uri still refers to the old url bytes and
+        // self.doc is left untouched here; navigate() overwrites self.uri
+        // before using it.
+        self.rawuri = null;
+        self.raw_data = null;
+
+        _ = self.arena.reset(.free_all);
+    }
+
+    // deinit releases the page arena and everything allocated from it.
+    // The session is not touched.
+    pub fn deinit(self: *Page) void {
+        self.arena.deinit();
+    }
+
+    // dump writes the page content into the given file.
+    // When an HTML document has been parsed, its serialization is written.
+    // Otherwise the raw body is written, if any; a page without any loaded
+    // data writes nothing.
+    pub fn dump(self: *Page, out: std.fs.File) !void {
+        if (self.doc) |doc| {
+            try Dump.htmlFile(doc, out);
+            return;
+        }
+
+        if (self.raw_data) |data| {
+            try out.writeAll(data);
+        }
+    }
+
+    // navigate sends a GET request to uri and loads the response in the page.
+    // An HTML response is parsed and its scripts are evaluated; any other
+    // content type is stored as raw data. A response without Content-Type
+    // header is ignored. A status other than 200 returns
+    // error.BadStatusCode.
+    // spec reference: https://html.spec.whatwg.org/#document-lifecycle
+    pub fn navigate(self: *Page, uri: []const u8) !void {
+        const alloc = self.arena.allocator();
+
+        log.debug("starting GET {s}", .{uri});
+
+        // keep a private copy of the url: self.uri points into it.
+        if (self.rawuri) |prev| alloc.free(prev);
+        const owned = try alloc.dupe(u8, uri);
+        self.rawuri = owned;
+        self.uri = std.Uri.parse(owned) catch try std.Uri.parseWithoutScheme(owned);
+
+        // TODO handle fragment in url.
+
+        // send the request.
+        var resp = try self.session.loader.get(alloc, self.uri);
+        defer resp.deinit();
+
+        const req = resp.req;
+        const status = req.response.status;
+
+        log.info("GET {any} {d}", .{ self.uri, status });
+
+        // TODO handle redirection
+        if (status != .ok) return error.BadStatusCode;
+
+        // TODO handle charset
+        // https://html.spec.whatwg.org/#content-type
+        const ct = req.response.headers.getFirstValue("Content-Type") orelse {
+            // TODO try to sniff mime type from the body.
+            log.info("no content-type HTTP header", .{});
+            return;
+        };
+        log.debug("header content-type: {s}", .{ct});
+
+        const mime = try Mime.parse(ct);
+        if (!mime.eql(Mime.HTML)) {
+            log.info("non-HTML document: {s}", .{ct});
+
+            // keep the body, up to 16MiB, in the page.
+            const max_body_size = 16 * 1024 * 1024;
+            self.raw_data = try req.reader().readAllAlloc(alloc, max_body_size);
+            return;
+        }
+
+        try self.loadHTMLDoc(req.reader(), mime.charset orelse "utf-8");
+    }
+
+    // loadHTMLDoc parses the HTML read from reader with the given charset,
+    // stores the resulting document in the page and in the session's window,
+    // starts the js env and evaluates the document's scripts: first the
+    // non-async ones in tree order, then the async ones.
+    // A failing script is logged and doesn't stop the loading.
+    // https://html.spec.whatwg.org/#read-html
+    fn loadHTMLDoc(self: *Page, reader: anytype, charset: []const u8) !void {
+        const alloc = self.arena.allocator();
+
+        log.debug("parse html with charset {s}", .{charset});
+
+        // the parser is given a zero-terminated copy of the charset.
+        const ccharset = try alloc.dupeZ(u8, charset);
+        defer alloc.free(ccharset);
+
+        const html_doc = try parser.documentHTMLParse(reader, ccharset);
+        const doc = parser.documentHTMLToDocument(html_doc);
+
+        // save a document's pointer in the page.
+        self.doc = doc;
+
+        // TODO set document.readyState to interactive
+        // https://html.spec.whatwg.org/#reporting-document-loading-status
+
+        // TODO inject the URL to the document including the fragment.
+        // TODO set the referrer to the document.
+
+        self.session.window.replaceDocument(doc);
+
+        // https://html.spec.whatwg.org/#read-html
+
+        // start JS env
+        // TODO load the js env concurrently with the HTML parsing.
+        log.debug("start js env", .{});
+        try self.session.env.start(alloc);
+
+        // add global objects
+        // the same window object is exposed as both "window" and "self".
+        log.debug("setup global env", .{});
+        try self.session.env.addObject(self.session.window, "window");
+        try self.session.env.addObject(self.session.window, "self");
+        try self.session.env.addObject(html_doc, "document");
+
+        // browse the DOM tree to retrieve scripts
+        // TODO execute the synchronous scripts during the HTML parsing.
+        // TODO fetch the script resources concurrently but execute them in the
+        // declaration order for synchronous ones.
+
+        // sasync stores scripts which can be run asynchronously.
+        // for now they are just run after the non-async ones in order to
+        // dispatch DOMContentLoaded as soon as possible.
+        var sasync = std.ArrayList(*parser.Element).init(alloc);
+        defer sasync.deinit();
+
+        const root = parser.documentToNode(doc);
+        const walker = Walker{};
+        var next: ?*parser.Node = null;
+        while (true) {
+            // the walker returns null once the whole tree has been visited.
+            next = try walker.get_next(root, next) orelse break;
+
+            // ignore non-elements nodes.
+            if (try parser.nodeType(next.?) != .element) {
+                continue;
+            }
+
+            const e = parser.nodeToElement(next.?);
+            const tag = try parser.elementHTMLGetTagType(@as(*parser.ElementHTML, @ptrCast(e)));
+
+            // ignore non-script tags
+            if (tag != .script) continue;
+
+            // ignore non-js script.
+            // > type
+            // > Attribute is not set (default), an empty string, or a JavaScript MIME
+            // > type indicates that the script is a "classic script", containing
+            // > JavaScript code.
+            // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/script#attribute_is_not_set_default_an_empty_string_or_a_javascript_mime_type
+            const stype = try parser.elementGetAttribute(e, "type");
+            if (!isJS(stype)) {
+                continue;
+            }
+
+            // Ignore the defer attribute b/c we analyze all scripts
+            // after the document has been parsed.
+            // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/script#defer
+
+            // TODO use fetchpriority
+            // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/script#fetchpriority
+
+            // > async
+            // > For classic scripts, if the async attribute is present,
+            // > then the classic script will be fetched in parallel to
+            // > parsing and evaluated as soon as it is available.
+            // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/script#async
+            if (try parser.elementGetAttribute(e, "async") != null) {
+                // async scripts are only collected here, they are evaluated
+                // after this loop.
+                try sasync.append(e);
+                continue;
+            }
+
+            // TODO handle for attribute
+            // TODO handle event attribute
+
+            // TODO defer
+            // > This Boolean attribute is set to indicate to a browser
+            // > that the script is meant to be executed after the
+            // > document has been parsed, but before firing
+            // > DOMContentLoaded.
+            // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/script#defer
+            // defer allows us to load a script w/o blocking the rest of
+            // evaluations.
+
+            // > Scripts without async, defer or type="module"
+            // > attributes, as well as inline scripts without the
+            // > type="module" attribute, are fetched and executed
+            // > immediately before the browser continues to parse the
+            // > page.
+            // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/script#notes
+            // an evaluation error is only logged: the next scripts still run.
+            self.evalScript(e) catch |err| log.warn("evaljs: {any}", .{err});
+        }
+
+        // TODO wait for deferred scripts
+
+        // TODO dispatch DOMContentLoaded before the transition to "complete",
+        // at the point where all subresources apart from async script elements
+        // have loaded.
+        // https://html.spec.whatwg.org/#reporting-document-loading-status
+
+        // eval async scripts.
+        for (sasync.items) |e| {
+            self.evalScript(e) catch |err| log.warn("evaljs: {any}", .{err});
+        }
+
+        // TODO wait for async scripts
+
+        // TODO set document.readyState to complete
+    }
+
+    // evalScript runs the script element e in the session's js env.
+    // The src attribute has priority: when present, the script is fetched and
+    // the element's text is ignored. Without src, the text content of the
+    // element is evaluated. An element with neither is a no-op.
+    // https://html.spec.whatwg.org/multipage/scripting.html#script-processing-model
+    fn evalScript(self: *Page, e: *parser.Element) !void {
+        const alloc = self.arena.allocator();
+
+        // https://html.spec.whatwg.org/multipage/webappapis.html#fetch-a-classic-script
+        if (try parser.elementGetAttribute(e, "src")) |src| {
+            log.debug("starting GET {s}", .{src});
+
+            self.fetchScript(src) catch |err| switch (err) {
+                // TODO If el's result is null, then fire an event named error at
+                // el, and return.
+                FetchError.NoBody => return,
+
+                // a js error has already been logged by fetchScript.
+                FetchError.JsErr => {},
+
+                // bad status code and any other failure go to the caller.
+                else => return err,
+            };
+
+            // TODO If el's from an external file is true, then fire an event
+            // named load at el.
+
+            return;
+        }
+
+        // nothing to load without text.
+        // TODO If el's result is null, then fire an event named error at
+        // el, and return.
+        const text = try parser.nodeTextContent(parser.elementToNode(e)) orelse return;
+
+        // TODO handle charset attribute
+        var res = jsruntime.JSResult{};
+        try self.session.env.run(alloc, text, "", &res, null);
+        defer res.deinit(alloc);
+
+        if (!res.success) {
+            log.info("eval inline: {s}", .{res.result});
+            return;
+        }
+        log.debug("eval inline: {s}", .{res.result});
+    }
+
+    const FetchError = error{
+        BadStatusCode,
+        NoBody,
+        JsErr,
+    };
+
+    // fetchScript sends a GET request to src, resolved against the page url,
+    // and executes the script received.
+    // It returns FetchError.BadStatusCode when the status is not 200,
+    // FetchError.NoBody when the response has no body and FetchError.JsErr
+    // when the evaluation of the script fails.
+    fn fetchScript(self: *Page, src: []const u8) !void {
+        const alloc = self.arena.allocator();
+
+        log.debug("starting fetch script {s}", .{src});
+
+        const parsed = std.Uri.parse(src) catch try std.Uri.parseWithoutScheme(src);
+        const target = try std.Uri.resolve(self.uri, parsed, false, alloc);
+
+        var fetched = try self.session.loader.fetch(alloc, target);
+        defer fetched.deinit();
+
+        log.info("fech script {any}: {d}", .{ target, fetched.status });
+
+        if (fetched.status != .ok) return FetchError.BadStatusCode;
+
+        // TODO check content-type
+
+        const body = fetched.body orelse return FetchError.NoBody;
+
+        var res = jsruntime.JSResult{};
+        try self.session.env.run(alloc, body, src, &res, null);
+        defer res.deinit(alloc);
+
+        if (!res.success) {
+            log.info("eval remote {s}: {s}", .{ src, res.result });
+            return FetchError.JsErr;
+        }
+        log.debug("eval remote {s}: {s}", .{ src, res.result });
+    }
+
+    // isJS reports whether a script element with the given type attribute
+    // value is a classic JavaScript script.
+    // > type
+    // > Attribute is not set (default), an empty string, or a JavaScript MIME
+    // > type indicates that the script is a "classic script", containing
+    // > JavaScript code.
+    // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/script#attribute_is_not_set_default_an_empty_string_or_a_javascript_mime_type
+    //
+    // Any other value ("module", "application/json", "text/template", ...)
+    // is not JavaScript to evaluate here and returns false.
+    fn isJS(stype: ?[]const u8) bool {
+        const raw = stype orelse return true;
+        if (raw.len == 0) return true;
+
+        // JavaScript MIME type essences.
+        // https://mimesniff.spec.whatwg.org/#javascript-mime-type
+        const js_mime_types = [_][]const u8{
+            "application/ecmascript",
+            "application/javascript",
+            "application/x-ecmascript",
+            "application/x-javascript",
+            "text/ecmascript",
+            "text/javascript",
+            "text/javascript1.0",
+            "text/javascript1.1",
+            "text/javascript1.2",
+            "text/javascript1.3",
+            "text/javascript1.4",
+            "text/javascript1.5",
+            "text/jscript",
+            "text/livescript",
+            "text/x-ecmascript",
+            "text/x-javascript",
+        };
+
+        // the match ignores surrounding whitespace and ASCII case.
+        const t = std.mem.trim(u8, raw, &std.ascii.whitespace);
+        for (js_mime_types) |m| {
+            if (std.ascii.eqlIgnoreCase(t, m)) return true;
+        }
+
+        return false;
+    }
+};
--- a/src/browser/dump.zig
+++ b/src/browser/dump.zig
@@ -0,0 +1,96 @@
+const std = @import("std");
+const File = std.fs.File;
+
+const parser = @import("../netsurf.zig");
+const Walker = @import("../dom/walker.zig").WalkerChildren;
+
+// htmlFile writes the HTML serialization of doc into out: a fixed
+// "<!DOCTYPE html>" line, the document's nodes, then a final newline.
+pub fn htmlFile(doc: *parser.Document, out: File) !void {
+    try out.writeAll("<!DOCTYPE html>\n");
+    try nodeFile(parser.documentToNode(doc), out);
+    try out.writeAll("\n");
+}
+
+// nodeFile writes the children of root into out.
+// Elements are written with their attributes and their own children, by
+// recursion. Texts, CDATA sections and comments are written with their
+// value, as is, without any escaping. All the other node types are skipped.
+fn nodeFile(root: *parser.Node, out: File) !void {
+    const walker = Walker{};
+    var cur: ?*parser.Node = null;
+    while (try walker.get_next(root, cur)) |node| {
+        cur = node;
+
+        switch (try parser.nodeType(node)) {
+            .element => {
+                // opening tag with its attributes.
+                const tag = try parser.nodeLocalName(node);
+                try out.writeAll("<");
+                try out.writeAll(tag);
+
+                const attrs = try parser.nodeGetAttributes(node);
+                const count = try parser.namedNodeMapGetLength(attrs);
+                var idx: u32 = 0;
+                while (idx < count) : (idx += 1) {
+                    const attr = try parser.namedNodeMapItem(attrs, idx) orelse break;
+                    try out.writeAll(" ");
+                    try out.writeAll(try parser.attributeGetName(attr));
+                    try out.writeAll("=\"");
+                    try out.writeAll(try parser.attributeGetValue(attr) orelse "");
+                    try out.writeAll("\"");
+                }
+
+                try out.writeAll(">");
+
+                // children.
+                // TODO avoid recursion
+                try nodeFile(node, out);
+
+                // closing tag.
+                try out.writeAll("</");
+                try out.writeAll(tag);
+                try out.writeAll(">");
+            },
+            .text => {
+                if (try parser.nodeValue(node)) |v| {
+                    try out.writeAll(v);
+                }
+            },
+            .cdata_section => {
+                if (try parser.nodeValue(node)) |v| {
+                    try out.writeAll("<![CDATA[");
+                    try out.writeAll(v);
+                    try out.writeAll("]]>");
+                }
+            },
+            .comment => {
+                if (try parser.nodeValue(node)) |v| {
+                    try out.writeAll("<!--");
+                    try out.writeAll(v);
+                    try out.writeAll("-->");
+                }
+            },
+            // Nothing is written for the remaining node types:
+            // - processing instruction: TODO handle processing instruction dump
+            // - document fragment: outside of the main document DOM.
+            // - document: never met here, listed for completeness.
+            // - document type: written globally by htmlFile instead.
+            // - attribute, entity reference, entity, notation: deprecated.
+            .processing_instruction,
+            .document_fragment,
+            .document,
+            .document_type,
+            .attribute,
+            .entity_reference,
+            .entity,
+            .notation,
+            => continue,
+        }
+    }
+}
+
+// HTMLFileTestFn parses the file "test.html" from the current working
+// directory and dumps the resulting document into out.
+// It is run by run_tests.zig
+pub fn HTMLFileTestFn(out: File) !void {
+    const input = try std.fs.cwd().openFile("test.html", .{});
+    defer input.close();
+
+    const html = try parser.documentHTMLParse(input.reader(), "UTF-8");
+    // an error while closing the document is ignored.
+    defer parser.documentHTMLClose(html) catch {};
+
+    try htmlFile(parser.documentHTMLToDocument(html), out);
+}
--- a/src/browser/loader.zig
+++ b/src/browser/loader.zig
@@ -0,0 +1,86 @@
+const std = @import("std");
+
+const user_agent = "Lightpanda.io/1.0";
+
+// Loader sends the HTTP requests of the browser through a single
+// std.http.Client.
+pub const Loader = struct {
+    client: std.http.Client,
+
+    // headers sent with every request.
+    const default_headers = [_]std.http.Field{
+        .{ .name = "User-Agent", .value = user_agent },
+        .{ .name = "Accept", .value = "*/*" },
+        .{ .name = "Accept-Language", .value = "en-US,en;q=0.5" },
+    };
+
+    // Response wraps a heap allocated request whose response head has been
+    // received. The body can be read from req.
+    pub const Response = struct {
+        alloc: std.mem.Allocator,
+        req: *std.http.Client.Request,
+
+        // deinit closes the request and frees it.
+        pub fn deinit(self: *Response) void {
+            self.req.deinit();
+            self.alloc.destroy(self.req);
+        }
+    };
+
+    // init creates a loader whose HTTP client allocates with alloc.
+    pub fn init(alloc: std.mem.Allocator) Loader {
+        return .{
+            .client = .{ .allocator = alloc },
+        };
+    }
+
+    // deinit releases the HTTP client.
+    pub fn deinit(self: *Loader) void {
+        self.client.deinit();
+    }
+
+    // fetch sends a GET request to uri and returns the complete result.
+    // the caller must deinit the FetchResult.
+    pub fn fetch(self: *Loader, alloc: std.mem.Allocator, uri: std.Uri) !std.http.Client.FetchResult {
+        var headers = try std.http.Headers.initList(alloc, &default_headers);
+        defer headers.deinit();
+
+        return try self.client.fetch(alloc, .{
+            .location = .{ .uri = uri },
+            .headers = headers,
+            .payload = .none,
+        });
+    }
+
+    // get sends a GET request to uri and waits for the response head. The
+    // body is left unread.
+    // see
+    // https://ziglang.org/documentation/master/std/#A;std:http.Client.fetch
+    // for reference.
+    // The caller is responsible for calling `deinit()` on the `Response`.
+    pub fn get(self: *Loader, alloc: std.mem.Allocator, uri: std.Uri) !Response {
+        var headers = try std.http.Headers.initList(alloc, &default_headers);
+        defer headers.deinit();
+
+        const req = try alloc.create(std.http.Client.Request);
+        errdefer alloc.destroy(req);
+
+        req.* = try self.client.open(.GET, uri, headers, .{
+            .handle_redirects = true, // TODO handle redirects manually
+        });
+        errdefer req.deinit();
+
+        try req.send(.{});
+        try req.finish();
+        try req.wait();
+
+        return Response{ .alloc = alloc, .req = req };
+    }
+};
+
+// This test needs network access.
+test "basic url fetch" {
+    const alloc = std.testing.allocator;
+    var loader = Loader.init(alloc);
+    defer loader.deinit();
+
+    // Loader.fetch takes a parsed std.Uri, not a raw string.
+    const uri = try std.Uri.parse("https://en.wikipedia.org/wiki/Main_Page");
+
+    var result = try loader.fetch(alloc, uri);
+    defer result.deinit();
+
+    try std.testing.expect(result.status == std.http.Status.ok);
+}
--- a/src/browser/mime.zig
+++ b/src/browser/mime.zig
@@ -0,0 +1,219 @@
+const std = @import("std");
+const testing = std.testing;
+
+const Self = @This();
+
+const MimeError = error{
+    Empty,
+    TooBig,
+    Invalid,
+    InvalidChar,
+};
+
+mtype: []const u8,
+msubtype: []const u8,
+params: []const u8 = "",
+
+charset: ?[]const u8 = null,
+boundary: ?[]const u8 = null,
+
+pub const HTML = Self{ .mtype = "text", .msubtype = "html" };
+pub const Javascript = Self{ .mtype = "application", .msubtype = "javascript" };
+
+// reader is a forward-only cursor over the string s; i is the index of the
+// next byte to read.
+const reader = struct {
+    s: []const u8,
+    i: usize = 0,
+
+    // until returns the bytes from the cursor up to, but excluding, the next
+    // occurrence of c and leaves the cursor on c. Without any occurrence,
+    // the rest of the string is returned and the cursor is left at the end.
+    fn until(self: *reader, c: u8) []const u8 {
+        const from = self.i;
+        self.i = std.mem.indexOfScalarPos(u8, self.s, from, c) orelse self.s.len;
+        return self.s[from..self.i];
+    }
+
+    // tail returns all the remaining bytes and moves the cursor to the end.
+    fn tail(self: *reader) []const u8 {
+        if (self.i > self.s.len) return "";
+
+        const rest = self.s[self.i..];
+        self.i = self.s.len;
+        return rest;
+    }
+
+    // skip moves the cursor one byte forward. It returns false, without
+    // moving, when the cursor is already at the end.
+    fn skip(self: *reader) bool {
+        const more = self.i < self.s.len;
+        if (more) self.i += 1;
+        return more;
+    }
+};
+
+// skip succeeds once per byte, then keeps failing at the end of the input.
+test "reader.skip" {
+    var r = reader{ .s = "foo" };
+    for (0..3) |_| try testing.expect(r.skip());
+    for (0..2) |_| try testing.expect(!r.skip());
+}
+
+// tail consumes the whole input: a second call has nothing left to return.
+test "reader.tail" {
+    var r = reader{ .s = "foo" };
+    try testing.expectEqualStrings("foo", r.tail());
+    try testing.expectEqualStrings("", r.tail());
+}
+
+// until stops on the separator without consuming it, hence the skip between
+// two calls.
+test "reader.until" {
+    var r = reader{ .s = "foo.bar.baz" };
+    try testing.expectEqualStrings("foo", r.until('.'));
+    _ = r.skip();
+    try testing.expectEqualStrings("bar", r.until('.'));
+    _ = r.skip();
+    try testing.expectEqualStrings("baz", r.until('.'));
+
+    // without separator, the whole input is returned.
+    r = reader{ .s = "foo" };
+    try testing.expectEqualStrings("foo", r.until('.'));
+
+    // empty input.
+    r = reader{ .s = "" };
+    try testing.expectEqualStrings("", r.until('.'));
+}
+
+// trim returns s without its leading and trailing ASCII whitespace.
+// The result is a sub-slice of s, nothing is allocated.
+// An input made only of whitespace gives an empty string: scanning the two
+// ends independently would cross the indexes and build an invalid slice.
+fn trim(s: []const u8) []const u8 {
+    // std.ascii.whitespace is the set of bytes matched by
+    // std.ascii.isWhitespace.
+    return std.mem.trim(u8, s, &std.ascii.whitespace);
+}
+
+// trim removes whitespace on both sides and keeps the rest untouched.
+test "trim" {
+    const cases = [_]struct { in: []const u8, want: []const u8 }{
+        .{ .in = "", .want = "" },
+        .{ .in = "foo", .want = "foo" },
+        .{ .in = " \n\tfoo", .want = "foo" },
+        .{ .in = "foo \n\t", .want = "foo" },
+    };
+    for (cases) |tc| {
+        try testing.expectEqualStrings(tc.want, trim(tc.in));
+    }
+}
+
+// isHTTPCodePoint reports whether c is an HTTP token code point: an ASCII
+// alphanumeric or one of !#$%&'*+-.^_`|~
+// https://mimesniff.spec.whatwg.org/#http-token-code-point
+fn isHTTPCodePoint(c: u8) bool {
+    return switch (c) {
+        '!', '#', '$', '%', '&', '\'', '*', '+', '-', '.', '^', '_', '`', '|', '~' => true,
+        else => std.ascii.isAlphanumeric(c),
+    };
+}
+
+// valid reports whether s contains only HTTP token code points.
+// An empty string is valid.
+fn valid(s: []const u8) bool {
+    for (s) |c| {
+        if (!isHTTPCodePoint(c)) return false;
+    }
+    return true;
+}
+
+// unquote removes one pair of surrounding double quotes from v, if any.
+fn unquote(v: []const u8) []const u8 {
+    if (v.len >= 2 and v[0] == '"' and v[v.len - 1] == '"') {
+        return v[1 .. v.len - 1];
+    }
+    return v;
+}
+
+// parse parses the value of a Content-Type header.
+// https://mimesniff.spec.whatwg.org/#parsing-a-mime-type
+//
+// All the slices of the result point into s, nothing is allocated.
+// The charset and boundary parameters are extracted, without their
+// surrounding double quotes when the value is quoted; params keeps the raw
+// parameter string.
+// NOTE: a ';' inside a quoted value and backslash escapes are not handled.
+//
+// Errors:
+// - Empty: s is empty.
+// - TooBig: s is longer than 255 bytes.
+// - Invalid: the type or the subtype is missing, or a ';' is followed by
+//   no parameter.
+// - InvalidChar: the type or the subtype contains a byte which is not an
+//   HTTP token code point.
+pub fn parse(s: []const u8) Self.MimeError!Self {
+    if (s.len == 0) return MimeError.Empty;
+    // limit input size
+    if (s.len > 255) return MimeError.TooBig;
+
+    var r = reader{ .s = s };
+
+    const mtype = trim(r.until('/'));
+    if (mtype.len == 0) return MimeError.Invalid;
+    if (!valid(mtype)) return MimeError.InvalidChar;
+
+    // the '/' separator is mandatory.
+    if (!r.skip()) return MimeError.Invalid;
+
+    const msubtype = trim(r.until(';'));
+    if (msubtype.len == 0) return MimeError.Invalid;
+    if (!valid(msubtype)) return MimeError.InvalidChar;
+
+    var res = Self{ .mtype = mtype, .msubtype = msubtype };
+
+    // no ';': the mime type has no parameter.
+    if (!r.skip()) return res;
+    res.params = trim(r.tail());
+    if (res.params.len == 0) return MimeError.Invalid;
+
+    // parse well known parameters.
+    // don't check invalid parameter format.
+    var rp = reader{ .s = res.params };
+    while (true) {
+        const name = trim(rp.until('='));
+        // no '=': there is no more name=value pair to read.
+        if (!rp.skip()) break;
+        const value = unquote(trim(rp.until(';')));
+
+        if (std.ascii.eqlIgnoreCase(name, "charset")) {
+            res.charset = value;
+        }
+        if (std.ascii.eqlIgnoreCase(name, "boundary")) {
+            res.boundary = value;
+        }
+
+        // no ';': this was the last parameter.
+        if (!rp.skip()) break;
+    }
+
+    return res;
+}
+
+test "parse valid" {
+    // whitespace around the type and the subtype is ignored.
+    for ([_][]const u8{
+        "text/html",
+        " \ttext/html",
+        "text \t/html",
+        "text/ \thtml",
+        "text/html \t",
+    }) |tc| {
+        const m = try Self.parse(tc);
+        try testing.expectEqualStrings("text", m.mtype);
+        try testing.expectEqualStrings("html", m.msubtype);
+    }
+    // digits and '.' are accepted in a subtype.
+    const m2 = try Self.parse("text/javascript1.5");
+    try testing.expectEqualStrings("text", m2.mtype);
+    try testing.expectEqualStrings("javascript1.5", m2.msubtype);
+
+    // charset parameter.
+    const m3 = try Self.parse("text/html; charset=utf-8");
+    try testing.expectEqualStrings("text", m3.mtype);
+    try testing.expectEqualStrings("html", m3.msubtype);
+    try testing.expectEqualStrings("charset=utf-8", m3.params);
+    try testing.expectEqualStrings("utf-8", m3.charset.?);
+
+    // boundary parameter.
+    const m4 = try Self.parse("text/html; boundary=----");
+    try testing.expectEqualStrings("text", m4.mtype);
+    try testing.expectEqualStrings("html", m4.msubtype);
+    try testing.expectEqualStrings("boundary=----", m4.params);
+    try testing.expectEqualStrings("----", m4.boundary.?);
+}
+
+// every input below must be rejected by parse, whatever the error is.
+test "parse invalid" {
+    const cases = [_][]const u8{
+        "",
+        "te xt/html;",
+        "te@xt/html;",
+        "text/ht@ml;",
+        "text/html;",
+        "/text/html",
+        "/html",
+    };
+    for (cases) |tc| {
+        if (Self.parse(tc)) |_| {
+            return error.TestUnexpectedResult;
+        } else |_| {}
+    }
+}
+
+// eql reports whether self and b have the same type and subtype.
+// Parameters are not compared.
+// The comparison ignores ASCII case: parse keeps the case received from the
+// header, and "Text/HTML" designates the same type as "text/html".
+pub fn eql(self: Self, b: Self) bool {
+    return std.ascii.eqlIgnoreCase(self.mtype, b.mtype) and
+        std.ascii.eqlIgnoreCase(self.msubtype, b.msubtype);
+}
--- a/src/dom/document.zig
+++ b/src/dom/document.zig
@@ -10,7 +10,7 @@ const Node = @import("node.zig").Node;
 const NodeList = @import("nodelist.zig").NodeList;
 const NodeUnion = @import("node.zig").Union;

-const Walker = @import("html_collection.zig").WalkerDepthFirst;
+const Walker = @import("walker.zig").WalkerDepthFirst;
 const collection = @import("html_collection.zig");

 const Element = @import("element.zig").Element;
--- a/src/dom/element.zig
+++ b/src/dom/element.zig
@@ -9,7 +9,7 @@ const checkCases = jsruntime.test_utils.checkCases;
 const collection = @import("html_collection.zig");

 const Node = @import("node.zig").Node;
-const Walker = @import("html_collection.zig").WalkerDepthFirst;
+const Walker = @import("walker.zig").WalkerDepthFirst;
 const NodeList = @import("nodelist.zig").NodeList;
 const HTMLElem = @import("../html/elements.zig");
 pub const Union = @import("../html/elements.zig").Union;
--- a/src/dom/html_collection.zig
+++ b/src/dom/html_collection.zig
@@ -11,6 +11,11 @@ const utils = @import("utils.z");
 const Element = @import("element.zig").Element;
 const Union = @import("element.zig").Union;

+const Walker = @import("walker.zig").Walker;
+const WalkerDepthFirst = @import("walker.zig").WalkerDepthFirst;
+const WalkerChildren = @import("walker.zig").WalkerChildren;
+const WalkerNone = @import("walker.zig").WalkerNone;
+
 const Matcher = union(enum) {
    matchByName: MatchByName,
    matchByTagName: MatchByTagName,
@@ -255,89 +260,6 @@ pub fn HTMLCollectionByAnchors(
    };
 }

-const Walker = union(enum) {
-    walkerDepthFirst: WalkerDepthFirst,
-    walkerChildren: WalkerChildren,
-    walkerNone: WalkerNone,
-
-    pub fn get_next(self: Walker, root: *parser.Node, cur: ?*parser.Node) !?*parser.Node {
-        switch (self) {
-            inline else => |case| return case.get_next(root, cur),
-        }
-    }
-};
-
-// WalkerDepthFirst iterates over the DOM tree to return the next following
-// node or null at the end.
-//
-// This implementation is a zig version of Netsurf code.
-// http://source.netsurf-browser.org/libdom.git/tree/src/html/html_collection.c#n177
-//
-// The iteration is a depth first as required by the specification.
-// https://dom.spec.whatwg.org/#htmlcollection
-// https://dom.spec.whatwg.org/#concept-tree-order
-pub const WalkerDepthFirst = struct {
-    pub fn get_next(_: WalkerDepthFirst, root: *parser.Node, cur: ?*parser.Node) !?*parser.Node {
-        var n = cur orelse root;
-
-        // TODO deinit next
-        if (try parser.nodeFirstChild(n)) |next| {
-            return next;
-        }
-
-        // TODO deinit next
-        if (try parser.nodeNextSibling(n)) |next| {
-            return next;
-        }
-
-        // TODO deinit parent
-        // Back to the parent of cur.
-        // If cur has no parent, then the iteration is over.
-        var parent = try parser.nodeParentNode(n) orelse return null;
-
-        // TODO deinit lastchild
-        var lastchild = try parser.nodeLastChild(parent);
-        while (n != root and n == lastchild) {
-            n = parent;
-
-            // TODO deinit parent
-            // Back to the prev's parent.
-            // If prev has no parent, then the loop must stop.
-            parent = try parser.nodeParentNode(n) orelse break;
-
-            // TODO deinit lastchild
-            lastchild = try parser.nodeLastChild(parent);
-        }
-
-        if (n == root) {
-            return null;
-        }
-
-        return try parser.nodeNextSibling(n);
-    }
-};
-
-// WalkerChildren iterates over the root's children only.
-pub const WalkerChildren = struct {
-    pub fn get_next(_: WalkerChildren, root: *parser.Node, cur: ?*parser.Node) !?*parser.Node {
-        // On walk start, we return the first root's child.
-        if (cur == null) return try parser.nodeFirstChild(root);
-
-        // If cur is root, then return null.
-        // This is a special case, if the root is included in the walk, we
-        // don't want to go further to find children.
-        if (root == cur.?) return null;
-
-        return try parser.nodeNextSibling(cur.?);
-    }
-};
-
-pub const WalkerNone = struct {
-    pub fn get_next(_: WalkerNone, _: *parser.Node, _: ?*parser.Node) !?*parser.Node {
-        return null;
-    }
-};
-
 pub const HTMLCollectionIterator = struct {
    pub const mem_guarantied = true;

--- a/src/dom/walker.zig
+++ b/src/dom/walker.zig
@@ -0,0 +1,86 @@
+const std = @import("std");
+
+const parser = @import("../netsurf.zig");
+
+pub const Walker = union(enum) { // selects how get_next moves through the tree
+    walkerDepthFirst: WalkerDepthFirst, // the whole tree, depth first
+    walkerChildren: WalkerChildren, // the root's children only
+    walkerNone: WalkerNone, // no node at all
+
+    pub fn get_next(self: Walker, root: *parser.Node, cur: ?*parser.Node) !?*parser.Node {
+        switch (self) {
+            inline else => |case| return case.get_next(root, cur), // forwards to the active variant
+        }
+    }
+};
+
+// WalkerDepthFirst iterates over the DOM tree to return the next following
+// node or null at the end. The walk never leaves the tree rooted at root.
+//
+// This implementation is a zig version of Netsurf code.
+// http://source.netsurf-browser.org/libdom.git/tree/src/html/html_collection.c#n177
+//
+// The iteration is a depth first as required by the specification.
+// https://dom.spec.whatwg.org/#htmlcollection
+// https://dom.spec.whatwg.org/#concept-tree-order
+pub const WalkerDepthFirst = struct {
+    // root is the top of the walked tree; cur is the node to continue from,
+    // or null to start at root. Errors of the parser calls are propagated.
+    pub fn get_next(_: WalkerDepthFirst, root: *parser.Node, cur: ?*parser.Node) !?*parser.Node {
+        var n = cur orelse root;
+
+        // TODO deinit next
+        if (try parser.nodeFirstChild(n)) |next| return next;
+
+        // A childless root has no descendant: stop here instead of moving on
+        // to root's own next sibling, which is outside the walked tree.
+        if (n == root) return null;
+
+        // TODO deinit next
+        if (try parser.nodeNextSibling(n)) |next| return next;
+
+        // TODO deinit parent
+        // Back to the parent of cur.
+        // If cur has no parent, then the iteration is over.
+        var parent = try parser.nodeParentNode(n) orelse return null;
+
+        // TODO deinit lastchild
+        var lastchild = try parser.nodeLastChild(parent);
+        while (n != root and n == lastchild) {
+            n = parent;
+
+            // TODO deinit parent
+            // Back to the prev's parent.
+            // If prev has no parent, then the loop must stop.
+            parent = try parser.nodeParentNode(n) orelse break;
+
+            // TODO deinit lastchild
+            lastchild = try parser.nodeLastChild(parent);
+        }
+
+        if (n == root) return null; // climbed back to root: the walk is over
+
+        return try parser.nodeNextSibling(n);
+    }
+};
+
+// WalkerChildren walks the children of root only, one sibling after another.
+pub const WalkerChildren = struct {
+    pub fn get_next(_: WalkerChildren, root: *parser.Node, cur: ?*parser.Node) !?*parser.Node {
+        // No current node yet: the walk starts with root's first child.
+        const current = cur orelse return try parser.nodeFirstChild(root);
+
+        // Special case: cur is root itself, which happens when the root is
+        // included in the walk. The walk ends here, we don't go further to
+        // look for children.
+        if (current == root) return null;
+
+        return try parser.nodeNextSibling(current);
+    }
+};
+
+pub const WalkerNone = struct {
+    pub fn get_next(_: WalkerNone, _: *parser.Node, _: ?*parser.Node) !?*parser.Node {
+        return null; // no node is ever returned, whatever root and cur are
+    }
+};
--- a/src/html/document.zig
+++ b/src/html/document.zig
@@ -12,7 +12,7 @@ const NodeList = @import("../dom/nodelist.zig").NodeList;
 const HTMLElem = @import("elements.zig");

 const collection = @import("../dom/html_collection.zig");
-const Walker = collection.WalkerDepthFirst;
+const Walker = @import("../dom/walker.zig").WalkerDepthFirst;

 // WEB IDL https://html.spec.whatwg.org/#the-document-object
 pub const HTMLDocument = struct {
--- a/src/html/html.zig
+++ b/src/html/html.zig
@@ -2,10 +2,12 @@ const generate = @import("../generate.zig");

 const HTMLDocument = @import("document.zig").HTMLDocument;
 const HTMLElem = @import("elements.zig");
+const Window = @import("window.zig").Window;

 pub const Interfaces = generate.Tuple(.{
    HTMLDocument,
    HTMLElem.HTMLElement,
    HTMLElem.HTMLMediaElement,
    HTMLElem.Interfaces,
+    Window,
 });
--- a/src/html/window.zig
+++ b/src/html/window.zig
@@ -0,0 +1,47 @@
+const std = @import("std");
+
+const parser = @import("../netsurf.zig");
+
+const EventTarget = @import("../dom/event_target.zig").EventTarget;
+
+// https://dom.spec.whatwg.org/#interface-window-extensions
+// https://html.spec.whatwg.org/multipage/nav-history-apis.html#window
+pub const Window = struct {
+    pub const prototype = *EventTarget;
+    pub const mem_guarantied = true;
+
+    document: ?*parser.Document = null, // null until replaceDocument is called
+    target: []const u8, // returned as is by get_name
+
+    pub fn create(target: ?[]const u8) Window {
+        // Without a target, the window gets an empty name.
+        const name = target orelse "";
+        return .{ .target = name };
+    }
+
+    pub fn replaceDocument(self: *Window, doc: *parser.Document) void {
+        self.document = doc; // overwrites the pointer; the old document is not released here
+    }
+
+    pub fn get_window(self: *Window) *Window {
+        return self;
+    }
+
+    pub fn get_self(self: *Window) *Window {
+        return self;
+    }
+
+    pub fn get_parent(self: *Window) *Window {
+        return self; // no parent is tracked: the window is its own parent
+    }
+
+    pub fn get_document(self: *Window) ?*parser.Document {
+        return self.document;
+    }
+
+    pub fn get_name(self: *Window) []const u8 {
+        return self.target;
+    }
+
+    // TODO we need to re-implement EventTarget interface.
+};
--- a/src/main.zig
+++ b/src/main.zig
@@ -3,9 +3,9 @@ const std = @import("std");
 const jsruntime = @import("jsruntime");

 const parser = @import("netsurf.zig");
-const DOM = @import("dom.zig");
+const apiweb = @import("apiweb.zig");

-pub const Types = jsruntime.reflect(DOM.Interfaces);
+pub const Types = jsruntime.reflect(apiweb.Interfaces);

 const socket_path = "/tmp/browsercore-server.sock";

--- a/src/main_get.zig
+++ b/src/main_get.zig
@@ -0,0 +1,73 @@
+const std = @import("std");
+const Browser = @import("browser/browser.zig").Browser;
+
+const jsruntime = @import("jsruntime");
+const apiweb = @import("apiweb.zig");
+pub const Types = jsruntime.reflect(apiweb.Interfaces);
+
+pub const std_options = struct {
+    pub const log_level = .debug; // log everything, down to debug messages
+};
+
+const usage =
+    \\usage: {s} [options] <url>
+    \\  request the url with the browser
+    \\
+    \\  -h, --help      Print this help message and exit.
+    \\  --dump          Dump document in stdout
+    \\
+;
+
+// Entry point of browsercore-get: reads the options and the single <url>
+// argument, navigates a browser page to it and, with --dump, writes the
+// document to stdout. Usage errors print the help to stderr and exit(1).
+pub fn main() !void {
+    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
+    // Report allocations still alive at exit; std.log ends the line itself.
+    defer if (gpa.deinit() == .leak) std.log.warn("leaks detected", .{});
+    const allocator = gpa.allocator();
+
+    var args = try std.process.argsWithAllocator(allocator);
+    defer args.deinit();
+
+    // argv[0] can be missing: fall back to the built name instead of panicking.
+    const execname = args.next() orelse "browsercore-get";
+    var url: []const u8 = "";
+    var dump: bool = false;
+
+    while (args.next()) |arg| {
+        if (std.mem.eql(u8, "-h", arg) or std.mem.eql(u8, "--help", arg)) {
+            try std.io.getStdErr().writer().print(usage, .{execname});
+            std.os.exit(0);
+        }
+        if (std.mem.eql(u8, "--dump", arg)) {
+            dump = true;
+            continue;
+        }
+        // allow only one url
+        if (url.len != 0) {
+            try std.io.getStdErr().writer().print(usage, .{execname});
+            std.os.exit(1);
+        }
+        url = arg;
+    }
+
+    if (url.len == 0) {
+        try std.io.getStdErr().writer().print(usage, .{execname});
+        std.os.exit(1);
+    }
+
+    const vm = jsruntime.VM.init();
+    defer vm.deinit();
+
+    var browser = try Browser.init(allocator, vm);
+    defer browser.deinit();
+
+    var page = try browser.currentSession().createPage();
+    defer page.end();
+    try page.navigate(url);
+
+    if (dump) {
+        try page.dump(std.io.getStdOut());
+    }
+}
--- a/src/main_shell.zig
+++ b/src/main_shell.zig
@@ -3,11 +3,11 @@ const std = @import("std");
 const jsruntime = @import("jsruntime");

 const parser = @import("netsurf.zig");
-const DOM = @import("dom.zig");
+const apiweb = @import("apiweb.zig");

 const html_test = @import("html_test.zig").html;

-pub const Types = jsruntime.reflect(DOM.Interfaces);
+pub const Types = jsruntime.reflect(apiweb.Interfaces);

 var doc: *parser.DocumentHTML = undefined;

--- a/src/main_wpt.zig
+++ b/src/main_wpt.zig
@@ -6,7 +6,7 @@ const Suite = @import("wpt/testcase.zig").Suite;
 const FileLoader = @import("wpt/fileloader.zig").FileLoader;
 const wpt = @import("wpt/run.zig");

-const DOM = @import("dom.zig");
+const apiweb = @import("apiweb.zig");
 const HTMLElem = @import("html/elements.zig");

 const wpt_dir = "tests/wpt";
@@ -29,7 +29,7 @@ const Out = enum {
    text,
 };

-pub const Types = jsruntime.reflect(DOM.Interfaces);
+pub const Types = jsruntime.reflect(apiweb.Interfaces);

 // TODO For now the WPT tests run is specific to WPT.
 // It manually load js framwork libs, and run the first script w/ js content in
--- a/src/run_tests.zig
+++ b/src/run_tests.zig
@@ -5,7 +5,7 @@ const jsruntime = @import("jsruntime");
 const generate = @import("generate.zig");

 const parser = @import("netsurf.zig");
-const DOM = @import("dom.zig");
+const apiweb = @import("apiweb.zig");

 const documentTestExecFn = @import("dom/document.zig").testExecFn;
 const HTMLDocumentTestExecFn = @import("html/document.zig").testExecFn;
@@ -21,7 +21,7 @@ const DOMTokenListExecFn = @import("dom/token_list.zig").testExecFn;
 const NodeListTestExecFn = @import("dom/nodelist.zig").testExecFn;
 const AttrTestExecFn = @import("dom/attribute.zig").testExecFn;

-pub const Types = jsruntime.reflect(DOM.Interfaces);
+pub const Types = jsruntime.reflect(apiweb.Interfaces);

 var doc: *parser.DocumentHTML = undefined;

@@ -122,3 +122,12 @@ test "bug document html parsing #4" {
    doc = try parser.documentHTMLParse(file.reader(), "UTF-8");
    parser.documentHTMLClose(doc) catch {};
 }
+
+const dump = @import("browser/dump.zig");
+test "run browser tests" {
+    // Output goes to /dev/null; pass std.io.getStdOut() instead to read it.
+    const sink = try std.fs.openFileAbsolute("/dev/null", .{ .mode = .write_only });
+    defer sink.close();
+
+    try dump.HTMLFileTestFn(sink);
+}