diff --git a/build.zig b/build.zig index 3004d839..90d200e9 100644 --- a/build.zig +++ b/build.zig @@ -115,6 +115,29 @@ pub fn build(b: *std.build.Builder) !void { // step const wpt_step = b.step("wpt", "WPT tests"); wpt_step.dependOn(&wpt_cmd.step); + + // get + // ----- + + // compile and install + const get = b.addExecutable(.{ + .name = "browsercore-get", + .root_source_file = .{ .path = "src/main_get.zig" }, + .target = target, + .optimize = mode, + }); + try common(get, options); + b.installArtifact(get); + + // run + const get_cmd = b.addRunArtifact(get); + get_cmd.step.dependOn(b.getInstallStep()); + if (b.args) |args| { + get_cmd.addArgs(args); + } + // step + const get_step = b.step("get", "request URL"); + get_step.dependOn(&get_cmd.step); } fn common( diff --git a/src/dom.zig b/src/apiweb.zig similarity index 100% rename from src/dom.zig rename to src/apiweb.zig diff --git a/src/browser/browser.zig b/src/browser/browser.zig new file mode 100644 index 00000000..18e37cec --- /dev/null +++ b/src/browser/browser.zig @@ -0,0 +1,434 @@ +const std = @import("std"); + +const Types = @import("root").Types; + +const parser = @import("../netsurf.zig"); +const Loader = @import("loader.zig").Loader; +const Dump = @import("dump.zig"); +const Mime = @import("mime.zig"); + +const jsruntime = @import("jsruntime"); +const Loop = jsruntime.Loop; +const Env = jsruntime.Env; + +const apiweb = @import("../apiweb.zig"); + +const Window = @import("../html/window.zig").Window; +const Walker = @import("../dom/walker.zig").WalkerDepthFirst; + +const FetchResult = std.http.Client.FetchResult; + +const log = std.log.scoped(.browser); + +// Browser is an instance of the browser. +// You can create multiple browser instances. +// A browser contains only one session. +// TODO allow multiple sessions per browser. +pub const Browser = struct { + session: *Session, + + pub fn init(alloc: std.mem.Allocator, vm: jsruntime.VM) !Browser { + // We want to ensure the caller initialised a VM, but the browser + // doesn't use it directly... + _ = vm; + + return Browser{ + .session = try Session.init(alloc, "about:blank"), + }; + } + + pub fn deinit(self: *Browser) void { + self.session.deinit(); + } + + pub fn currentSession(self: *Browser) *Session { + return self.session; + } +}; + +// Session is like a browser's tab. +// It owns the js env and the loader for all the pages of the session. +// You can create successively multiple pages for a session, but you must +// deinit a page before running another one. +pub const Session = struct { + // allocator used to init the arena. + alloc: std.mem.Allocator, + + // The arena is used only to bound the js env init b/c it leaks memory. + // see https://github.com/lightpanda-io/jsruntime-lib/issues/181 + // + // The arena is initialised with self.alloc allocator. + // all others Session deps use directly self.alloc and not the arena. 
+ arena: std.heap.ArenaAllocator, + + uri: []const u8, + + // TODO handle proxy + loader: Loader, + env: Env = undefined, + loop: Loop, + window: Window, + + jstypes: [Types.len]usize = undefined, + + fn init(alloc: std.mem.Allocator, uri: []const u8) !*Session { + var self = try alloc.create(Session); + self.* = Session{ + .uri = uri, + .alloc = alloc, + .arena = std.heap.ArenaAllocator.init(alloc), + .window = Window.create(null), + .loader = Loader.init(alloc), + .loop = try Loop.init(alloc), + }; + + self.env = try Env.init(self.arena.allocator(), &self.loop); + try self.env.load(&self.jstypes); + + return self; + } + + fn deinit(self: *Session) void { + self.env.deinit(); + self.arena.deinit(); + + self.loader.deinit(); + self.loop.deinit(); + self.alloc.destroy(self); + } + + pub fn createPage(self: *Session) !Page { + return Page.init(self.alloc, self); + } +}; + +// Page navigates to an url. +// You can navigates multiple urls with the same page, but you have to call +// end() to stop the previous navigation before starting a new one. +// The page handle all its memory in an arena allocator. The arena is reseted +// when end() is called. +pub const Page = struct { + arena: std.heap.ArenaAllocator, + session: *Session, + doc: ?*parser.Document = null, + + // handle url + rawuri: ?[]const u8 = null, + uri: std.Uri = undefined, + + raw_data: ?[]const u8 = null, + + fn init( + alloc: std.mem.Allocator, + session: *Session, + ) Page { + return Page{ + .arena = std.heap.ArenaAllocator.init(alloc), + .session = session, + }; + } + + // reset js env and mem arena. + pub fn end(self: *Page) void { + self.session.env.stop(); + // TODO unload document: https://html.spec.whatwg.org/#unloading-documents + + _ = self.arena.reset(.free_all); + } + + pub fn deinit(self: *Page) void { + self.arena.deinit(); + } + + // dump writes the page content into the given file. + pub fn dump(self: *Page, out: std.fs.File) !void { + + // if no HTML document pointer available, dump the data content only. + if (self.doc == null) { + // no data loaded, nothing to do. + if (self.raw_data == null) return; + return try out.writeAll(self.raw_data.?); + } + + // if the page has a pointer to a document, dumps the HTML. + try Dump.htmlFile(self.doc.?, out); + } + + // spec reference: https://html.spec.whatwg.org/#document-lifecycle + pub fn navigate(self: *Page, uri: []const u8) !void { + const alloc = self.arena.allocator(); + + log.debug("starting GET {s}", .{uri}); + + // own the url + if (self.rawuri) |prev| alloc.free(prev); + self.rawuri = try alloc.dupe(u8, uri); + self.uri = std.Uri.parse(self.rawuri.?) catch try std.Uri.parseWithoutScheme(self.rawuri.?); + + // TODO handle fragment in url. + + // load the data + var resp = try self.session.loader.get(alloc, self.uri); + defer resp.deinit(); + + const req = resp.req; + + log.info("GET {any} {d}", .{ self.uri, req.response.status }); + + // TODO handle redirection + if (req.response.status != .ok) return error.BadStatusCode; + + // TODO handle charset + // https://html.spec.whatwg.org/#content-type + const ct = req.response.headers.getFirstValue("Content-Type") orelse { + // no content type in HTTP headers. + // TODO try to sniff mime type from the body. 
+ log.info("no content-type HTTP header", .{}); + return; + }; + log.debug("header content-type: {s}", .{ct}); + const mime = try Mime.parse(ct); + if (mime.eql(Mime.HTML)) { + try self.loadHTMLDoc(req.reader(), mime.charset orelse "utf-8"); + } else { + log.info("non-HTML document: {s}", .{ct}); + + // save the body into the page. + self.raw_data = try req.reader().readAllAlloc(alloc, 16 * 1024 * 1024); + } + } + + // https://html.spec.whatwg.org/#read-html + fn loadHTMLDoc(self: *Page, reader: anytype, charset: []const u8) !void { + const alloc = self.arena.allocator(); + + log.debug("parse html with charset {s}", .{charset}); + + const ccharset = try alloc.dupeZ(u8, charset); + defer alloc.free(ccharset); + + const html_doc = try parser.documentHTMLParse(reader, ccharset); + const doc = parser.documentHTMLToDocument(html_doc); + + // save a document's pointer in the page. + self.doc = doc; + + // TODO set document.readyState to interactive + // https://html.spec.whatwg.org/#reporting-document-loading-status + + // TODO inject the URL to the document including the fragment. + // TODO set the referrer to the document. + + self.session.window.replaceDocument(doc); + + // https://html.spec.whatwg.org/#read-html + + // start JS env + // TODO load the js env concurrently with the HTML parsing. + log.debug("start js env", .{}); + try self.session.env.start(alloc); + + // add global objects + log.debug("setup global env", .{}); + try self.session.env.addObject(self.session.window, "window"); + try self.session.env.addObject(self.session.window, "self"); + try self.session.env.addObject(html_doc, "document"); + + // browse the DOM tree to retrieve scripts + // TODO execute the synchronous scripts during the HTL parsing. + // TODO fetch the script resources concurrently but execute them in the + // declaration order for synchronous ones. + + // sasync stores scripts which can be run asynchronously. + // for now they are just run after the non-async one in order to + // dispatch DOMContentLoaded the sooner as possible. + var sasync = std.ArrayList(*parser.Element).init(alloc); + defer sasync.deinit(); + + const root = parser.documentToNode(doc); + const walker = Walker{}; + var next: ?*parser.Node = null; + while (true) { + next = try walker.get_next(root, next) orelse break; + + // ignore non-elements nodes. + if (try parser.nodeType(next.?) != .element) { + continue; + } + + const e = parser.nodeToElement(next.?); + const tag = try parser.elementHTMLGetTagType(@as(*parser.ElementHTML, @ptrCast(e))); + + // ignore non-script tags + if (tag != .script) continue; + + // ignore non-js script. + // > type + // > Attribute is not set (default), an empty string, or a JavaScript MIME + // > type indicates that the script is a "classic script", containing + // > JavaScript code. + // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/script#attribute_is_not_set_default_an_empty_string_or_a_javascript_mime_type + const stype = try parser.elementGetAttribute(e, "type"); + if (!isJS(stype)) { + continue; + } + + // Ignore the defer attribute b/c we analyze all script + // after the document has been parsed. + // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/script#defer + + // TODO use fetchpriority + // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/script#fetchpriority + + // > async + // > For classic scripts, if the async attribute is present, + // > then the classic script will be fetched in parallel to + // > parsing and evaluated as soon as it is available. 
+ // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/script#async + if (try parser.elementGetAttribute(e, "async") != null) { + try sasync.append(e); + continue; + } + + // TODO handle for attribute + // TODO handle event attribute + + // TODO defer + // > This Boolean attribute is set to indicate to a browser + // > that the script is meant to be executed after the + // > document has been parsed, but before firing + // > DOMContentLoaded. + // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/script#defer + // defer allow us to load a script w/o blocking the rest of + // evaluations. + + // > Scripts without async, defer or type="module" + // > attributes, as well as inline scripts without the + // > type="module" attribute, are fetched and executed + // > immediately before the browser continues to parse the + // > page. + // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/script#notes + self.evalScript(e) catch |err| log.warn("evaljs: {any}", .{err}); + } + + // TODO wait for deferred scripts + + // TODO dispatch DOMContentLoaded before the transition to "complete", + // at the point where all subresources apart from async script elements + // have loaded. + // https://html.spec.whatwg.org/#reporting-document-loading-status + + // eval async scripts. + for (sasync.items) |e| { + self.evalScript(e) catch |err| log.warn("evaljs: {any}", .{err}); + } + + // TODO wait for async scripts + + // TODO set document.readyState to complete + } + + // evalScript evaluates the src in priority. + // if no src is present, we evaluate the text source. + // https://html.spec.whatwg.org/multipage/scripting.html#script-processing-model + fn evalScript(self: *Page, e: *parser.Element) !void { + const alloc = self.arena.allocator(); + + // https://html.spec.whatwg.org/multipage/webappapis.html#fetch-a-classic-script + const opt_src = try parser.elementGetAttribute(e, "src"); + if (opt_src) |src| { + log.debug("starting GET {s}", .{src}); + + self.fetchScript(src) catch |err| { + switch (err) { + FetchError.BadStatusCode => return err, + + // TODO If el's result is null, then fire an event named error at + // el, and return. + FetchError.NoBody => return, + + FetchError.JsErr => {}, // nothing to do here. + else => return err, + } + }; + + // TODO If el's from an external file is true, then fire an event + // named load at el. + + return; + } + + const opt_text = try parser.nodeTextContent(parser.elementToNode(e)); + if (opt_text) |text| { + // TODO handle charset attribute + var res = jsruntime.JSResult{}; + try self.session.env.run(alloc, text, "", &res, null); + defer res.deinit(alloc); + + if (res.success) { + log.debug("eval inline: {s}", .{res.result}); + } else { + log.info("eval inline: {s}", .{res.result}); + } + + return; + } + + // nothing has been loaded. + // TODO If el's result is null, then fire an event named error at + // el, and return. + } + + const FetchError = error{ + BadStatusCode, + NoBody, + JsErr, + }; + + // fetchScript senf a GET request to the src and execute the script + // received. 
+    fn fetchScript(self: *Page, src: []const u8) !void {
+        const alloc = self.arena.allocator();
+
+        log.debug("starting fetch script {s}", .{src});
+
+        const u = std.Uri.parse(src) catch try std.Uri.parseWithoutScheme(src);
+        const ru = try std.Uri.resolve(self.uri, u, false, alloc);
+
+        var fetchres = try self.session.loader.fetch(alloc, ru);
+        defer fetchres.deinit();
+
+        log.info("fetch script {any}: {d}", .{ ru, fetchres.status });
+
+        if (fetchres.status != .ok) return FetchError.BadStatusCode;
+
+        // TODO check content-type
+
+        // check no body
+        if (fetchres.body == null) return FetchError.NoBody;
+
+        var res = jsruntime.JSResult{};
+        try self.session.env.run(alloc, fetchres.body.?, src, &res, null);
+        defer res.deinit(alloc);
+
+        if (res.success) {
+            log.debug("eval remote {s}: {s}", .{ src, res.result });
+        } else {
+            log.info("eval remote {s}: {s}", .{ src, res.result });
+            return FetchError.JsErr;
+        }
+    }
+
+    // > type
+    // > Attribute is not set (default), an empty string, or a JavaScript MIME
+    // > type indicates that the script is a "classic script", containing
+    // > JavaScript code.
+    // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/script#attribute_is_not_set_default_an_empty_string_or_a_javascript_mime_type
+    fn isJS(stype: ?[]const u8) bool {
+        if (stype == null or stype.?.len == 0) return true;
+        if (std.mem.eql(u8, stype.?, "application/javascript")) return true;
+        if (!std.mem.eql(u8, stype.?, "module")) return true;
+
+        return false;
+    }
+};
diff --git a/src/browser/dump.zig b/src/browser/dump.zig
new file mode 100644
index 00000000..a00bd3c7
--- /dev/null
+++ b/src/browser/dump.zig
@@ -0,0 +1,96 @@
+const std = @import("std");
+const File = std.fs.File;
+
+const parser = @import("../netsurf.zig");
+const Walker = @import("../dom/walker.zig").WalkerChildren;
+
+pub fn htmlFile(doc: *parser.Document, out: File) !void {
+    try out.writeAll("<!DOCTYPE html>\n");
+    try nodeFile(parser.documentToNode(doc), out);
+    try out.writeAll("\n");
+}
+
+fn nodeFile(root: *parser.Node, out: File) !void {
+    const walker = Walker{};
+    var next: ?*parser.Node = null;
+    while (true) {
+        next = try walker.get_next(root, next) orelse break;
+        switch (try parser.nodeType(next.?)) {
+            .element => {
+                // open the tag
+                const tag = try parser.nodeLocalName(next.?);
+                try out.writeAll("<");
+                try out.writeAll(tag);
+
+                // write the attributes
+                const map = try parser.nodeGetAttributes(next.?);
+                const ln = try parser.namedNodeMapGetLength(map);
+                var i: u32 = 0;
+                while (i < ln) {
+                    const attr = try parser.namedNodeMapItem(map, i) orelse break;
+                    try out.writeAll(" ");
+                    try out.writeAll(try parser.attributeGetName(attr));
+                    try out.writeAll("=\"");
+                    try out.writeAll(try parser.attributeGetValue(attr) orelse "");
+                    try out.writeAll("\"");
+                    i += 1;
+                }
+
+                try out.writeAll(">");
+
+                // write the children
+                // TODO avoid recursion
+                try nodeFile(next.?, out);
+
+                // close the tag
+                try out.writeAll("</");
+                try out.writeAll(tag);
+                try out.writeAll(">");
+            },
+            .text => {
+                const v = try parser.nodeValue(next.?) orelse continue;
+                try out.writeAll(v);
+            },
+            .cdata_section => {
+                const v = try parser.nodeValue(next.?) orelse continue;
+                try out.writeAll("<![CDATA[");
+                try out.writeAll(v);
+                try out.writeAll("]]>");
+            },
+            .comment => {
+                const v = try parser.nodeValue(next.?) orelse continue;
+                try out.writeAll("<!--");
+                try out.writeAll(v);
+                try out.writeAll("-->");
+            },
+            // TODO handle processing instruction dump
+            .processing_instruction => continue,
+            // document fragment is outside of the main document DOM, so we
+            // don't output it.
+            .document_fragment => continue,
+            // a document node is never returned by the walker, but the case
+            // is required for completeness.
+ .document => continue, + // done globally instead, but required for completeness. + .document_type => continue, + // deprecated + .attribute => continue, + .entity_reference => continue, + .entity => continue, + .notation => continue, + } + } +} + +// HTMLFileTestFn is run by run_tests.zig +pub fn HTMLFileTestFn(out: File) !void { + const file = try std.fs.cwd().openFile("test.html", .{}); + defer file.close(); + + const doc_html = try parser.documentHTMLParse(file.reader(), "UTF-8"); + // ignore close error + defer parser.documentHTMLClose(doc_html) catch {}; + + const doc = parser.documentHTMLToDocument(doc_html); + + try htmlFile(doc, out); +} diff --git a/src/browser/loader.zig b/src/browser/loader.zig new file mode 100644 index 00000000..4cfbdd9f --- /dev/null +++ b/src/browser/loader.zig @@ -0,0 +1,86 @@ +const std = @import("std"); + +const user_agent = "Lightpanda.io/1.0"; + +pub const Loader = struct { + client: std.http.Client, + + pub const Response = struct { + alloc: std.mem.Allocator, + req: *std.http.Client.Request, + + pub fn deinit(self: *Response) void { + self.req.deinit(); + self.alloc.destroy(self.req); + } + }; + + pub fn init(alloc: std.mem.Allocator) Loader { + return Loader{ + .client = std.http.Client{ + .allocator = alloc, + }, + }; + } + + pub fn deinit(self: *Loader) void { + self.client.deinit(); + } + + // the caller must deinit the FetchResult. + pub fn fetch(self: *Loader, alloc: std.mem.Allocator, uri: std.Uri) !std.http.Client.FetchResult { + var headers = try std.http.Headers.initList(alloc, &[_]std.http.Field{ + .{ .name = "User-Agent", .value = user_agent }, + .{ .name = "Accept", .value = "*/*" }, + .{ .name = "Accept-Language", .value = "en-US,en;q=0.5" }, + }); + defer headers.deinit(); + + return try self.client.fetch(alloc, .{ + .location = .{ .uri = uri }, + .headers = headers, + .payload = .none, + }); + } + + // see + // https://ziglang.org/documentation/master/std/#A;std:http.Client.fetch + // for reference. + // The caller is responsible for calling `deinit()` on the `Response`. 
+ pub fn get(self: *Loader, alloc: std.mem.Allocator, uri: std.Uri) !Response { + var headers = try std.http.Headers.initList(alloc, &[_]std.http.Field{ + .{ .name = "User-Agent", .value = user_agent }, + .{ .name = "Accept", .value = "*/*" }, + .{ .name = "Accept-Language", .value = "en-US,en;q=0.5" }, + }); + defer headers.deinit(); + + var resp = Response{ + .alloc = alloc, + .req = try alloc.create(std.http.Client.Request), + }; + errdefer alloc.destroy(resp.req); + + resp.req.* = try self.client.open(.GET, uri, headers, .{ + .handle_redirects = true, // TODO handle redirects manually + }); + errdefer resp.req.deinit(); + + try resp.req.send(.{}); + try resp.req.finish(); + try resp.req.wait(); + + return resp; + } +}; + +test "basic url fetch" { + const alloc = std.testing.allocator; + var loader = Loader.init(alloc); + defer loader.deinit(); + + var result = try loader.fetch(alloc, "https://en.wikipedia.org/wiki/Main_Page"); + defer result.deinit(); + + try std.testing.expect(result.status == std.http.Status.ok); +} diff --git a/src/browser/mime.zig b/src/browser/mime.zig new file mode 100644 index 00000000..a880daee --- /dev/null +++ b/src/browser/mime.zig @@ -0,0 +1,219 @@ +const std = @import("std"); +const testing = std.testing; + +const Self = @This(); + +const MimeError = error{ + Empty, + TooBig, + Invalid, + InvalidChar, +}; + +mtype: []const u8, +msubtype: []const u8, +params: []const u8 = "", + +charset: ?[]const u8 = null, +boundary: ?[]const u8 = null, + +pub const HTML = Self{ .mtype = "text", .msubtype = "html" }; +pub const Javascript = Self{ .mtype = "application", .msubtype = "javascript" }; + +const reader = struct { + s: []const u8, + i: usize = 0, + + fn until(self: *reader, c: u8) []const u8 { + const ln = self.s.len; + const start = self.i; + while (self.i < ln) { + if (c == self.s[self.i]) return self.s[start..self.i]; + self.i += 1; + } + + return self.s[start..self.i]; + } + + fn tail(self: *reader) []const u8 { + if (self.i > self.s.len) return ""; + defer self.i = self.s.len; + return self.s[self.i..]; + } + + fn skip(self: *reader) bool { + if (self.i >= self.s.len) return false; + self.i += 1; + return true; + } +}; + +test "reader.skip" { + var r = reader{ .s = "foo" }; + try testing.expect(r.skip()); + try testing.expect(r.skip()); + try testing.expect(r.skip()); + try testing.expect(!r.skip()); + try testing.expect(!r.skip()); +} + +test "reader.tail" { + var r = reader{ .s = "foo" }; + try testing.expectEqualStrings("foo", r.tail()); + try testing.expectEqualStrings("", r.tail()); +} + +test "reader.until" { + var r = reader{ .s = "foo.bar.baz" }; + try testing.expectEqualStrings("foo", r.until('.')); + _ = r.skip(); + try testing.expectEqualStrings("bar", r.until('.')); + _ = r.skip(); + try testing.expectEqualStrings("baz", r.until('.')); + + r = reader{ .s = "foo" }; + try testing.expectEqualStrings("foo", r.until('.')); + + r = reader{ .s = "" }; + try testing.expectEqualStrings("", r.until('.')); +} + +fn trim(s: []const u8) []const u8 { + const ln = s.len; + if (ln == 0) { + return ""; + } + var start: usize = 0; + while (start < ln) { + if (!std.ascii.isWhitespace(s[start])) break; + start += 1; + } + + var end: usize = ln; + while (end > 0) { + if (!std.ascii.isWhitespace(s[end - 1])) break; + end -= 1; + } + + return s[start..end]; +} + +test "trim" { + try testing.expectEqualStrings("", trim("")); + try testing.expectEqualStrings("foo", trim("foo")); + try testing.expectEqualStrings("foo", trim(" \n\tfoo")); + try 
testing.expectEqualStrings("foo", trim("foo \n\t")); +} + +// https://mimesniff.spec.whatwg.org/#http-token-code-point +fn isHTTPCodePoint(c: u8) bool { + return switch (c) { + '!', '#', '$', '%', '&', '\'', '*', '+', '-', '.', '^' => return true, + '_', '`', '|', '~' => return true, + else => std.ascii.isAlphanumeric(c), + }; +} + +fn valid(s: []const u8) bool { + const ln = s.len; + var i: usize = 0; + while (i < ln) { + if (!isHTTPCodePoint(s[i])) return false; + i += 1; + } + return true; +} + +// https://mimesniff.spec.whatwg.org/#parsing-a-mime-type +pub fn parse(s: []const u8) Self.MimeError!Self { + const ln = s.len; + if (ln == 0) return MimeError.Empty; + // limit input size + if (ln > 255) return MimeError.TooBig; + + var res = Self{ .mtype = "", .msubtype = "" }; + var r = reader{ .s = s }; + + res.mtype = trim(r.until('/')); + if (res.mtype.len == 0) return MimeError.Invalid; + if (!valid(res.mtype)) return MimeError.InvalidChar; + + if (!r.skip()) return MimeError.Invalid; + res.msubtype = trim(r.until(';')); + if (res.msubtype.len == 0) return MimeError.Invalid; + if (!valid(res.msubtype)) return MimeError.InvalidChar; + + if (!r.skip()) return res; + res.params = trim(r.tail()); + if (res.params.len == 0) return MimeError.Invalid; + + // parse well known parameters. + // don't check invalid parameter format. + var rp = reader{ .s = res.params }; + while (true) { + const name = trim(rp.until('=')); + if (!rp.skip()) return res; + const value = trim(rp.until(';')); + + if (std.ascii.eqlIgnoreCase(name, "charset")) { + res.charset = value; + } + if (std.ascii.eqlIgnoreCase(name, "boundary")) { + res.boundary = value; + } + + if (!rp.skip()) return res; + } + + return res; +} + +test "parse valid" { + for ([_][]const u8{ + "text/html", + " \ttext/html", + "text \t/html", + "text/ \thtml", + "text/html \t", + }) |tc| { + const m = try Self.parse(tc); + try testing.expectEqualStrings("text", m.mtype); + try testing.expectEqualStrings("html", m.msubtype); + } + const m2 = try Self.parse("text/javascript1.5"); + try testing.expectEqualStrings("text", m2.mtype); + try testing.expectEqualStrings("javascript1.5", m2.msubtype); + + const m3 = try Self.parse("text/html; charset=utf-8"); + try testing.expectEqualStrings("text", m3.mtype); + try testing.expectEqualStrings("html", m3.msubtype); + try testing.expectEqualStrings("charset=utf-8", m3.params); + try testing.expectEqualStrings("utf-8", m3.charset.?); + + const m4 = try Self.parse("text/html; boundary=----"); + try testing.expectEqualStrings("text", m4.mtype); + try testing.expectEqualStrings("html", m4.msubtype); + try testing.expectEqualStrings("boundary=----", m4.params); + try testing.expectEqualStrings("----", m4.boundary.?); +} + +test "parse invalid" { + for ([_][]const u8{ + "", + "te xt/html;", + "te@xt/html;", + "text/ht@ml;", + "text/html;", + "/text/html", + "/html", + }) |tc| { + _ = Self.parse(tc) catch continue; + try testing.expect(false); + } +} + +// Compare type and subtype. 
+pub fn eql(self: Self, b: Self) bool { + if (!std.mem.eql(u8, self.mtype, b.mtype)) return false; + return std.mem.eql(u8, self.msubtype, b.msubtype); +} diff --git a/src/dom/document.zig b/src/dom/document.zig index ee7f9454..9c878871 100644 --- a/src/dom/document.zig +++ b/src/dom/document.zig @@ -10,7 +10,7 @@ const Node = @import("node.zig").Node; const NodeList = @import("nodelist.zig").NodeList; const NodeUnion = @import("node.zig").Union; -const Walker = @import("html_collection.zig").WalkerDepthFirst; +const Walker = @import("walker.zig").WalkerDepthFirst; const collection = @import("html_collection.zig"); const Element = @import("element.zig").Element; diff --git a/src/dom/element.zig b/src/dom/element.zig index e64faf08..cd12f625 100644 --- a/src/dom/element.zig +++ b/src/dom/element.zig @@ -9,7 +9,7 @@ const checkCases = jsruntime.test_utils.checkCases; const collection = @import("html_collection.zig"); const Node = @import("node.zig").Node; -const Walker = @import("html_collection.zig").WalkerDepthFirst; +const Walker = @import("walker.zig").WalkerDepthFirst; const NodeList = @import("nodelist.zig").NodeList; const HTMLElem = @import("../html/elements.zig"); pub const Union = @import("../html/elements.zig").Union; diff --git a/src/dom/html_collection.zig b/src/dom/html_collection.zig index f5e3d6d1..5cff8f92 100644 --- a/src/dom/html_collection.zig +++ b/src/dom/html_collection.zig @@ -11,6 +11,11 @@ const utils = @import("utils.z"); const Element = @import("element.zig").Element; const Union = @import("element.zig").Union; +const Walker = @import("walker.zig").Walker; +const WalkerDepthFirst = @import("walker.zig").WalkerDepthFirst; +const WalkerChildren = @import("walker.zig").WalkerChildren; +const WalkerNone = @import("walker.zig").WalkerNone; + const Matcher = union(enum) { matchByName: MatchByName, matchByTagName: MatchByTagName, @@ -255,89 +260,6 @@ pub fn HTMLCollectionByAnchors( }; } -const Walker = union(enum) { - walkerDepthFirst: WalkerDepthFirst, - walkerChildren: WalkerChildren, - walkerNone: WalkerNone, - - pub fn get_next(self: Walker, root: *parser.Node, cur: ?*parser.Node) !?*parser.Node { - switch (self) { - inline else => |case| return case.get_next(root, cur), - } - } -}; - -// WalkerDepthFirst iterates over the DOM tree to return the next following -// node or null at the end. -// -// This implementation is a zig version of Netsurf code. -// http://source.netsurf-browser.org/libdom.git/tree/src/html/html_collection.c#n177 -// -// The iteration is a depth first as required by the specification. -// https://dom.spec.whatwg.org/#htmlcollection -// https://dom.spec.whatwg.org/#concept-tree-order -pub const WalkerDepthFirst = struct { - pub fn get_next(_: WalkerDepthFirst, root: *parser.Node, cur: ?*parser.Node) !?*parser.Node { - var n = cur orelse root; - - // TODO deinit next - if (try parser.nodeFirstChild(n)) |next| { - return next; - } - - // TODO deinit next - if (try parser.nodeNextSibling(n)) |next| { - return next; - } - - // TODO deinit parent - // Back to the parent of cur. - // If cur has no parent, then the iteration is over. - var parent = try parser.nodeParentNode(n) orelse return null; - - // TODO deinit lastchild - var lastchild = try parser.nodeLastChild(parent); - while (n != root and n == lastchild) { - n = parent; - - // TODO deinit parent - // Back to the prev's parent. - // If prev has no parent, then the loop must stop. 
- parent = try parser.nodeParentNode(n) orelse break; - - // TODO deinit lastchild - lastchild = try parser.nodeLastChild(parent); - } - - if (n == root) { - return null; - } - - return try parser.nodeNextSibling(n); - } -}; - -// WalkerChildren iterates over the root's children only. -pub const WalkerChildren = struct { - pub fn get_next(_: WalkerChildren, root: *parser.Node, cur: ?*parser.Node) !?*parser.Node { - // On walk start, we return the first root's child. - if (cur == null) return try parser.nodeFirstChild(root); - - // If cur is root, then return null. - // This is a special case, if the root is included in the walk, we - // don't want to go further to find children. - if (root == cur.?) return null; - - return try parser.nodeNextSibling(cur.?); - } -}; - -pub const WalkerNone = struct { - pub fn get_next(_: WalkerNone, _: *parser.Node, _: ?*parser.Node) !?*parser.Node { - return null; - } -}; - pub const HTMLCollectionIterator = struct { pub const mem_guarantied = true; diff --git a/src/dom/walker.zig b/src/dom/walker.zig new file mode 100644 index 00000000..205936cb --- /dev/null +++ b/src/dom/walker.zig @@ -0,0 +1,86 @@ +const std = @import("std"); + +const parser = @import("../netsurf.zig"); + +pub const Walker = union(enum) { + walkerDepthFirst: WalkerDepthFirst, + walkerChildren: WalkerChildren, + walkerNone: WalkerNone, + + pub fn get_next(self: Walker, root: *parser.Node, cur: ?*parser.Node) !?*parser.Node { + switch (self) { + inline else => |case| return case.get_next(root, cur), + } + } +}; + +// WalkerDepthFirst iterates over the DOM tree to return the next following +// node or null at the end. +// +// This implementation is a zig version of Netsurf code. +// http://source.netsurf-browser.org/libdom.git/tree/src/html/html_collection.c#n177 +// +// The iteration is a depth first as required by the specification. +// https://dom.spec.whatwg.org/#htmlcollection +// https://dom.spec.whatwg.org/#concept-tree-order +pub const WalkerDepthFirst = struct { + pub fn get_next(_: WalkerDepthFirst, root: *parser.Node, cur: ?*parser.Node) !?*parser.Node { + var n = cur orelse root; + + // TODO deinit next + if (try parser.nodeFirstChild(n)) |next| { + return next; + } + + // TODO deinit next + if (try parser.nodeNextSibling(n)) |next| { + return next; + } + + // TODO deinit parent + // Back to the parent of cur. + // If cur has no parent, then the iteration is over. + var parent = try parser.nodeParentNode(n) orelse return null; + + // TODO deinit lastchild + var lastchild = try parser.nodeLastChild(parent); + while (n != root and n == lastchild) { + n = parent; + + // TODO deinit parent + // Back to the prev's parent. + // If prev has no parent, then the loop must stop. + parent = try parser.nodeParentNode(n) orelse break; + + // TODO deinit lastchild + lastchild = try parser.nodeLastChild(parent); + } + + if (n == root) { + return null; + } + + return try parser.nodeNextSibling(n); + } +}; + +// WalkerChildren iterates over the root's children only. +pub const WalkerChildren = struct { + pub fn get_next(_: WalkerChildren, root: *parser.Node, cur: ?*parser.Node) !?*parser.Node { + // On walk start, we return the first root's child. + if (cur == null) return try parser.nodeFirstChild(root); + + // If cur is root, then return null. + // This is a special case, if the root is included in the walk, we + // don't want to go further to find children. + if (root == cur.?) 
return null; + + return try parser.nodeNextSibling(cur.?); + } +}; + +pub const WalkerNone = struct { + pub fn get_next(_: WalkerNone, _: *parser.Node, _: ?*parser.Node) !?*parser.Node { + return null; + } +}; diff --git a/src/html/document.zig b/src/html/document.zig index 96dc96cd..d463ab29 100644 --- a/src/html/document.zig +++ b/src/html/document.zig @@ -12,7 +12,7 @@ const NodeList = @import("../dom/nodelist.zig").NodeList; const HTMLElem = @import("elements.zig"); const collection = @import("../dom/html_collection.zig"); -const Walker = collection.WalkerDepthFirst; +const Walker = @import("../dom/walker.zig").WalkerDepthFirst; // WEB IDL https://html.spec.whatwg.org/#the-document-object pub const HTMLDocument = struct { diff --git a/src/html/html.zig b/src/html/html.zig index a77e2cea..90d96f2b 100644 --- a/src/html/html.zig +++ b/src/html/html.zig @@ -2,10 +2,12 @@ const generate = @import("../generate.zig"); const HTMLDocument = @import("document.zig").HTMLDocument; const HTMLElem = @import("elements.zig"); +const Window = @import("window.zig").Window; pub const Interfaces = generate.Tuple(.{ HTMLDocument, HTMLElem.HTMLElement, HTMLElem.HTMLMediaElement, HTMLElem.Interfaces, + Window, }); diff --git a/src/html/window.zig b/src/html/window.zig new file mode 100644 index 00000000..5c844a49 --- /dev/null +++ b/src/html/window.zig @@ -0,0 +1,47 @@ +const std = @import("std"); + +const parser = @import("../netsurf.zig"); + +const EventTarget = @import("../dom/event_target.zig").EventTarget; + +// https://dom.spec.whatwg.org/#interface-window-extensions +// https://html.spec.whatwg.org/multipage/nav-history-apis.html#window +pub const Window = struct { + pub const prototype = *EventTarget; + pub const mem_guarantied = true; + + document: ?*parser.Document = null, + target: []const u8, + + pub fn create(target: ?[]const u8) Window { + return Window{ + .target = target orelse "", + }; + } + + pub fn replaceDocument(self: *Window, doc: *parser.Document) void { + self.document = doc; + } + + pub fn get_window(self: *Window) *Window { + return self; + } + + pub fn get_self(self: *Window) *Window { + return self; + } + + pub fn get_parent(self: *Window) *Window { + return self; + } + + pub fn get_document(self: *Window) ?*parser.Document { + return self.document; + } + + pub fn get_name(self: *Window) []const u8 { + return self.target; + } + + // TODO we need to re-implement EventTarget interface. +}; diff --git a/src/main.zig b/src/main.zig index 9341072e..4469cddb 100644 --- a/src/main.zig +++ b/src/main.zig @@ -3,9 +3,9 @@ const std = @import("std"); const jsruntime = @import("jsruntime"); const parser = @import("netsurf.zig"); -const DOM = @import("dom.zig"); +const apiweb = @import("apiweb.zig"); -pub const Types = jsruntime.reflect(DOM.Interfaces); +pub const Types = jsruntime.reflect(apiweb.Interfaces); const socket_path = "/tmp/browsercore-server.sock"; diff --git a/src/main_get.zig b/src/main_get.zig new file mode 100644 index 00000000..bb84b0d8 --- /dev/null +++ b/src/main_get.zig @@ -0,0 +1,73 @@ +const std = @import("std"); +const Browser = @import("browser/browser.zig").Browser; + +const jsruntime = @import("jsruntime"); +const apiweb = @import("apiweb.zig"); +pub const Types = jsruntime.reflect(apiweb.Interfaces); + +pub const std_options = struct { + pub const log_level = .debug; +}; + +const usage = + \\usage: {s} [options] + \\ request the url with the browser + \\ + \\ -h, --help Print this help message and exit. 
+ \\ --dump Dump document in stdout + \\ +; + +pub fn main() !void { + var gpa = std.heap.GeneralPurposeAllocator(.{}){}; + defer { + const check = gpa.deinit(); + if (check == .leak) { + std.log.warn("leaks detected\n", .{}); + } + } + const allocator = gpa.allocator(); + + var args = try std.process.argsWithAllocator(allocator); + defer args.deinit(); + + const execname = args.next().?; + var url: []const u8 = ""; + var dump: bool = false; + + while (args.next()) |arg| { + if (std.mem.eql(u8, "-h", arg) or std.mem.eql(u8, "--help", arg)) { + try std.io.getStdErr().writer().print(usage, .{execname}); + std.os.exit(0); + } + if (std.mem.eql(u8, "--dump", arg)) { + dump = true; + continue; + } + // allow only one url + if (url.len != 0) { + try std.io.getStdErr().writer().print(usage, .{execname}); + std.os.exit(1); + } + url = arg; + } + + if (url.len == 0) { + try std.io.getStdErr().writer().print(usage, .{execname}); + std.os.exit(1); + } + + const vm = jsruntime.VM.init(); + defer vm.deinit(); + + var browser = try Browser.init(allocator, vm); + defer browser.deinit(); + + var page = try browser.currentSession().createPage(); + defer page.end(); + try page.navigate(url); + + if (dump) { + try page.dump(std.io.getStdOut()); + } +} diff --git a/src/main_shell.zig b/src/main_shell.zig index 35d1fb39..920c77bf 100644 --- a/src/main_shell.zig +++ b/src/main_shell.zig @@ -3,11 +3,11 @@ const std = @import("std"); const jsruntime = @import("jsruntime"); const parser = @import("netsurf.zig"); -const DOM = @import("dom.zig"); +const apiweb = @import("apiweb.zig"); const html_test = @import("html_test.zig").html; -pub const Types = jsruntime.reflect(DOM.Interfaces); +pub const Types = jsruntime.reflect(apiweb.Interfaces); var doc: *parser.DocumentHTML = undefined; diff --git a/src/main_wpt.zig b/src/main_wpt.zig index e91f42c6..4d4dde57 100644 --- a/src/main_wpt.zig +++ b/src/main_wpt.zig @@ -6,7 +6,7 @@ const Suite = @import("wpt/testcase.zig").Suite; const FileLoader = @import("wpt/fileloader.zig").FileLoader; const wpt = @import("wpt/run.zig"); -const DOM = @import("dom.zig"); +const apiweb = @import("apiweb.zig"); const HTMLElem = @import("html/elements.zig"); const wpt_dir = "tests/wpt"; @@ -29,7 +29,7 @@ const Out = enum { text, }; -pub const Types = jsruntime.reflect(DOM.Interfaces); +pub const Types = jsruntime.reflect(apiweb.Interfaces); // TODO For now the WPT tests run is specific to WPT. 
// It manually load js framwork libs, and run the first script w/ js content in diff --git a/src/run_tests.zig b/src/run_tests.zig index 1ba0f8b3..3a8fc499 100644 --- a/src/run_tests.zig +++ b/src/run_tests.zig @@ -5,7 +5,7 @@ const jsruntime = @import("jsruntime"); const generate = @import("generate.zig"); const parser = @import("netsurf.zig"); -const DOM = @import("dom.zig"); +const apiweb = @import("apiweb.zig"); const documentTestExecFn = @import("dom/document.zig").testExecFn; const HTMLDocumentTestExecFn = @import("html/document.zig").testExecFn; @@ -21,7 +21,7 @@ const DOMTokenListExecFn = @import("dom/token_list.zig").testExecFn; const NodeListTestExecFn = @import("dom/nodelist.zig").testExecFn; const AttrTestExecFn = @import("dom/attribute.zig").testExecFn; -pub const Types = jsruntime.reflect(DOM.Interfaces); +pub const Types = jsruntime.reflect(apiweb.Interfaces); var doc: *parser.DocumentHTML = undefined; @@ -122,3 +122,12 @@ test "bug document html parsing #4" { doc = try parser.documentHTMLParse(file.reader(), "UTF-8"); parser.documentHTMLClose(doc) catch {}; } + +const dump = @import("browser/dump.zig"); +test "run browser tests" { + // const out = std.io.getStdOut(); + const out = try std.fs.openFileAbsolute("/dev/null", .{ .mode = .write_only }); + defer out.close(); + + try dump.HTMLFileTestFn(out); +}
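
Usage note (not part of the patch): the new "get" build step forwards any arguments after "--" to the browsercore-get binary, which accepts an optional --dump flag followed by a single URL. A typical invocation, using a placeholder URL, would be:

    zig build get -- --dump https://example.com

This fetches the page, parses the HTML, evaluates its scripts, and writes the resulting document to stdout.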