Merge pull request #137 from lightpanda-io/window

Start of the loader and browser API
This commit is contained in:
Pierre Tachoire
2024-01-17 18:33:41 +01:00
committed by GitHub
18 changed files with 1091 additions and 94 deletions

View File

@@ -115,6 +115,29 @@ pub fn build(b: *std.build.Builder) !void {
// step
const wpt_step = b.step("wpt", "WPT tests");
wpt_step.dependOn(&wpt_cmd.step);
// get
// -----
// compile and install
const get = b.addExecutable(.{
.name = "browsercore-get",
.root_source_file = .{ .path = "src/main_get.zig" },
.target = target,
.optimize = mode,
});
try common(get, options);
b.installArtifact(get);
// run
const get_cmd = b.addRunArtifact(get);
get_cmd.step.dependOn(b.getInstallStep());
if (b.args) |args| {
get_cmd.addArgs(args);
}
// step
const get_step = b.step("get", "request URL");
get_step.dependOn(&get_cmd.step);
}
fn common(

434
src/browser/browser.zig Normal file
View File

@@ -0,0 +1,434 @@
const std = @import("std");
const Types = @import("root").Types;
const parser = @import("../netsurf.zig");
const Loader = @import("loader.zig").Loader;
const Dump = @import("dump.zig");
const Mime = @import("mime.zig");
const jsruntime = @import("jsruntime");
const Loop = jsruntime.Loop;
const Env = jsruntime.Env;
const apiweb = @import("../apiweb.zig");
const Window = @import("../html/window.zig").Window;
const Walker = @import("../dom/walker.zig").WalkerDepthFirst;
const FetchResult = std.http.Client.FetchResult;
const log = std.log.scoped(.browser);
// Browser is one browser instance; several instances may coexist.
// Each browser currently owns exactly one session.
// TODO allow multiple sessions per browser.
pub const Browser = struct {
    session: *Session,

    // init creates the browser together with its single session, which
    // starts on "about:blank".
    // The vm argument is unused: it only forces the caller to have
    // initialised a JS VM before creating a browser.
    pub fn init(alloc: std.mem.Allocator, vm: jsruntime.VM) !Browser {
        _ = vm;

        const session = try Session.init(alloc, "about:blank");
        return .{ .session = session };
    }

    // deinit tears down the browser's session.
    pub fn deinit(self: *Browser) void {
        self.session.deinit();
    }

    // currentSession returns the browser's only session.
    pub fn currentSession(self: *Browser) *Session {
        return self.session;
    }
};
// Session is like a browser's tab.
// It owns the js env and the loader for all the pages of the session.
// You can create multiple pages successively for a session, but you must
// deinit a page before running another one.
pub const Session = struct {
    // allocator used to init the arena.
    alloc: std.mem.Allocator,

    // The arena is used only to bound the js env init b/c it leaks memory.
    // see https://github.com/lightpanda-io/jsruntime-lib/issues/181
    //
    // The arena is initialised with the self.alloc allocator.
    // All other Session deps use self.alloc directly and not the arena.
    arena: std.heap.ArenaAllocator,

    uri: []const u8,

    // TODO handle proxy
    loader: Loader,
    env: Env = undefined,
    loop: Loop,
    window: Window,

    jstypes: [Types.len]usize = undefined,

    // init allocates the session on the heap with alloc and sets up the
    // loader, the loop and the js env.
    // On failure everything acquired so far is released again, in the same
    // order deinit() uses, so a failed init does not leak.
    fn init(alloc: std.mem.Allocator, uri: []const u8) !*Session {
        const self = try alloc.create(Session);
        errdefer alloc.destroy(self);

        self.* = Session{
            .uri = uri,
            .alloc = alloc,
            .arena = std.heap.ArenaAllocator.init(alloc),
            .window = Window.create(null),
            .loader = Loader.init(alloc),
            .loop = try Loop.init(alloc),
        };
        // errdefers run in reverse declaration order: arena, loader, loop.
        errdefer self.loop.deinit();
        errdefer self.loader.deinit();
        errdefer self.arena.deinit();

        // The env keeps a pointer to self.loop, so it must be created once
        // the session sits at its final heap address.
        self.env = try Env.init(self.arena.allocator(), &self.loop);
        errdefer self.env.deinit();

        try self.env.load(&self.jstypes);

        return self;
    }

    // deinit releases the env, the arena, the loader and the loop, then
    // frees the session itself.
    fn deinit(self: *Session) void {
        self.env.deinit();
        self.arena.deinit();

        self.loader.deinit();
        self.loop.deinit();
        self.alloc.destroy(self);
    }

    // createPage returns a new page bound to this session. The page's arena
    // is backed by the session allocator.
    pub fn createPage(self: *Session) !Page {
        return Page.init(self.alloc, self);
    }
};
// Page navigates to a url.
// You can navigate to multiple urls with the same page, but you have to call
// end() to stop the previous navigation before starting a new one.
// The page handles all its memory in an arena allocator. The arena is reset
// when end() is called.
pub const Page = struct {
arena: std.heap.ArenaAllocator,
session: *Session,
doc: ?*parser.Document = null,
// handle url
rawuri: ?[]const u8 = null,
uri: std.Uri = undefined,
raw_data: ?[]const u8 = null,
// init builds a page attached to session. Nothing is allocated yet: the
// page's arena is only created on top of alloc.
fn init(
    alloc: std.mem.Allocator,
    session: *Session,
) Page {
    const arena = std.heap.ArenaAllocator.init(alloc);
    return .{
        .arena = arena,
        .session = session,
    };
}
// end stops the js env and resets the page's memory arena.
// Every slice the page stored in the arena is dropped as well, so that a
// following navigate() never touches (or frees) memory that was already
// released by the reset.
pub fn end(self: *Page) void {
    self.session.env.stop();
    // TODO unload document: https://html.spec.whatwg.org/#unloading-documents

    _ = self.arena.reset(.free_all);

    // rawuri and raw_data were allocated from the arena, and uri is parsed
    // out of rawuri: all of them dangle after the reset.
    self.rawuri = null;
    self.raw_data = null;
    self.uri = undefined;
    // NOTE(review): self.doc is left untouched because it is not allocated
    // from the arena here; confirm whether it should be cleared/closed too.
}
// deinit releases the page's arena and, with it, every allocation the page
// made through that arena.
pub fn deinit(self: *Page) void {
    self.arena.deinit();
}
// dump writes the page content into the given file.
// An HTML document, when the page holds one, is serialized; otherwise the
// raw body saved by navigate() is written as-is. Nothing is written when the
// page has neither.
pub fn dump(self: *Page, out: std.fs.File) !void {
    // the page has a document pointer: dump the HTML.
    if (self.doc) |doc| return Dump.htmlFile(doc, out);

    // no document: fall back on the raw data, if any was loaded.
    const data = self.raw_data orelse return;
    try out.writeAll(data);
}
// navigate requests uri with a GET and loads the response into the page.
// An HTML response is parsed and its scripts evaluated; any other content
// type is stored verbatim (up to 16MiB) for dump().
// Returns error.BadStatusCode on any non-200 response.
// spec reference: https://html.spec.whatwg.org/#document-lifecycle
pub fn navigate(self: *Page, uri: []const u8) !void {
    const alloc = self.arena.allocator();

    log.debug("starting GET {s}", .{uri});

    // keep our own copy of the url: the parsed uri points into it.
    if (self.rawuri) |prev| alloc.free(prev);
    const owned = try alloc.dupe(u8, uri);
    self.rawuri = owned;
    self.uri = std.Uri.parse(owned) catch try std.Uri.parseWithoutScheme(owned);

    // TODO handle fragment in url.

    // load the data
    var resp = try self.session.loader.get(alloc, self.uri);
    defer resp.deinit();

    const req = resp.req;
    const status = req.response.status;

    log.info("GET {any} {d}", .{ self.uri, status });

    // TODO handle redirection
    if (status != .ok) return error.BadStatusCode;

    // TODO handle charset
    // https://html.spec.whatwg.org/#content-type
    const ct = req.response.headers.getFirstValue("Content-Type") orelse {
        // no content type in HTTP headers.
        // TODO try to sniff mime type from the body.
        log.info("no content-type HTTP header", .{});
        return;
    };
    log.debug("header content-type: {s}", .{ct});

    const mime = try Mime.parse(ct);
    if (!mime.eql(Mime.HTML)) {
        log.info("non-HTML document: {s}", .{ct});

        // save the body into the page.
        self.raw_data = try req.reader().readAllAlloc(alloc, 16 * 1024 * 1024);
        return;
    }

    try self.loadHTMLDoc(req.reader(), mime.charset orelse "utf-8");
}
// loadHTMLDoc parses the HTML read from reader, publishes the document to
// the window and the js env, then evaluates the document's scripts:
// synchronous ones first in tree order, async ones afterwards.
// A failing script is logged and does not abort the load.
// https://html.spec.whatwg.org/#read-html
fn loadHTMLDoc(self: *Page, reader: anytype, charset: []const u8) !void {
    const alloc = self.arena.allocator();
    const session = self.session;

    log.debug("parse html with charset {s}", .{charset});

    // the parser wants a null-terminated charset.
    const ccharset = try alloc.dupeZ(u8, charset);
    defer alloc.free(ccharset);

    const html_doc = try parser.documentHTMLParse(reader, ccharset);
    const doc = parser.documentHTMLToDocument(html_doc);

    // save a document's pointer in the page.
    self.doc = doc;

    // TODO set document.readyState to interactive
    // https://html.spec.whatwg.org/#reporting-document-loading-status

    // TODO inject the URL to the document including the fragment.
    // TODO set the referrer to the document.

    session.window.replaceDocument(doc);

    // https://html.spec.whatwg.org/#read-html

    // start JS env
    // TODO load the js env concurrently with the HTML parsing.
    log.debug("start js env", .{});
    try session.env.start(alloc);

    // add global objects
    log.debug("setup global env", .{});
    try session.env.addObject(session.window, "window");
    try session.env.addObject(session.window, "self");
    try session.env.addObject(html_doc, "document");

    // browse the DOM tree to retrieve scripts
    // TODO execute the synchronous scripts during the HTML parsing.
    // TODO fetch the script resources concurrently but execute them in the
    // declaration order for synchronous ones.

    // sasync stores the scripts which can be run asynchronously.
    // For now they are just run after the non-async ones, in order to
    // dispatch DOMContentLoaded as soon as possible.
    var sasync = std.ArrayList(*parser.Element).init(alloc);
    defer sasync.deinit();

    const root = parser.documentToNode(doc);
    const walker = Walker{};
    var cursor: ?*parser.Node = null;
    while (try walker.get_next(root, cursor)) |node| {
        cursor = node;

        // ignore non-elements nodes.
        if (try parser.nodeType(node) != .element) continue;

        const e = parser.nodeToElement(node);
        const tag = try parser.elementHTMLGetTagType(@as(*parser.ElementHTML, @ptrCast(e)));

        // ignore non-script tags
        if (tag != .script) continue;

        // ignore non-js script.
        // > type
        // > Attribute is not set (default), an empty string, or a JavaScript MIME
        // > type indicates that the script is a "classic script", containing
        // > JavaScript code.
        // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/script#attribute_is_not_set_default_an_empty_string_or_a_javascript_mime_type
        const stype = try parser.elementGetAttribute(e, "type");
        if (!isJS(stype)) continue;

        // The defer attribute is ignored b/c all scripts are analyzed
        // after the document has been parsed.
        // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/script#defer

        // TODO use fetchpriority
        // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/script#fetchpriority

        // > async
        // > For classic scripts, if the async attribute is present,
        // > then the classic script will be fetched in parallel to
        // > parsing and evaluated as soon as it is available.
        // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/script#async
        const is_async = try parser.elementGetAttribute(e, "async") != null;
        if (is_async) {
            try sasync.append(e);
            continue;
        }

        // TODO handle for attribute
        // TODO handle event attribute

        // TODO defer
        // > This Boolean attribute is set to indicate to a browser
        // > that the script is meant to be executed after the
        // > document has been parsed, but before firing
        // > DOMContentLoaded.
        // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/script#defer
        // defer allows us to load a script w/o blocking the rest of
        // evaluations.

        // > Scripts without async, defer or type="module"
        // > attributes, as well as inline scripts without the
        // > type="module" attribute, are fetched and executed
        // > immediately before the browser continues to parse the
        // > page.
        // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/script#notes
        self.evalScript(e) catch |err| log.warn("evaljs: {any}", .{err});
    }

    // TODO wait for deferred scripts

    // TODO dispatch DOMContentLoaded before the transition to "complete",
    // at the point where all subresources apart from async script elements
    // have loaded.
    // https://html.spec.whatwg.org/#reporting-document-loading-status

    // eval async scripts.
    for (sasync.items) |script| {
        self.evalScript(script) catch |err| log.warn("evaljs: {any}", .{err});
    }

    // TODO wait for async scripts

    // TODO set document.readyState to complete
}
// evalScript runs one script element.
// The src attribute has priority: when present the script is fetched and
// executed, and the inline text is ignored. Without src, the element's text
// content is evaluated.
// A missing body or a JS error in a fetched script is not reported to the
// caller; a bad HTTP status and any other error are.
// https://html.spec.whatwg.org/multipage/scripting.html#script-processing-model
fn evalScript(self: *Page, e: *parser.Element) !void {
    const alloc = self.arena.allocator();

    // https://html.spec.whatwg.org/multipage/webappapis.html#fetch-a-classic-script
    if (try parser.elementGetAttribute(e, "src")) |src| {
        log.debug("starting GET {s}", .{src});

        self.fetchScript(src) catch |err| switch (err) {
            FetchError.BadStatusCode => return err,

            // TODO If el's result is null, then fire an event named error at
            // el, and return.
            FetchError.NoBody => return,

            // nothing to do here: the error has been logged by fetchScript.
            FetchError.JsErr => {},
            else => return err,
        };

        // TODO If el's from an external file is true, then fire an event
        // named load at el.

        return;
    }

    // nothing to evaluate when the element has no text either.
    // TODO If el's result is null, then fire an event named error at
    // el, and return.
    const text = try parser.nodeTextContent(parser.elementToNode(e)) orelse return;

    // TODO handle charset attribute
    var res = jsruntime.JSResult{};
    try self.session.env.run(alloc, text, "", &res, null);
    defer res.deinit(alloc);

    // a failing inline script is only logged, at a higher level.
    if (res.success) {
        log.debug("eval inline: {s}", .{res.result});
    } else {
        log.info("eval inline: {s}", .{res.result});
    }
}
// FetchError lists the failures fetchScript reports on its own, in addition
// to the errors coming from the loader and the js env.
const FetchError = error{
    // the script request answered with a status other than 200.
    BadStatusCode,
    // the response carried no body.
    NoBody,
    // the script was fetched but its evaluation failed.
    JsErr,
};
// fetchScript sends a GET request to src, resolved against the page's uri,
// and executes the script received.
// Returns FetchError.BadStatusCode on a non-200 response, FetchError.NoBody
// when the response is empty and FetchError.JsErr when the evaluation fails.
fn fetchScript(self: *Page, src: []const u8) !void {
    const alloc = self.arena.allocator();

    log.debug("starting fetch script {s}", .{src});

    const parsed = std.Uri.parse(src) catch try std.Uri.parseWithoutScheme(src);
    const target = try std.Uri.resolve(self.uri, parsed, false, alloc);

    var fetched = try self.session.loader.fetch(alloc, target);
    defer fetched.deinit();

    log.info("fech script {any}: {d}", .{ target, fetched.status });

    if (fetched.status != .ok) return FetchError.BadStatusCode;

    // TODO check content-type

    // check no body
    const body = fetched.body orelse return FetchError.NoBody;

    var res = jsruntime.JSResult{};
    try self.session.env.run(alloc, body, src, &res, null);
    defer res.deinit(alloc);

    if (!res.success) {
        log.info("eval remote {s}: {s}", .{ src, res.result });
        return FetchError.JsErr;
    }

    log.debug("eval remote {s}: {s}", .{ src, res.result });
}
// isJS reports whether a <script> type attribute designates a classic
// JavaScript script.
//
// > type
// > Attribute is not set (default), an empty string, or a JavaScript MIME
// > type indicates that the script is a "classic script", containing
// > JavaScript code.
// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/script#attribute_is_not_set_default_an_empty_string_or_a_javascript_mime_type
//
// Any other value ("module", "importmap", "application/json",
// "text/template", ...) is not a classic script and must not be evaluated
// as such: it returns false.
fn isJS(stype: ?[]const u8) bool {
    // JavaScript MIME type essences.
    // https://mimesniff.spec.whatwg.org/#javascript-mime-type
    const js_essences = [_][]const u8{
        "application/ecmascript",
        "application/javascript",
        "application/x-ecmascript",
        "application/x-javascript",
        "text/ecmascript",
        "text/javascript",
        "text/javascript1.0",
        "text/javascript1.1",
        "text/javascript1.2",
        "text/javascript1.3",
        "text/javascript1.4",
        "text/javascript1.5",
        "text/jscript",
        "text/livescript",
        "text/x-ecmascript",
        "text/x-javascript",
    };

    // attribute not set or empty: classic script.
    const raw = stype orelse return true;
    if (raw.len == 0) return true;

    // The value is compared to the essences ASCII case-insensitively, once
    // its leading and trailing ASCII whitespace is stripped.
    const essence = std.mem.trim(u8, raw, " \t\n\x0c\r");
    for (js_essences) |candidate| {
        if (std.ascii.eqlIgnoreCase(essence, candidate)) return true;
    }

    return false;
}
};

96
src/browser/dump.zig Normal file
View File

@@ -0,0 +1,96 @@
const std = @import("std");
const File = std.fs.File;
const parser = @import("../netsurf.zig");
const Walker = @import("../dom/walker.zig").WalkerChildren;
// htmlFile writes doc into out as HTML: a doctype line, the serialized
// children of the document, then a final newline.
pub fn htmlFile(doc: *parser.Document, out: File) !void {
    try out.writeAll("<!DOCTYPE html>\n");

    const root = parser.documentToNode(doc);
    try nodeFile(root, out);

    try out.writeAll("\n");
}
// writeAttributeValue writes an attribute value into out, replacing the
// characters which would otherwise end the value or start a character
// reference: `&` becomes `&amp;` and `"` becomes `&quot;`.
fn writeAttributeValue(out: File, value: []const u8) !void {
    // start of the pending run of characters needing no escape.
    var start: usize = 0;
    for (value, 0..) |c, i| {
        const escaped: []const u8 = switch (c) {
            '&' => "&amp;",
            '"' => "&quot;",
            else => continue,
        };
        try out.writeAll(value[start..i]);
        try out.writeAll(escaped);
        start = i + 1;
    }
    try out.writeAll(value[start..]);
}

// nodeFile serializes the children of root into out, recursing into
// elements. Elements are written with their attributes and a closing tag;
// text, CDATA sections and comments are written with their value. All other
// node types are skipped.
// Attribute values are escaped so that the output stays well-formed.
// TODO escape text nodes too, except inside raw text elements
// (script, style).
fn nodeFile(root: *parser.Node, out: File) !void {
    const walker = Walker{};
    var next: ?*parser.Node = null;
    while (true) {
        next = try walker.get_next(root, next) orelse break;
        const node = next.?;

        switch (try parser.nodeType(node)) {
            .element => {
                // open the tag
                const tag = try parser.nodeLocalName(node);
                try out.writeAll("<");
                try out.writeAll(tag);

                // write the attributes
                const map = try parser.nodeGetAttributes(node);
                const ln = try parser.namedNodeMapGetLength(map);
                var i: u32 = 0;
                while (i < ln) : (i += 1) {
                    const attr = try parser.namedNodeMapItem(map, i) orelse break;
                    try out.writeAll(" ");
                    try out.writeAll(try parser.attributeGetName(attr));
                    try out.writeAll("=\"");
                    try writeAttributeValue(out, try parser.attributeGetValue(attr) orelse "");
                    try out.writeAll("\"");
                }

                try out.writeAll(">");

                // write the children
                // TODO avoid recursion
                try nodeFile(node, out);

                // close the tag
                try out.writeAll("</");
                try out.writeAll(tag);
                try out.writeAll(">");
            },
            .text => {
                const v = try parser.nodeValue(node) orelse continue;
                try out.writeAll(v);
            },
            .cdata_section => {
                const v = try parser.nodeValue(node) orelse continue;
                try out.writeAll("<![CDATA[");
                try out.writeAll(v);
                try out.writeAll("]]>");
            },
            .comment => {
                const v = try parser.nodeValue(node) orelse continue;
                try out.writeAll("<!--");
                try out.writeAll(v);
                try out.writeAll("-->");
            },
            // TODO handle processing instruction dump
            .processing_instruction => continue,
            // document fragment is outside of the main document DOM, so we
            // don't output it.
            .document_fragment => continue,
            // document will never be called, but required for completeness.
            .document => continue,
            // done globally instead, but required for completeness.
            .document_type => continue,
            // deprecated
            .attribute => continue,
            .entity_reference => continue,
            .entity => continue,
            .notation => continue,
        }
    }
}
// HTMLFileTestFn is run by run_tests.zig.
// It parses the test.html file from the current working directory and dumps
// the resulting document into out.
pub fn HTMLFileTestFn(out: File) !void {
    const fixture = try std.fs.cwd().openFile("test.html", .{});
    defer fixture.close();

    const html = try parser.documentHTMLParse(fixture.reader(), "UTF-8");
    // ignore close error
    defer parser.documentHTMLClose(html) catch {};

    try htmlFile(parser.documentHTMLToDocument(html), out);
}

86
src/browser/loader.zig Normal file
View File

@@ -0,0 +1,86 @@
const std = @import("std");
const user_agent = "Lightpanda.io/1.0";
// Loader sends the browser's HTTP requests through a single std.http.Client.
pub const Loader = struct {
    client: std.http.Client,

    // Response wraps a heap-allocated request whose response head has been
    // received; the body can still be read from req.
    pub const Response = struct {
        alloc: std.mem.Allocator,
        req: *std.http.Client.Request,

        // deinit closes the request, then frees it with the allocator it
        // was created with.
        pub fn deinit(self: *Response) void {
            self.req.deinit();
            self.alloc.destroy(self.req);
        }
    };

    // init creates a loader whose HTTP client allocates with alloc.
    pub fn init(alloc: std.mem.Allocator) Loader {
        return .{
            .client = .{ .allocator = alloc },
        };
    }

    pub fn deinit(self: *Loader) void {
        self.client.deinit();
    }

    // requestHeaders builds the headers sent with every request.
    // The caller must deinit the returned headers.
    fn requestHeaders(alloc: std.mem.Allocator) !std.http.Headers {
        return std.http.Headers.initList(alloc, &[_]std.http.Field{
            .{ .name = "User-Agent", .value = user_agent },
            .{ .name = "Accept", .value = "*/*" },
            .{ .name = "Accept-Language", .value = "en-US,en;q=0.5" },
        });
    }

    // fetch sends a GET request to uri and returns the whole result.
    // The caller must deinit the FetchResult.
    pub fn fetch(self: *Loader, alloc: std.mem.Allocator, uri: std.Uri) !std.http.Client.FetchResult {
        var headers = try requestHeaders(alloc);
        defer headers.deinit();

        return try self.client.fetch(alloc, .{
            .location = .{ .uri = uri },
            .headers = headers,
            .payload = .none,
        });
    }

    // get sends a GET request to uri and waits for the response head; the
    // body is left to be read by the caller through the returned request.
    // see
    // https://ziglang.org/documentation/master/std/#A;std:http.Client.fetch
    // for reference.
    // The caller is responsible for calling `deinit()` on the `Response`.
    pub fn get(self: *Loader, alloc: std.mem.Allocator, uri: std.Uri) !Response {
        var headers = try requestHeaders(alloc);
        defer headers.deinit();

        const req = try alloc.create(std.http.Client.Request);
        errdefer alloc.destroy(req);

        req.* = try self.client.open(.GET, uri, headers, .{
            .handle_redirects = true, // TODO handle redirects manually
        });
        errdefer req.deinit();

        try req.send(.{});
        try req.finish();
        try req.wait();

        return Response{
            .alloc = alloc,
            .req = req,
        };
    }
};
// Fetches a public page over the network and expects a 200 response.
test "basic url fetch" {
    const alloc = std.testing.allocator;
    var loader = Loader.init(alloc);
    defer loader.deinit();

    // Loader.fetch takes a parsed std.Uri, not a string.
    const uri = try std.Uri.parse("https://en.wikipedia.org/wiki/Main_Page");

    var result = try loader.fetch(alloc, uri);
    defer result.deinit();

    try std.testing.expect(result.status == std.http.Status.ok);
}

219
src/browser/mime.zig Normal file
View File

@@ -0,0 +1,219 @@
const std = @import("std");
const testing = std.testing;
const Self = @This();
const MimeError = error{
Empty,
TooBig,
Invalid,
InvalidChar,
};
mtype: []const u8,
msubtype: []const u8,
params: []const u8 = "",
charset: ?[]const u8 = null,
boundary: ?[]const u8 = null,
pub const HTML = Self{ .mtype = "text", .msubtype = "html" };
pub const Javascript = Self{ .mtype = "application", .msubtype = "javascript" };
// reader is a cursor over a string: s is the input and i the index of the
// next byte to read.
const reader = struct {
    s: []const u8,
    i: usize = 0,

    // until returns the bytes from the cursor up to, and excluding, the
    // first occurrence of c, and leaves the cursor on that occurrence.
    // When c is not found, the rest of the input is returned and the cursor
    // moves to the end.
    fn until(self: *reader, c: u8) []const u8 {
        const start = self.i;
        self.i = std.mem.indexOfScalarPos(u8, self.s, start, c) orelse self.s.len;
        return self.s[start..self.i];
    }

    // tail returns everything from the cursor to the end of the input and
    // moves the cursor to the end.
    fn tail(self: *reader) []const u8 {
        if (self.i > self.s.len) return "";

        const rest = self.s[self.i..];
        self.i = self.s.len;
        return rest;
    }

    // skip moves the cursor one byte forward. It returns false, without
    // moving, when the cursor is already at the end.
    fn skip(self: *reader) bool {
        const has_more = self.i < self.s.len;
        if (has_more) self.i += 1;
        return has_more;
    }
};
test "reader.skip" {
    var r = reader{ .s = "foo" };

    // one successful skip per input byte...
    for (0..3) |_| try testing.expect(r.skip());

    // ...then skip keeps failing once the end is reached.
    for (0..2) |_| try testing.expect(!r.skip());
}
test "reader.tail" {
    var r = reader{ .s = "foo" };

    // the first call consumes the whole input.
    const first = r.tail();
    try testing.expectEqualStrings("foo", first);

    // nothing is left for the second one.
    const second = r.tail();
    try testing.expectEqualStrings("", second);
}
test "reader.until" {
    // each segment is read up to the separator, which is then skipped.
    var r = reader{ .s = "foo.bar.baz" };
    for ([_][]const u8{ "foo", "bar" }) |want| {
        try testing.expectEqualStrings(want, r.until('.'));
        _ = r.skip();
    }
    try testing.expectEqualStrings("baz", r.until('.'));

    // no separator: the whole input is returned.
    var no_sep = reader{ .s = "foo" };
    try testing.expectEqualStrings("foo", no_sep.until('.'));

    // empty input.
    var empty = reader{ .s = "" };
    try testing.expectEqualStrings("", empty.until('.'));
}
// trim returns s without its leading and trailing ASCII whitespace.
// The result is a sub-slice of s: nothing is allocated.
// An empty or all-whitespace input gives an empty slice.
fn trim(s: []const u8) []const u8 {
    // std.ascii.whitespace is the very set std.ascii.isWhitespace tests for.
    return std.mem.trim(u8, s, &std.ascii.whitespace);
}
test "trim" {
    // each case is { expected, input }.
    const cases = [_][2][]const u8{
        .{ "", "" },
        .{ "foo", "foo" },
        .{ "foo", " \n\tfoo" },
        .{ "foo", "foo \n\t" },
    };
    for (cases) |tc| {
        try testing.expectEqualStrings(tc[0], trim(tc[1]));
    }
}
// isHTTPCodePoint reports whether c is an HTTP token code point: an ASCII
// alphanumeric or one of !#$%&'*+-.^_`|~
// https://mimesniff.spec.whatwg.org/#http-token-code-point
fn isHTTPCodePoint(c: u8) bool {
    if (std.ascii.isAlphanumeric(c)) return true;
    return std.mem.indexOfScalar(u8, "!#$%&'*+-.^_`|~", c) != null;
}
// valid reports whether s contains only HTTP token code points.
// An empty s is valid.
fn valid(s: []const u8) bool {
    for (s) |c| {
        if (!isHTTPCodePoint(c)) return false;
    }
    return true;
}
// unquote removes the double quotes surrounding a parameter value, if any.
// Escape sequences inside the quotes are not interpreted.
fn unquote(v: []const u8) []const u8 {
    if (v.len >= 2 and v[0] == '"' and v[v.len - 1] == '"') {
        return v[1 .. v.len - 1];
    }
    return v;
}

// parse reads a MIME type, like the value of a Content-Type header.
// https://mimesniff.spec.whatwg.org/#parsing-a-mime-type
//
// The returned value only references s: nothing is allocated and s must
// outlive the result.
//
// Errors:
// - Empty: s is empty.
// - TooBig: s is longer than 255 bytes.
// - Invalid: the type or the subtype is missing, or a `;` is followed by no
//   parameter.
// - InvalidChar: the type or the subtype contains a byte which is not an
//   HTTP token code point.
//
// Only the charset and boundary parameters are extracted, with their
// surrounding double quotes removed (`charset="utf-8"` gives `utf-8`).
// A `;` inside a quoted value is not supported.
pub fn parse(s: []const u8) Self.MimeError!Self {
    if (s.len == 0) return MimeError.Empty;
    // limit input size
    if (s.len > 255) return MimeError.TooBig;

    var res = Self{ .mtype = "", .msubtype = "" };
    var r = reader{ .s = s };

    // type, up to the `/`.
    res.mtype = trim(r.until('/'));
    if (res.mtype.len == 0) return MimeError.Invalid;
    if (!valid(res.mtype)) return MimeError.InvalidChar;

    // a type without subtype is invalid.
    if (!r.skip()) return MimeError.Invalid;

    // subtype, up to the optional `;`.
    res.msubtype = trim(r.until(';'));
    if (res.msubtype.len == 0) return MimeError.Invalid;
    if (!valid(res.msubtype)) return MimeError.InvalidChar;

    // no parameter.
    if (!r.skip()) return res;

    res.params = trim(r.tail());
    if (res.params.len == 0) return MimeError.Invalid;

    // parse well known parameters.
    // don't check invalid parameter format.
    var rp = reader{ .s = res.params };
    while (true) {
        const name = trim(rp.until('='));
        if (!rp.skip()) return res;

        const value = unquote(trim(rp.until(';')));

        if (std.ascii.eqlIgnoreCase(name, "charset")) {
            res.charset = value;
        }
        if (std.ascii.eqlIgnoreCase(name, "boundary")) {
            res.boundary = value;
        }

        if (!rp.skip()) return res;
    }
}
test "parse valid" {
    // whitespace around the type and the subtype is ignored.
    const spaced = [_][]const u8{
        "text/html",
        " \ttext/html",
        "text \t/html",
        "text/ \thtml",
        "text/html \t",
    };
    for (spaced) |tc| {
        const m = try Self.parse(tc);
        try testing.expectEqualStrings("text", m.mtype);
        try testing.expectEqualStrings("html", m.msubtype);
    }

    // digits and dots are accepted in the subtype.
    const versioned = try Self.parse("text/javascript1.5");
    try testing.expectEqualStrings("text", versioned.mtype);
    try testing.expectEqualStrings("javascript1.5", versioned.msubtype);

    // charset parameter.
    const with_charset = try Self.parse("text/html; charset=utf-8");
    try testing.expectEqualStrings("text", with_charset.mtype);
    try testing.expectEqualStrings("html", with_charset.msubtype);
    try testing.expectEqualStrings("charset=utf-8", with_charset.params);
    try testing.expectEqualStrings("utf-8", with_charset.charset.?);

    // boundary parameter.
    const with_boundary = try Self.parse("text/html; boundary=----");
    try testing.expectEqualStrings("text", with_boundary.mtype);
    try testing.expectEqualStrings("html", with_boundary.msubtype);
    try testing.expectEqualStrings("boundary=----", with_boundary.params);
    try testing.expectEqualStrings("----", with_boundary.boundary.?);
}
test "parse invalid" {
    const cases = [_][]const u8{
        "",
        "te xt/html;",
        "te@xt/html;",
        "text/ht@ml;",
        "text/html;",
        "/text/html",
        "/html",
    };
    for (cases) |tc| {
        // every case must be rejected, whatever the error.
        if (Self.parse(tc)) |_| {
            try testing.expect(false);
        } else |_| {}
    }
}
// eql reports whether self and b have the same type and subtype.
// The comparison is ASCII case-insensitive, as media types are:
// `Text/HTML` equals `text/html`. Parameters are ignored.
pub fn eql(self: Self, b: Self) bool {
    return std.ascii.eqlIgnoreCase(self.mtype, b.mtype) and
        std.ascii.eqlIgnoreCase(self.msubtype, b.msubtype);
}

View File

@@ -10,7 +10,7 @@ const Node = @import("node.zig").Node;
const NodeList = @import("nodelist.zig").NodeList;
const NodeUnion = @import("node.zig").Union;
const Walker = @import("html_collection.zig").WalkerDepthFirst;
const Walker = @import("walker.zig").WalkerDepthFirst;
const collection = @import("html_collection.zig");
const Element = @import("element.zig").Element;

View File

@@ -9,7 +9,7 @@ const checkCases = jsruntime.test_utils.checkCases;
const collection = @import("html_collection.zig");
const Node = @import("node.zig").Node;
const Walker = @import("html_collection.zig").WalkerDepthFirst;
const Walker = @import("walker.zig").WalkerDepthFirst;
const NodeList = @import("nodelist.zig").NodeList;
const HTMLElem = @import("../html/elements.zig");
pub const Union = @import("../html/elements.zig").Union;

View File

@@ -11,6 +11,11 @@ const utils = @import("utils.z");
const Element = @import("element.zig").Element;
const Union = @import("element.zig").Union;
const Walker = @import("walker.zig").Walker;
const WalkerDepthFirst = @import("walker.zig").WalkerDepthFirst;
const WalkerChildren = @import("walker.zig").WalkerChildren;
const WalkerNone = @import("walker.zig").WalkerNone;
const Matcher = union(enum) {
matchByName: MatchByName,
matchByTagName: MatchByTagName,
@@ -255,89 +260,6 @@ pub fn HTMLCollectionByAnchors(
};
}
const Walker = union(enum) {
walkerDepthFirst: WalkerDepthFirst,
walkerChildren: WalkerChildren,
walkerNone: WalkerNone,
pub fn get_next(self: Walker, root: *parser.Node, cur: ?*parser.Node) !?*parser.Node {
switch (self) {
inline else => |case| return case.get_next(root, cur),
}
}
};
// WalkerDepthFirst iterates over the DOM tree to return the next following
// node or null at the end.
//
// This implementation is a zig version of Netsurf code.
// http://source.netsurf-browser.org/libdom.git/tree/src/html/html_collection.c#n177
//
// The iteration is a depth first as required by the specification.
// https://dom.spec.whatwg.org/#htmlcollection
// https://dom.spec.whatwg.org/#concept-tree-order
pub const WalkerDepthFirst = struct {
pub fn get_next(_: WalkerDepthFirst, root: *parser.Node, cur: ?*parser.Node) !?*parser.Node {
var n = cur orelse root;
// TODO deinit next
if (try parser.nodeFirstChild(n)) |next| {
return next;
}
// TODO deinit next
if (try parser.nodeNextSibling(n)) |next| {
return next;
}
// TODO deinit parent
// Back to the parent of cur.
// If cur has no parent, then the iteration is over.
var parent = try parser.nodeParentNode(n) orelse return null;
// TODO deinit lastchild
var lastchild = try parser.nodeLastChild(parent);
while (n != root and n == lastchild) {
n = parent;
// TODO deinit parent
// Back to the prev's parent.
// If prev has no parent, then the loop must stop.
parent = try parser.nodeParentNode(n) orelse break;
// TODO deinit lastchild
lastchild = try parser.nodeLastChild(parent);
}
if (n == root) {
return null;
}
return try parser.nodeNextSibling(n);
}
};
// WalkerChildren iterates over the root's children only.
pub const WalkerChildren = struct {
pub fn get_next(_: WalkerChildren, root: *parser.Node, cur: ?*parser.Node) !?*parser.Node {
// On walk start, we return the first root's child.
if (cur == null) return try parser.nodeFirstChild(root);
// If cur is root, then return null.
// This is a special case, if the root is included in the walk, we
// don't want to go further to find children.
if (root == cur.?) return null;
return try parser.nodeNextSibling(cur.?);
}
};
pub const WalkerNone = struct {
pub fn get_next(_: WalkerNone, _: *parser.Node, _: ?*parser.Node) !?*parser.Node {
return null;
}
};
pub const HTMLCollectionIterator = struct {
pub const mem_guarantied = true;

86
src/dom/walker.zig Normal file
View File

@@ -0,0 +1,86 @@
const std = @import("std");
const parser = @import("../netsurf.zig");
// Walker selects one of the DOM iteration strategies.
pub const Walker = union(enum) {
    walkerDepthFirst: WalkerDepthFirst,
    walkerChildren: WalkerChildren,
    walkerNone: WalkerNone,

    // get_next forwards to the get_next of the active strategy.
    pub fn get_next(self: Walker, root: *parser.Node, cur: ?*parser.Node) !?*parser.Node {
        return switch (self) {
            inline else => |strategy| strategy.get_next(root, cur),
        };
    }
};
// WalkerDepthFirst iterates over the DOM tree to return the next following
// node or null at the end.
//
// This implementation is a zig version of Netsurf code.
// http://source.netsurf-browser.org/libdom.git/tree/src/html/html_collection.c#n177
//
// The iteration is a depth first as required by the specification.
// https://dom.spec.whatwg.org/#htmlcollection
// https://dom.spec.whatwg.org/#concept-tree-order
pub const WalkerDepthFirst = struct {
    // get_next returns the node following cur in tree order within the
    // subtree of root, or null when the walk is over.
    // A null cur starts the walk: the first child of root is returned.
    // The walk never leaves the subtree of root.
    pub fn get_next(_: WalkerDepthFirst, root: *parser.Node, cur: ?*parser.Node) !?*parser.Node {
        var n = cur orelse root;

        // Go down first.
        // TODO deinit next
        if (try parser.nodeFirstChild(n)) |next| {
            return next;
        }

        // A root without children ends the walk: its siblings are outside
        // of the subtree.
        if (n == root) return null;

        // TODO deinit next
        if (try parser.nodeNextSibling(n)) |next| {
            return next;
        }

        // TODO deinit parent
        // Back to the parent of cur.
        // If cur has no parent, then the iteration is over.
        var parent = try parser.nodeParentNode(n) orelse return null;

        // Climb up while the current node is the last child of its parent,
        // without going above root.
        // TODO deinit lastchild
        var lastchild = try parser.nodeLastChild(parent);
        while (n != root and n == lastchild) {
            n = parent;

            // TODO deinit parent
            // Back to the prev's parent.
            // If prev has no parent, then the loop must stop.
            parent = try parser.nodeParentNode(n) orelse break;

            // TODO deinit lastchild
            lastchild = try parser.nodeLastChild(parent);
        }

        if (n == root) {
            return null;
        }

        return try parser.nodeNextSibling(n);
    }
};
// WalkerChildren iterates over the root's children only.
pub const WalkerChildren = struct {
    // get_next returns the first child of root when cur is null, and the
    // next sibling of cur otherwise.
    pub fn get_next(_: WalkerChildren, root: *parser.Node, cur: ?*parser.Node) !?*parser.Node {
        // On walk start, we return the first root's child.
        const current = cur orelse return try parser.nodeFirstChild(root);

        // If cur is root, then return null.
        // This is a special case, if the root is included in the walk, we
        // don't want to go further to find children.
        if (current == root) return null;

        return try parser.nodeNextSibling(current);
    }
};
// WalkerNone never iterates: it yields no node at all.
pub const WalkerNone = struct {
    // get_next ignores its arguments and always returns null.
    pub fn get_next(_: WalkerNone, _: *parser.Node, _: ?*parser.Node) !?*parser.Node {
        return null;
    }
};

View File

@@ -12,7 +12,7 @@ const NodeList = @import("../dom/nodelist.zig").NodeList;
const HTMLElem = @import("elements.zig");
const collection = @import("../dom/html_collection.zig");
const Walker = collection.WalkerDepthFirst;
const Walker = @import("../dom/walker.zig").WalkerDepthFirst;
// WEB IDL https://html.spec.whatwg.org/#the-document-object
pub const HTMLDocument = struct {

View File

@@ -2,10 +2,12 @@ const generate = @import("../generate.zig");
const HTMLDocument = @import("document.zig").HTMLDocument;
const HTMLElem = @import("elements.zig");
const Window = @import("window.zig").Window;
pub const Interfaces = generate.Tuple(.{
HTMLDocument,
HTMLElem.HTMLElement,
HTMLElem.HTMLMediaElement,
HTMLElem.Interfaces,
Window,
});

47
src/html/window.zig Normal file
View File

@@ -0,0 +1,47 @@
const std = @import("std");
const parser = @import("../netsurf.zig");
const EventTarget = @import("../dom/event_target.zig").EventTarget;
// Window is the global object exposed to scripts.
// https://dom.spec.whatwg.org/#interface-window-extensions
// https://html.spec.whatwg.org/multipage/nav-history-apis.html#window
pub const Window = struct {
    pub const prototype = *EventTarget;
    pub const mem_guarantied = true;

    // document currently attached to the window, if any.
    document: ?*parser.Document = null,
    // value returned by get_name.
    target: []const u8,

    // create returns a window without document. A null target gives an
    // empty name.
    pub fn create(target: ?[]const u8) Window {
        const name = target orelse "";
        return .{ .target = name };
    }

    // replaceDocument attaches doc to the window in place of the previous
    // document.
    pub fn replaceDocument(self: *Window, doc: *parser.Document) void {
        self.document = doc;
    }

    // window, self and parent all return the window itself.
    pub fn get_window(self: *Window) *Window {
        return self;
    }

    pub fn get_self(self: *Window) *Window {
        return self;
    }

    pub fn get_parent(self: *Window) *Window {
        return self;
    }

    pub fn get_document(self: *Window) ?*parser.Document {
        return self.document;
    }

    pub fn get_name(self: *Window) []const u8 {
        return self.target;
    }

    // TODO we need to re-implement EventTarget interface.
};

View File

@@ -3,9 +3,9 @@ const std = @import("std");
const jsruntime = @import("jsruntime");
const parser = @import("netsurf.zig");
const DOM = @import("dom.zig");
const apiweb = @import("apiweb.zig");
pub const Types = jsruntime.reflect(DOM.Interfaces);
pub const Types = jsruntime.reflect(apiweb.Interfaces);
const socket_path = "/tmp/browsercore-server.sock";

73
src/main_get.zig Normal file
View File

@@ -0,0 +1,73 @@
const std = @import("std");
const Browser = @import("browser/browser.zig").Browser;
const jsruntime = @import("jsruntime");
const apiweb = @import("apiweb.zig");
pub const Types = jsruntime.reflect(apiweb.Interfaces);
pub const std_options = struct {
pub const log_level = .debug;
};
const usage =
\\usage: {s} [options] <url>
\\ request the url with the browser
\\
\\ -h, --help Print this help message and exit.
\\ --dump Dump document in stdout
\\
;
// main requests the url given on the command line with the browser and,
// with --dump, writes the resulting page on stdout.
// Exactly one url is accepted: the usage is printed on stderr and the
// process exits with status 1 otherwise.
pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer {
        if (gpa.deinit() == .leak) {
            std.log.warn("leaks detected\n", .{});
        }
    }
    const allocator = gpa.allocator();

    var args = try std.process.argsWithAllocator(allocator);
    defer args.deinit();

    // the first argument is the executable name.
    const execname = args.next().?;

    var url: []const u8 = "";
    var dump = false;

    while (args.next()) |arg| {
        const wants_help = std.mem.eql(u8, "-h", arg) or std.mem.eql(u8, "--help", arg);
        if (wants_help) {
            try std.io.getStdErr().writer().print(usage, .{execname});
            std.os.exit(0);
        }

        if (std.mem.eql(u8, "--dump", arg)) {
            dump = true;
            continue;
        }

        // allow only one url
        if (url.len != 0) {
            try std.io.getStdErr().writer().print(usage, .{execname});
            std.os.exit(1);
        }
        url = arg;
    }

    // the url is mandatory.
    if (url.len == 0) {
        try std.io.getStdErr().writer().print(usage, .{execname});
        std.os.exit(1);
    }

    const vm = jsruntime.VM.init();
    defer vm.deinit();

    var browser = try Browser.init(allocator, vm);
    defer browser.deinit();

    var page = try browser.currentSession().createPage();
    defer page.end();

    try page.navigate(url);

    if (dump) try page.dump(std.io.getStdOut());
}

View File

@@ -3,11 +3,11 @@ const std = @import("std");
const jsruntime = @import("jsruntime");
const parser = @import("netsurf.zig");
const DOM = @import("dom.zig");
const apiweb = @import("apiweb.zig");
const html_test = @import("html_test.zig").html;
pub const Types = jsruntime.reflect(DOM.Interfaces);
pub const Types = jsruntime.reflect(apiweb.Interfaces);
var doc: *parser.DocumentHTML = undefined;

View File

@@ -6,7 +6,7 @@ const Suite = @import("wpt/testcase.zig").Suite;
const FileLoader = @import("wpt/fileloader.zig").FileLoader;
const wpt = @import("wpt/run.zig");
const DOM = @import("dom.zig");
const apiweb = @import("apiweb.zig");
const HTMLElem = @import("html/elements.zig");
const wpt_dir = "tests/wpt";
@@ -29,7 +29,7 @@ const Out = enum {
text,
};
pub const Types = jsruntime.reflect(DOM.Interfaces);
pub const Types = jsruntime.reflect(apiweb.Interfaces);
// TODO For now the WPT tests run is specific to WPT.
// It manually load js framwork libs, and run the first script w/ js content in

View File

@@ -5,7 +5,7 @@ const jsruntime = @import("jsruntime");
const generate = @import("generate.zig");
const parser = @import("netsurf.zig");
const DOM = @import("dom.zig");
const apiweb = @import("apiweb.zig");
const documentTestExecFn = @import("dom/document.zig").testExecFn;
const HTMLDocumentTestExecFn = @import("html/document.zig").testExecFn;
@@ -21,7 +21,7 @@ const DOMTokenListExecFn = @import("dom/token_list.zig").testExecFn;
const NodeListTestExecFn = @import("dom/nodelist.zig").testExecFn;
const AttrTestExecFn = @import("dom/attribute.zig").testExecFn;
pub const Types = jsruntime.reflect(DOM.Interfaces);
pub const Types = jsruntime.reflect(apiweb.Interfaces);
var doc: *parser.DocumentHTML = undefined;
@@ -122,3 +122,12 @@ test "bug document html parsing #4" {
doc = try parser.documentHTMLParse(file.reader(), "UTF-8");
parser.documentHTMLClose(doc) catch {};
}
const dump = @import("browser/dump.zig");
// Runs the HTML dump test function, discarding its output into /dev/null.
test "run browser tests" {
    const sink = try std.fs.openFileAbsolute("/dev/null", .{ .mode = .write_only });
    defer sink.close();

    try dump.HTMLFileTestFn(sink);
}