diff --git a/src/browser/browser.zig b/src/browser/browser.zig index 06757d51..8393081e 100644 --- a/src/browser/browser.zig +++ b/src/browser/browser.zig @@ -17,33 +17,16 @@ // along with this program. If not, see . const std = @import("std"); -const builtin = @import("builtin"); const Allocator = std.mem.Allocator; const ArenaAllocator = std.heap.ArenaAllocator; -const Dump = @import("dump.zig"); -const Mime = @import("mime.zig").Mime; -const DataURI = @import("datauri.zig").DataURI; -const parser = @import("netsurf.zig"); - -const Window = @import("html/window.zig").Window; -const Walker = @import("dom/walker.zig").WalkerDepthFirst; - const Env = @import("env.zig").Env; const App = @import("../app.zig").App; -const Loop = @import("../runtime/loop.zig").Loop; - -const URL = @import("../url.zig").URL; - -const http = @import("../http/client.zig"); -const storage = @import("storage/storage.zig"); -const SessionState = @import("env.zig").SessionState; +const Session = @import("session.zig").Session; const Notification = @import("../notification.zig").Notification; -const polyfill = @import("polyfill/polyfill.zig"); - -const log = std.log.scoped(.browser); +const http = @import("../http/client.zig"); // Browser is an instance of the browser. // You can create multiple browser instances. @@ -107,816 +90,6 @@ pub const Browser = struct { } }; -// Session is like a browser's tab. -// It owns the js env and the loader for all the pages of the session. -// You can create successively multiple pages for a session, but you must -// deinit a page before running another one. -pub const Session = struct { - browser: *Browser, - - // Used to create our Inspector and in the BrowserContext. 
- arena: ArenaAllocator, - - executor: Env.Executor, - storage_shed: storage.Shed, - cookie_jar: storage.CookieJar, - - page: ?Page = null, - - fn init(self: *Session, browser: *Browser) !void { - var executor = try browser.env.newExecutor(); - errdefer executor.deinit(); - - const allocator = browser.app.allocator; - self.* = .{ - .browser = browser, - .executor = executor, - .arena = ArenaAllocator.init(allocator), - .storage_shed = storage.Shed.init(allocator), - .cookie_jar = storage.CookieJar.init(allocator), - }; - } - - fn deinit(self: *Session) void { - if (self.page != null) { - self.removePage(); - } - self.arena.deinit(); - self.cookie_jar.deinit(); - self.storage_shed.deinit(); - self.executor.deinit(); - } - - // NOTE: the caller is not the owner of the returned value, - // the pointer on Page is just returned as a convenience - pub fn createPage(self: *Session) !*Page { - std.debug.assert(self.page == null); - - // Start netsurf memory arena. - // We need to init this early as JS event handlers may be registered through Runtime.evaluate before the first html doc is loaded - try parser.init(); - - const page_arena = &self.browser.page_arena; - _ = page_arena.reset(.{ .retain_with_limit = 1 * 1024 * 1024 }); - - self.page = @as(Page, undefined); - const page = &self.page.?; - try Page.init(page, page_arena.allocator(), self); - - // start JS env - log.debug("start new js scope", .{}); - // Inform CDP the main page has been created such that additional context for other Worlds can be created as well - self.browser.notification.dispatch(.page_created, page); - - return page; - } - - pub fn removePage(self: *Session) void { - // Inform CDP the page is going to be removed, allowing other worlds to remove themselves before the main one - self.browser.notification.dispatch(.page_remove, .{}); - - std.debug.assert(self.page != null); - // Reset all existing callbacks. 
- self.browser.app.loop.reset(); - self.executor.endScope(); - self.page = null; - - // clear netsurf memory arena. - parser.deinit(); - } - - pub fn currentPage(self: *Session) ?*Page { - return &(self.page orelse return null); - } - - fn pageNavigate(self: *Session, url_string: []const u8) !void { - // currently, this is only called from the page, so let's hope - // it isn't null! - std.debug.assert(self.page != null); - - // can't use the page arena, because we're about to reset it - // and don't want to use the session's arena, because that'll start to - // look like a leak if we navigate from page to page a lot. - var buf: [2048]u8 = undefined; - var fba = std.heap.FixedBufferAllocator.init(&buf); - const url = try self.page.?.url.resolve(fba.allocator(), url_string); - - self.removePage(); - var page = try self.createPage(); - return page.navigate(url, .{ - .reason = .anchor, - }); - } -}; - -// Page navigates to an url. -// You can navigates multiple urls with the same page, but you have to call -// end() to stop the previous navigation before starting a new one. -// The page handle all its memory in an arena allocator. The arena is reseted -// when end() is called. -pub const Page = struct { - session: *Session, - - // an arena with a lifetime for the entire duration of the page - arena: Allocator, - - // Gets injected into any WebAPI method that needs it - state: SessionState, - - // Serves are the root object of our JavaScript environment - window: Window, - - doc: ?*parser.Document, - - // The URL of the page - url: URL, - - raw_data: ?[]const u8, - - renderer: FlatRenderer, - - microtask_node: Loop.CallbackNode, - - window_clicked_event_node: parser.EventNode, - - scope: *Env.Scope, - - // List of modules currently fetched/loaded. - module_map: std.StringHashMapUnmanaged([]const u8), - - // current_script is the script currently evaluated by the page. - // current_script could by fetch module to resolve module's url to fetch. 
- current_script: ?*const Script = null, - - fn init(self: *Page, arena: Allocator, session: *Session) !void { - const browser = session.browser; - self.* = .{ - .window = try Window.create(null, null), - .arena = arena, - .doc = null, - .raw_data = null, - .url = URL.empty, - .session = session, - .renderer = FlatRenderer.init(arena), - .microtask_node = .{ .func = microtaskCallback }, - .window_clicked_event_node = .{ .func = windowClicked }, - .state = .{ - .arena = arena, - .document = null, - .url = &self.url, - .renderer = &self.renderer, - .loop = browser.app.loop, - .cookie_jar = &session.cookie_jar, - .http_client = browser.http_client, - }, - .scope = try session.executor.startScope(&self.window, &self.state, self, true), - .module_map = .empty, - }; - - // load polyfills - try polyfill.load(self.arena, self.scope); - - // _ = try session.browser.app.loop.timeout(1 * std.time.ns_per_ms, &self.microtask_node); - } - - fn microtaskCallback(node: *Loop.CallbackNode, repeat_delay: *?u63) void { - const self: *Page = @fieldParentPtr("microtask_node", node); - self.session.browser.runMicrotasks(); - repeat_delay.* = 1 * std.time.ns_per_ms; - } - - // dump writes the page content into the given file. - pub fn dump(self: *const Page, out: std.fs.File) !void { - // if no HTML document pointer available, dump the data content only. - if (self.doc == null) { - // no data loaded, nothing to do. - if (self.raw_data == null) return; - return try out.writeAll(self.raw_data.?); - } - - // if the page has a pointer to a document, dumps the HTML. 
- try Dump.writeHTML(self.doc.?, out); - } - - pub fn fetchModuleSource(ctx: *anyopaque, specifier: []const u8) !?[]const u8 { - const self: *Page = @ptrCast(@alignCast(ctx)); - - log.debug("fetch module: specifier: {s}", .{specifier}); - - const base = if (self.current_script) |s| s.src else null; - - const file_src = blk: { - if (base) |_base| { - break :blk try URL.stitch(self.arena, specifier, _base); - } else break :blk specifier; - }; - - if (self.module_map.get(file_src)) |module| return module; - - const module = try self.fetchData(specifier, base); - if (module) |_module| try self.module_map.putNoClobber(self.arena, file_src, _module); - return module; - } - - pub fn wait(self: *Page) !void { - var try_catch: Env.TryCatch = undefined; - try_catch.init(self.scope); - defer try_catch.deinit(); - - try self.session.browser.app.loop.run(); - - if (try_catch.hasCaught() == false) { - log.debug("wait: OK", .{}); - return; - } - - const msg = (try try_catch.err(self.arena)) orelse "unknown"; - log.info("wait error: {s}", .{msg}); - } - - pub fn origin(self: *const Page, arena: Allocator) ![]const u8 { - var arr: std.ArrayListUnmanaged(u8) = .{}; - try self.url.origin(arr.writer(arena)); - return arr.items; - } - - // spec reference: https://html.spec.whatwg.org/#document-lifecycle - pub fn navigate(self: *Page, request_url: URL, opts: NavigateOpts) !void { - const arena = self.arena; - const session = self.session; - - log.debug("starting GET {s}", .{request_url}); - - // if the url is about:blank, nothing to do. 
- if (std.mem.eql(u8, "about:blank", request_url.raw)) { - return; - } - - // we don't clone url, because we're going to replace self.url - // later in this function, with the final request url (since we might - // redirect) - self.url = request_url; - - // load the data - var request = try self.newHTTPRequest(.GET, &self.url, .{ .navigation = true }); - defer request.deinit(); - - session.browser.notification.dispatch(.page_navigate, &.{ - .url = &self.url, - .reason = opts.reason, - .timestamp = timestamp(), - }); - - var response = try request.sendSync(.{}); - - // would be different than self.url in the case of a redirect - self.url = try URL.fromURI(arena, request.request_uri); - - const header = response.header; - try session.cookie_jar.populateFromResponse(&self.url.uri, &header); - - // TODO handle fragment in url. - try self.window.replaceLocation(.{ .url = try self.url.toWebApi(arena) }); - - log.info("GET {any} {d}", .{ self.url, header.status }); - - const content_type = header.get("content-type"); - - const mime: Mime = blk: { - if (content_type) |ct| { - break :blk try Mime.parse(arena, ct); - } - break :blk Mime.sniff(try response.peek()); - } orelse .unknown; - - if (mime.isHTML()) { - try self.loadHTMLDoc(&response, mime.charset orelse "utf-8"); - } else { - log.info("non-HTML document: {s}", .{content_type orelse "null"}); - var arr: std.ArrayListUnmanaged(u8) = .{}; - while (try response.next()) |data| { - try arr.appendSlice(arena, try arena.dupe(u8, data)); - } - // save the body into the page. 
- self.raw_data = arr.items; - } - - session.browser.notification.dispatch(.page_navigated, &.{ - .url = &self.url, - .timestamp = timestamp(), - }); - } - - // https://html.spec.whatwg.org/#read-html - fn loadHTMLDoc(self: *Page, reader: anytype, charset: []const u8) !void { - const arena = self.arena; - - log.debug("parse html with charset {s}", .{charset}); - - const ccharset = try arena.dupeZ(u8, charset); - - const html_doc = try parser.documentHTMLParse(reader, ccharset); - const doc = parser.documentHTMLToDocument(html_doc); - - // save a document's pointer in the page. - self.doc = doc; - - const document_element = (try parser.documentGetDocumentElement(doc)) orelse return error.DocumentElementError; - try parser.eventTargetAddEventListener( - parser.toEventTarget(parser.Element, document_element), - "click", - &self.window_clicked_event_node, - false, - ); - - // TODO set document.readyState to interactive - // https://html.spec.whatwg.org/#reporting-document-loading-status - - // inject the URL to the document including the fragment. - try parser.documentSetDocumentURI(doc, self.url.raw); - - // TODO set the referrer to the document. - try self.window.replaceDocument(html_doc); - self.window.setStorageShelf( - try self.session.storage_shed.getOrPut(try self.origin(self.arena)), - ); - - // https://html.spec.whatwg.org/#read-html - - // update the sessions state - self.state.document = html_doc; - - // browse the DOM tree to retrieve scripts - // TODO execute the synchronous scripts during the HTL parsing. - // TODO fetch the script resources concurrently but execute them in the - // declaration order for synchronous ones. - - // async_scripts stores scripts which can be run asynchronously. - // for now they are just run after the non-async one in order to - // dispatch DOMContentLoaded the sooner as possible. - var async_scripts: std.ArrayListUnmanaged(Script) = .{}; - - // defer_scripts stores scripts which are meant to be deferred. 
For now - // this doesn't have a huge impact, since normal scripts are parsed - // after the document is loaded. But (a) we should fix that and (b) - // this results in JavaScript being loaded in the same order as browsers - // which can help debug issues (and might actually fix issues if websites - // are expecting this execution order) - var defer_scripts: std.ArrayListUnmanaged(Script) = .{}; - - const root = parser.documentToNode(doc); - const walker = Walker{}; - var next: ?*parser.Node = null; - while (true) { - next = try walker.get_next(root, next) orelse break; - - // ignore non-elements nodes. - if (try parser.nodeType(next.?) != .element) { - continue; - } - - const e = parser.nodeToElement(next.?); - - // ignore non-js script. - const script = try Script.init(e) orelse continue; - - // TODO use fetchpriority - // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/script#fetchpriority - - // > async - // > For classic scripts, if the async attribute is present, - // > then the classic script will be fetched in parallel to - // > parsing and evaluated as soon as it is available. - // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/script#async - if (script.is_async) { - try async_scripts.append(arena, script); - continue; - } - - if (script.is_defer) { - try defer_scripts.append(arena, script); - continue; - } - - // TODO handle for attribute - // TODO handle event attribute - - // > Scripts without async, defer or type="module" - // > attributes, as well as inline scripts without the - // > type="module" attribute, are fetched and executed - // > immediately before the browser continues to parse the - // > page. 
- // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/script#notes - try parser.documentHTMLSetCurrentScript(html_doc, @ptrCast(e)); - self.evalScript(&script) catch |err| log.warn("evaljs: {any}", .{err}); - try parser.documentHTMLSetCurrentScript(html_doc, null); - } - - for (defer_scripts.items) |s| { - try parser.documentHTMLSetCurrentScript(html_doc, @ptrCast(s.element)); - self.evalScript(&s) catch |err| log.warn("evaljs: {any}", .{err}); - try parser.documentHTMLSetCurrentScript(html_doc, null); - } - - // dispatch DOMContentLoaded before the transition to "complete", - // at the point where all subresources apart from async script elements - // have loaded. - // https://html.spec.whatwg.org/#reporting-document-loading-status - const evt = try parser.eventCreate(); - defer parser.eventDestroy(evt); - - try parser.eventInit(evt, "DOMContentLoaded", .{ .bubbles = true, .cancelable = true }); - _ = try parser.eventTargetDispatchEvent(parser.toEventTarget(parser.DocumentHTML, html_doc), evt); - - // eval async scripts. - for (async_scripts.items) |s| { - try parser.documentHTMLSetCurrentScript(html_doc, @ptrCast(s.element)); - self.evalScript(&s) catch |err| log.warn("evaljs: {any}", .{err}); - try parser.documentHTMLSetCurrentScript(html_doc, null); - } - - // TODO wait for async scripts - - // TODO set document.readyState to complete - - // dispatch window.load event - const loadevt = try parser.eventCreate(); - defer parser.eventDestroy(loadevt); - - try parser.eventInit(loadevt, "load", .{}); - _ = try parser.eventTargetDispatchEvent( - parser.toEventTarget(Window, &self.window), - loadevt, - ); - } - - // evalScript evaluates the src in priority. - // if no src is present, we evaluate the text source. 
- // https://html.spec.whatwg.org/multipage/scripting.html#script-processing-model - fn evalScript(self: *Page, script: *const Script) !void { - const src = script.src orelse { - // source is inline - // TODO handle charset attribute - if (try parser.nodeTextContent(parser.elementToNode(script.element))) |text| { - try script.eval(self, text); - } - return; - }; - - self.current_script = script; - defer self.current_script = null; - - log.debug("starting GET {s}", .{src}); - - // https://html.spec.whatwg.org/multipage/webappapis.html#fetch-a-classic-script - const body = (try self.fetchData(src, null)) orelse { - // TODO If el's result is null, then fire an event named error at - // el, and return - return; - }; - - script.eval(self, body) catch |err| switch (err) { - error.JsErr => {}, // nothing to do here. - else => return err, - }; - - // TODO If el's from an external file is true, then fire an event - // named load at el. - } - - // fetchData returns the data corresponding to the src target. - // It resolves src using the page's uri. - // If a base path is given, src is resolved according to the base first. - // the caller owns the returned string - fn fetchData(self: *const Page, src: []const u8, base: ?[]const u8) !?[]const u8 { - log.debug("starting fetch {s}", .{src}); - - const arena = self.arena; - - // Handle data URIs. - if (try DataURI.parse(arena, src)) |data_uri| { - return data_uri.data; - } - - var res_src = src; - - // if a base path is given, we resolve src using base. 
- if (base) |_base| { - res_src = try URL.stitch(arena, src, _base); - } - - var origin_url = &self.url; - const url = try origin_url.resolve(arena, res_src); - - var request = try self.newHTTPRequest(.GET, &url, .{ - .origin_uri = &origin_url.uri, - .navigation = false, - }); - defer request.deinit(); - - var response = try request.sendSync(.{}); - var header = response.header; - try self.session.cookie_jar.populateFromResponse(&url.uri, &header); - - log.info("fetch {any}: {d}", .{ url, header.status }); - - if (header.status != 200) { - return error.BadStatusCode; - } - - var arr: std.ArrayListUnmanaged(u8) = .{}; - while (try response.next()) |data| { - try arr.appendSlice(arena, try arena.dupe(u8, data)); - } - - // TODO check content-type - - // check no body - if (arr.items.len == 0) { - return null; - } - - return arr.items; - } - - fn newHTTPRequest(self: *const Page, method: http.Request.Method, url: *const URL, opts: storage.cookie.LookupOpts) !http.Request { - var request = try self.state.http_client.request(method, &url.uri); - errdefer request.deinit(); - - var arr: std.ArrayListUnmanaged(u8) = .{}; - try self.state.cookie_jar.forRequest(&url.uri, arr.writer(self.arena), opts); - - if (arr.items.len > 0) { - try request.addHeader("Cookie", arr.items, .{}); - } - - return request; - } - - pub const MouseEvent = struct { - x: i32, - y: i32, - type: Type, - - const Type = enum { - pressed, - released, - }; - }; - - pub fn mouseEvent(self: *Page, me: MouseEvent) !void { - if (me.type != .pressed) { - return; - } - - const element = self.renderer.getElementAtPosition(me.x, me.y) orelse return; - - const event = try parser.mouseEventCreate(); - defer parser.mouseEventDestroy(event); - try parser.mouseEventInit(event, "click", .{ - .bubbles = true, - .cancelable = true, - .x = me.x, - .y = me.y, - }); - _ = try parser.elementDispatchEvent(element, @ptrCast(event)); - } - - fn windowClicked(node: *parser.EventNode, event: *parser.Event) void { - const self: 
*Page = @fieldParentPtr("window_clicked_event_node", node); - self._windowClicked(event) catch |err| { - log.err("window click handler: {}", .{err}); - }; - } - - fn _windowClicked(self: *Page, event: *parser.Event) !void { - const target = (try parser.eventTarget(event)) orelse return; - - const node = parser.eventTargetToNode(target); - if (try parser.nodeType(node) != .element) { - return; - } - - const html_element: *parser.ElementHTML = @ptrCast(node); - switch (try parser.elementHTMLGetTagType(html_element)) { - .a => { - const element: *parser.Element = @ptrCast(node); - const href = (try parser.elementGetAttribute(element, "href")) orelse return; - - // We cannot navigate immediately as navigating will delete the DOM tree, which holds this event's node. - // As such we schedule the function to be called as soon as possible. - // NOTE Using the page.arena assumes that the scheduling loop does use this object after invoking the callback - // If that changes we may want to consider storing DelayedNavigation in the session instead. - const arena = self.arena; - const navi = try arena.create(DelayedNavigation); - navi.* = .{ - .session = self.session, - .href = try arena.dupe(u8, href), - }; - _ = try self.state.loop.timeout(0, &navi.navigate_node); - }, - else => {}, - } - } - - const DelayedNavigation = struct { - navigate_node: Loop.CallbackNode = .{ .func = DelayedNavigation.delay_navigate }, - session: *Session, - href: []const u8, - - fn delay_navigate(node: *Loop.CallbackNode, repeat_delay: *?u63) void { - _ = repeat_delay; - const self: *DelayedNavigation = @fieldParentPtr("navigate_node", node); - self.session.pageNavigate(self.href) catch |err| { - log.err("Delayed navigation error {}", .{err}); // TODO: should we trigger a specific event here? 
- }; - } - }; - - const Script = struct { - kind: Kind, - is_async: bool, - is_defer: bool, - src: ?[]const u8, - element: *parser.Element, - // The javascript to load after we successfully load the script - onload: ?[]const u8, - - // The javascript to load if we have an error executing the script - // For now, we ignore this, since we still have a lot of errors that we - // shouldn't - //onerror: ?[]const u8, - - const Kind = enum { - module, - javascript, - }; - - fn init(e: *parser.Element) !?Script { - // ignore non-script tags - const tag = try parser.elementHTMLGetTagType(@as(*parser.ElementHTML, @ptrCast(e))); - if (tag != .script) { - return null; - } - - if (try parser.elementGetAttribute(e, "nomodule") != null) { - // these scripts should only be loaded if we don't support modules - // but since we do support modules, we can just skip them. - return null; - } - - const kind = parseKind(try parser.elementGetAttribute(e, "type")) orelse { - return null; - }; - - return .{ - .kind = kind, - .element = e, - .src = try parser.elementGetAttribute(e, "src"), - .onload = try parser.elementGetAttribute(e, "onload"), - .is_async = try parser.elementGetAttribute(e, "async") != null, - .is_defer = try parser.elementGetAttribute(e, "defer") != null, - }; - } - - // > type - // > Attribute is not set (default), an empty string, or a JavaScript MIME - // > type indicates that the script is a "classic script", containing - // > JavaScript code. 
- // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/script#attribute_is_not_set_default_an_empty_string_or_a_javascript_mime_type - fn parseKind(script_type_: ?[]const u8) ?Kind { - const script_type = script_type_ orelse return .javascript; - if (script_type.len == 0) { - return .javascript; - } - - if (std.mem.eql(u8, script_type, "application/javascript")) return .javascript; - if (std.mem.eql(u8, script_type, "text/javascript")) return .javascript; - if (std.mem.eql(u8, script_type, "module")) return .module; - - return null; - } - - fn eval(self: *const Script, page: *Page, body: []const u8) !void { - var try_catch: Env.TryCatch = undefined; - try_catch.init(page.scope); - defer try_catch.deinit(); - - const src = self.src orelse "inline"; - const res = switch (self.kind) { - .javascript => page.scope.exec(body, src), - .module => page.scope.module(body, src), - } catch { - if (try try_catch.err(page.arena)) |msg| { - log.info("eval script {s}: {s}", .{ src, msg }); - } - return error.JsErr; - }; - - if (builtin.mode == .Debug) { - const msg = try res.toString(page.arena); - log.debug("eval script {s}: {s}", .{ src, msg }); - } - - if (self.onload) |onload| { - _ = page.scope.exec(onload, "script_on_load") catch { - if (try try_catch.err(page.arena)) |msg| { - log.info("eval script onload {s}: {s}", .{ src, msg }); - } - return error.JsErr; - }; - } - } - }; -}; - -pub const NavigateReason = enum { - anchor, - address_bar, -}; - -const NavigateOpts = struct { - reason: NavigateReason = .address_bar, -}; - -// provide very poor abstration to the rest of the code. In theory, we can change -// the FlatRenderer to a different implementation, and it'll all just work. -pub const Renderer = FlatRenderer; - -// This "renderer" positions elements in a single row in an unspecified order. -// The important thing is that elements have a consistent position/index within -// that row, which can be turned into a rectangle. 
-const FlatRenderer = struct { - allocator: Allocator, - - // key is a @ptrFromInt of the element - // value is the index position - positions: std.AutoHashMapUnmanaged(u64, u32), - - // given an index, get the element - elements: std.ArrayListUnmanaged(u64), - - const Element = @import("dom/element.zig").Element; - - // we expect allocator to be an arena - pub fn init(allocator: Allocator) FlatRenderer { - return .{ - .elements = .{}, - .positions = .{}, - .allocator = allocator, - }; - } - - pub fn getRect(self: *FlatRenderer, e: *parser.Element) !Element.DOMRect { - var elements = &self.elements; - const gop = try self.positions.getOrPut(self.allocator, @intFromPtr(e)); - var x: u32 = gop.value_ptr.*; - if (gop.found_existing == false) { - x = @intCast(elements.items.len); - try elements.append(self.allocator, @intFromPtr(e)); - gop.value_ptr.* = x; - } - - return .{ - .x = @floatFromInt(x), - .y = 0.0, - .width = 1.0, - .height = 1.0, - }; - } - - pub fn boundingRect(self: *const FlatRenderer) Element.DOMRect { - return .{ - .x = 0.0, - .y = 0.0, - .width = @floatFromInt(self.width()), - .height = @floatFromInt(self.height()), - }; - } - - pub fn width(self: *const FlatRenderer) u32 { - return @max(@as(u32, @intCast(self.elements.items.len)), 1); // At least 1 pixel even if empty - } - - pub fn height(_: *const FlatRenderer) u32 { - return 1; - } - - pub fn getElementAtPosition(self: *const FlatRenderer, x: i32, y: i32) ?*parser.Element { - if (y != 0 or x < 0) { - return null; - } - - const elements = self.elements.items; - return if (x < elements.len) @ptrFromInt(elements[@intCast(x)]) else null; - } -}; - -fn timestamp() u32 { - const ts = std.posix.clock_gettime(std.posix.CLOCK.MONOTONIC) catch unreachable; - return @intCast(ts.sec); -} - const testing = @import("../testing.zig"); test "Browser" { var runner = try testing.jsRunner(testing.tracking_allocator, .{}); diff --git a/src/browser/dom/document_fragment.zig b/src/browser/dom/document_fragment.zig 
index e203ee85..67286fbb 100644 --- a/src/browser/dom/document_fragment.zig +++ b/src/browser/dom/document_fragment.zig @@ -16,8 +16,6 @@ // You should have received a copy of the GNU Affero General Public License // along with this program. If not, see . -const std = @import("std"); - const parser = @import("../netsurf.zig"); const SessionState = @import("../env.zig").SessionState; diff --git a/src/browser/dom/intersection_observer.zig b/src/browser/dom/intersection_observer.zig index 81041d4c..fc04c40f 100644 --- a/src/browser/dom/intersection_observer.zig +++ b/src/browser/dom/intersection_observer.zig @@ -17,14 +17,12 @@ // along with this program. If not, see . const std = @import("std"); -const Allocator = std.mem.Allocator; const parser = @import("../netsurf.zig"); const SessionState = @import("../env.zig").SessionState; const Env = @import("../env.zig").Env; const Element = @import("element.zig").Element; -const Document = @import("document.zig").Document; pub const Interfaces = .{ IntersectionObserver, diff --git a/src/browser/dom/namednodemap.zig b/src/browser/dom/namednodemap.zig index 2c91becf..1923b12c 100644 --- a/src/browser/dom/namednodemap.zig +++ b/src/browser/dom/namednodemap.zig @@ -16,8 +16,6 @@ // You should have received a copy of the GNU Affero General Public License // along with this program. If not, see . 
-const std = @import("std"); - const parser = @import("../netsurf.zig"); const DOMException = @import("exceptions.zig").DOMException; diff --git a/src/browser/env.zig b/src/browser/env.zig index 41342723..f378619a 100644 --- a/src/browser/env.zig +++ b/src/browser/env.zig @@ -5,9 +5,9 @@ const URL = @import("../url.zig").URL; const js = @import("../runtime/js.zig"); const storage = @import("storage/storage.zig"); const generate = @import("../runtime/generate.zig"); +const Renderer = @import("renderer.zig").Renderer; const Loop = @import("../runtime/loop.zig").Loop; const HttpClient = @import("../http/client.zig").Client; -const Renderer = @import("browser.zig").Renderer; const WebApis = struct { // Wrapped like this for debug ergonomics. diff --git a/src/browser/page.zig b/src/browser/page.zig new file mode 100644 index 00000000..ffed1cd2 --- /dev/null +++ b/src/browser/page.zig @@ -0,0 +1,678 @@ +// Copyright (C) 2023-2024 Lightpanda (Selecy SAS) +// +// Francis Bouvier +// Pierre Tachoire +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . 
+
+const std = @import("std");
+const builtin = @import("builtin");
+
+const Allocator = std.mem.Allocator;
+
+const Dump = @import("dump.zig");
+const Mime = @import("mime.zig").Mime;
+const DataURI = @import("datauri.zig").DataURI;
+const Session = @import("session.zig").Session;
+const Renderer = @import("renderer.zig").Renderer;
+const SessionState = @import("env.zig").SessionState;
+const Window = @import("html/window.zig").Window;
+const Walker = @import("dom/walker.zig").WalkerDepthFirst;
+const Env = @import("env.zig").Env;
+const Loop = @import("../runtime/loop.zig").Loop;
+
+const URL = @import("../url.zig").URL;
+
+const parser = @import("netsurf.zig");
+const http = @import("../http/client.zig");
+const storage = @import("storage/storage.zig");
+
+const polyfill = @import("polyfill/polyfill.zig");
+
+const log = std.log.scoped(.page);
+
+// Page navigates to a URL.
+// You can navigate to multiple URLs with the same page, but you have to call
+// end() to stop the previous navigation before starting a new one.
+// The page handles all its memory in an arena allocator. The arena is reset
+// when end() is called.
+// NOTE(review): no end() is defined on Page; Session.createPage is what resets
+// the page arena today. Confirm and update this description.
+pub const Page = struct {
+    session: *Session,
+
+    // an arena with a lifetime for the entire duration of the page
+    arena: Allocator,
+
+    // Gets injected into any WebAPI method that needs it
+    state: SessionState,
+
+    // Serves as the root object of our JavaScript environment
+    window: Window,
+
+    doc: ?*parser.Document,
+
+    // The URL of the page
+    url: URL,
+
+    // Body of the last navigation when the response was not HTML.
+    raw_data: ?[]const u8,
+
+    renderer: Renderer,
+
+    microtask_node: Loop.CallbackNode,
+
+    window_clicked_event_node: parser.EventNode,
+
+    scope: *Env.Scope,
+
+    // List of modules currently fetched/loaded.
+    module_map: std.StringHashMapUnmanaged([]const u8),
+
+    // current_script is the script currently evaluated by the page.
+    // fetchModuleSource uses its src as the base to resolve the URL of the
+    // module to fetch.
+    current_script: ?*const Script = null,
+
+    // Initializes the page in place. This cannot return a Page by value:
+    // `state` stores pointers to self.url and self.renderer, and the JS scope
+    // is started with pointers to self.window, self.state and self.
+    pub fn init(self: *Page, arena: Allocator, session: *Session) !void {
+        const browser = session.browser;
+        self.* = .{
+            .window = try Window.create(null, null),
+            .arena = arena,
+            .doc = null,
+            .raw_data = null,
+            .url = URL.empty,
+            .session = session,
+            .renderer = Renderer.init(arena),
+            .microtask_node = .{ .func = microtaskCallback },
+            .window_clicked_event_node = .{ .func = windowClicked },
+            .state = .{
+                .arena = arena,
+                .document = null,
+                .url = &self.url,
+                .renderer = &self.renderer,
+                .loop = browser.app.loop,
+                .cookie_jar = &session.cookie_jar,
+                .http_client = browser.http_client,
+            },
+            .scope = try session.executor.startScope(&self.window, &self.state, self, true),
+            .module_map = .empty,
+        };
+
+        // load polyfills
+        try polyfill.load(self.arena, self.scope);
+
+        // _ = try session.browser.app.loop.timeout(1 * std.time.ns_per_ms, &self.microtask_node);
+    }
+
+    // Loop callback bound to microtask_node: runs the browser's microtasks and
+    // sets the repeat delay to 1ms. The registration in init is currently
+    // commented out.
+    fn microtaskCallback(node: *Loop.CallbackNode, repeat_delay: *?u63) void {
+        const self: *Page = @fieldParentPtr("microtask_node", node);
+        self.session.browser.runMicrotasks();
+        repeat_delay.* = 1 * std.time.ns_per_ms;
+    }
+
+    // dump writes the page content into the given file.
+    pub fn dump(self: *const Page, out: std.fs.File) !void {
+        const doc = self.doc orelse {
+            // no HTML document available, dump the raw data only.
+            // no data loaded either: nothing to do.
+            const data = self.raw_data orelse return;
+            return out.writeAll(data);
+        };
+
+        // the page has a document, dump it as HTML.
+        try Dump.writeHTML(doc, out);
+    }
+
+    // Returns the source of the module named by `specifier`, fetching it on
+    // first use and caching it in module_map. `ctx` must point to the Page.
+    // The specifier is resolved against the src of the script currently being
+    // evaluated, if any. Returns null when the fetched body is empty.
+    pub fn fetchModuleSource(ctx: *anyopaque, specifier: []const u8) !?[]const u8 {
+        const self: *Page = @ptrCast(@alignCast(ctx));
+
+        log.debug("fetch module: specifier: {s}", .{specifier});
+
+        const base = if (self.current_script) |s| s.src else null;
+
+        const file_src = blk: {
+            if (base) |_base| {
+                break :blk try URL.stitch(self.arena, specifier, _base);
+            } else break :blk specifier;
+        };
+
+        if (self.module_map.get(file_src)) |module| return module;
+
+        const module = try self.fetchData(specifier, base);
+        if (module) |_module| {
+            // The map stores the key slice as-is, it does not copy it. Without
+            // a base, file_src is the caller's `specifier`, whose lifetime we
+            // do not control, so copy it into the page arena before storing.
+            const key = if (base == null) try self.arena.dupe(u8, file_src) else file_src;
+            try self.module_map.putNoClobber(self.arena, key, _module);
+        }
+        return module;
+    }
+
+    // Runs the event loop, then logs the JS error caught while it ran, if any.
+    // A caught JS error is only logged; it is not returned to the caller.
+    pub fn wait(self: *Page) !void {
+        var try_catch: Env.TryCatch = undefined;
+        try_catch.init(self.scope);
+        defer try_catch.deinit();
+
+        try self.session.browser.app.loop.run();
+
+        if (try_catch.hasCaught() == false) {
+            log.debug("wait: OK", .{});
+            return;
+        }
+
+        const msg = (try try_catch.err(self.arena)) orelse "unknown";
+        log.info("wait error: {s}", .{msg});
+    }
+
+    // Writes the origin of the page URL into a buffer allocated with `arena`
+    // and returns it.
+    pub fn origin(self: *const Page, arena: Allocator) ![]const u8 {
+        var arr: std.ArrayListUnmanaged(u8) = .{};
+        try self.url.origin(arr.writer(arena));
+        return arr.items;
+    }
+
+    // Fetches request_url with a GET and loads the response into the page:
+    // HTML responses are parsed and their scripts evaluated, anything else is
+    // kept verbatim in raw_data. Dispatches page_navigate before the request
+    // and page_navigated once the content is loaded. about:blank is a no-op.
+    // spec reference: https://html.spec.whatwg.org/#document-lifecycle
+    pub fn navigate(self: *Page, request_url: URL, opts: NavigateOpts) !void {
+        const arena = self.arena;
+        const session = self.session;
+
+        log.debug("starting GET {s}", .{request_url});
+
+        // if the url is about:blank, nothing to do.
+        if (std.mem.eql(u8, "about:blank", request_url.raw)) {
+            return;
+        }
+
+        // we don't clone url, because we're going to replace self.url
+        // later in this function, with the final request url (since we might
+        // redirect)
+        self.url = request_url;
+
+        // load the data
+        var request = try self.newHTTPRequest(.GET, &self.url, .{ .navigation = true });
+        defer request.deinit();
+
+        session.browser.notification.dispatch(.page_navigate, &.{
+            .url = &self.url,
+            .reason = opts.reason,
+            .timestamp = timestamp(),
+        });
+
+        var response = try request.sendSync(.{});
+
+        // would be different than self.url in the case of a redirect
+        self.url = try URL.fromURI(arena, request.request_uri);
+
+        const header = response.header;
+        try session.cookie_jar.populateFromResponse(&self.url.uri, &header);
+
+        // TODO handle fragment in url.
+        try self.window.replaceLocation(.{ .url = try self.url.toWebApi(arena) });
+
+        log.info("GET {any} {d}", .{ self.url, header.status });
+
+        const content_type = header.get("content-type");
+
+        // use the declared content type when there is one, else sniff the body.
+        const mime: Mime = blk: {
+            if (content_type) |ct| {
+                break :blk try Mime.parse(arena, ct);
+            }
+            break :blk Mime.sniff(try response.peek());
+        } orelse .unknown;
+
+        if (mime.isHTML()) {
+            try self.loadHTMLDoc(&response, mime.charset orelse "utf-8");
+        } else {
+            log.info("non-HTML document: {s}", .{content_type orelse "null"});
+            var arr: std.ArrayListUnmanaged(u8) = .{};
+            while (try response.next()) |data| {
+                // appendSlice copies the chunk into arr, no extra dupe needed.
+                try arr.appendSlice(arena, data);
+            }
+            // save the body into the page.
+            self.raw_data = arr.items;
+        }
+
+        session.browser.notification.dispatch(.page_navigated, &.{
+            .url = &self.url,
+            .timestamp = timestamp(),
+        });
+    }
+
+    // Parses the response as HTML, installs the document on the page and the
+    // window, evaluates the scripts it contains and dispatches the
+    // DOMContentLoaded and load events.
+    // https://html.spec.whatwg.org/#read-html
+    fn loadHTMLDoc(self: *Page, reader: anytype, charset: []const u8) !void {
+        const arena = self.arena;
+
+        log.debug("parse html with charset {s}", .{charset});
+
+        const ccharset = try arena.dupeZ(u8, charset);
+
+        const html_doc = try parser.documentHTMLParse(reader, ccharset);
+        const doc = parser.documentHTMLToDocument(html_doc);
+
+        // save a document's pointer in the page.
+        self.doc = doc;
+
+        // listen for clicks on the document element, see windowClicked.
+        const document_element = (try parser.documentGetDocumentElement(doc)) orelse return error.DocumentElementError;
+        try parser.eventTargetAddEventListener(
+            parser.toEventTarget(parser.Element, document_element),
+            "click",
+            &self.window_clicked_event_node,
+            false,
+        );
+
+        // TODO set document.readyState to interactive
+        // https://html.spec.whatwg.org/#reporting-document-loading-status
+
+        // inject the URL to the document including the fragment.
+        try parser.documentSetDocumentURI(doc, self.url.raw);
+
+        // TODO set the referrer to the document.
+        try self.window.replaceDocument(html_doc);
+        self.window.setStorageShelf(
+            try self.session.storage_shed.getOrPut(try self.origin(self.arena)),
+        );
+
+        // https://html.spec.whatwg.org/#read-html
+
+        // update the session's state
+        self.state.document = html_doc;
+
+        // browse the DOM tree to retrieve scripts
+        // TODO execute the synchronous scripts during the HTML parsing.
+        // TODO fetch the script resources concurrently but execute them in the
+        // declaration order for synchronous ones.
+
+        // async_scripts stores scripts which can be run asynchronously.
+        // for now they are just run after the non-async ones in order to
+        // dispatch DOMContentLoaded as soon as possible.
+        var async_scripts: std.ArrayListUnmanaged(Script) = .{};
+
+        // defer_scripts stores scripts which are meant to be deferred. For now
+        // this doesn't have a huge impact, since normal scripts are parsed
+        // after the document is loaded. But (a) we should fix that and (b)
+        // this results in JavaScript being loaded in the same order as browsers
+        // which can help debug issues (and might actually fix issues if websites
+        // are expecting this execution order)
+        var defer_scripts: std.ArrayListUnmanaged(Script) = .{};
+
+        const root = parser.documentToNode(doc);
+        const walker = Walker{};
+        var next: ?*parser.Node = null;
+        while (true) {
+            next = try walker.get_next(root, next) orelse break;
+
+            // ignore non-elements nodes.
+            if (try parser.nodeType(next.?) != .element) {
+                continue;
+            }
+
+            const e = parser.nodeToElement(next.?);
+
+            // ignore non-js script.
+            const script = try Script.init(e) orelse continue;
+
+            // TODO use fetchpriority
+            // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/script#fetchpriority
+
+            // > async
+            // > For classic scripts, if the async attribute is present,
+            // > then the classic script will be fetched in parallel to
+            // > parsing and evaluated as soon as it is available.
+            // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/script#async
+            if (script.is_async) {
+                try async_scripts.append(arena, script);
+                continue;
+            }
+
+            if (script.is_defer) {
+                try defer_scripts.append(arena, script);
+                continue;
+            }
+
+            // TODO handle for attribute
+            // TODO handle event attribute
+
+            // > Scripts without async, defer or type="module"
+            // > attributes, as well as inline scripts without the
+            // > type="module" attribute, are fetched and executed
+            // > immediately before the browser continues to parse the
+            // > page.
+            // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/script#notes
+            try parser.documentHTMLSetCurrentScript(html_doc, @ptrCast(e));
+            self.evalScript(&script) catch |err| log.warn("evaljs: {any}", .{err});
+            try parser.documentHTMLSetCurrentScript(html_doc, null);
+        }
+
+        for (defer_scripts.items) |s| {
+            try parser.documentHTMLSetCurrentScript(html_doc, @ptrCast(s.element));
+            self.evalScript(&s) catch |err| log.warn("evaljs: {any}", .{err});
+            try parser.documentHTMLSetCurrentScript(html_doc, null);
+        }
+
+        // dispatch DOMContentLoaded before the transition to "complete",
+        // at the point where all subresources apart from async script elements
+        // have loaded.
+        // https://html.spec.whatwg.org/#reporting-document-loading-status
+        const evt = try parser.eventCreate();
+        defer parser.eventDestroy(evt);
+
+        try parser.eventInit(evt, "DOMContentLoaded", .{ .bubbles = true, .cancelable = true });
+        _ = try parser.eventTargetDispatchEvent(parser.toEventTarget(parser.DocumentHTML, html_doc), evt);
+
+        // eval async scripts.
+        for (async_scripts.items) |s| {
+            try parser.documentHTMLSetCurrentScript(html_doc, @ptrCast(s.element));
+            self.evalScript(&s) catch |err| log.warn("evaljs: {any}", .{err});
+            try parser.documentHTMLSetCurrentScript(html_doc, null);
+        }
+
+        // TODO wait for async scripts
+
+        // TODO set document.readyState to complete
+
+        // dispatch window.load event
+        const loadevt = try parser.eventCreate();
+        defer parser.eventDestroy(loadevt);
+
+        try parser.eventInit(loadevt, "load", .{});
+        _ = try parser.eventTargetDispatchEvent(
+            parser.toEventTarget(Window, &self.window),
+            loadevt,
+        );
+    }
+
+    // evalScript evaluates the src in priority.
+    // if no src is present, we evaluate the text source.
+    // A JS error raised by an external script is swallowed here (it has
+    // already been logged by Script.eval); other errors are returned.
+    // https://html.spec.whatwg.org/multipage/scripting.html#script-processing-model
+    fn evalScript(self: *Page, script: *const Script) !void {
+        const src = script.src orelse {
+            // source is inline
+            // TODO handle charset attribute
+            if (try parser.nodeTextContent(parser.elementToNode(script.element))) |text| {
+                try script.eval(self, text);
+            }
+            return;
+        };
+
+        // expose the script so that fetchModuleSource can resolve relative
+        // module specifiers against its src.
+        self.current_script = script;
+        defer self.current_script = null;
+
+        log.debug("starting GET {s}", .{src});
+
+        // https://html.spec.whatwg.org/multipage/webappapis.html#fetch-a-classic-script
+        const body = (try self.fetchData(src, null)) orelse {
+            // TODO If el's result is null, then fire an event named error at
+            // el, and return
+            return;
+        };
+
+        script.eval(self, body) catch |err| switch (err) {
+            error.JsErr => {}, // nothing to do here.
+            else => return err,
+        };
+
+        // TODO If el's from an external file is true, then fire an event
+        // named load at el.
+    }
+
+    // fetchData returns the data corresponding to the src target.
+    // It resolves src using the page's uri.
+    // If a base path is given, src is resolved according to the base first.
+    // Data URIs are decoded locally without any request.
+    // Returns null when the response body is empty and error.BadStatusCode
+    // when the response status is not 200.
+    // The returned slice is allocated with the page arena.
+    fn fetchData(self: *const Page, src: []const u8, base: ?[]const u8) !?[]const u8 {
+        log.debug("starting fetch {s}", .{src});
+
+        const arena = self.arena;
+
+        // Handle data URIs.
+        if (try DataURI.parse(arena, src)) |data_uri| {
+            return data_uri.data;
+        }
+
+        var res_src = src;
+
+        // if a base path is given, we resolve src using base.
+        if (base) |_base| {
+            res_src = try URL.stitch(arena, src, _base);
+        }
+
+        var origin_url = &self.url;
+        const url = try origin_url.resolve(arena, res_src);
+
+        var request = try self.newHTTPRequest(.GET, &url, .{
+            .origin_uri = &origin_url.uri,
+            .navigation = false,
+        });
+        defer request.deinit();
+
+        var response = try request.sendSync(.{});
+        var header = response.header;
+        try self.session.cookie_jar.populateFromResponse(&url.uri, &header);
+
+        log.info("fetch {any}: {d}", .{ url, header.status });
+
+        if (header.status != 200) {
+            return error.BadStatusCode;
+        }
+
+        var arr: std.ArrayListUnmanaged(u8) = .{};
+        while (try response.next()) |data| {
+            // appendSlice copies the chunk into arr, no extra dupe needed.
+            try arr.appendSlice(arena, data);
+        }
+
+        // TODO check content-type
+
+        // check no body
+        if (arr.items.len == 0) {
+            return null;
+        }
+
+        return arr.items;
+    }
+
+    // Creates a request for `url` and adds a Cookie header built from the
+    // cookie jar when it has cookies matching `opts`.
+    // The caller must deinit the returned request.
+    fn newHTTPRequest(self: *const Page, method: http.Request.Method, url: *const URL, opts: storage.cookie.LookupOpts) !http.Request {
+        var request = try self.state.http_client.request(method, &url.uri);
+        errdefer request.deinit();
+
+        var arr: std.ArrayListUnmanaged(u8) = .{};
+        try self.state.cookie_jar.forRequest(&url.uri, arr.writer(self.arena), opts);
+
+        if (arr.items.len > 0) {
+            try request.addHeader("Cookie", arr.items, .{});
+        }
+
+        return request;
+    }
+
+    pub const MouseEvent = struct {
+        x: i32,
+        y: i32,
+        type: Type,
+
+        const Type = enum {
+            pressed,
+            released,
+        };
+    };
+
+    // Turns a mouse press into a "click" event dispatched on the element the
+    // renderer reports at that position. Other event types and positions
+    // without an element are ignored.
+    pub fn mouseEvent(self: *Page, me: MouseEvent) !void {
+        if (me.type != .pressed) {
+            return;
+        }
+
+        const element = self.renderer.getElementAtPosition(me.x, me.y) orelse return;
+
+        const event = try parser.mouseEventCreate();
+        defer parser.mouseEventDestroy(event);
+        try parser.mouseEventInit(event, "click", .{
+            .bubbles = true,
+            .cancelable = true,
+            .x = me.x,
+            .y = me.y,
+        });
+        _ = try parser.elementDispatchEvent(element, @ptrCast(event));
+    }
+
+    // Click listener registered in loadHTMLDoc. Errors cannot be returned
+    // from an event callback, so they are logged.
+    fn windowClicked(node: *parser.EventNode, event: *parser.Event) void {
+        const self: *Page = @fieldParentPtr("window_clicked_event_node", node);
+        self._windowClicked(event) catch |err| {
+            log.err("window click handler: {}", .{err});
+        };
+    }
+
+    // Schedules a navigation when the click target is an <a> element with an
+    // href attribute; any other target is ignored.
+    fn _windowClicked(self: *Page, event: *parser.Event) !void {
+        const target = (try parser.eventTarget(event)) orelse return;
+
+        const node = parser.eventTargetToNode(target);
+        if (try parser.nodeType(node) != .element) {
+            return;
+        }
+
+        const html_element: *parser.ElementHTML = @ptrCast(node);
+        switch (try parser.elementHTMLGetTagType(html_element)) {
+            .a => {
+                const element: *parser.Element = @ptrCast(node);
+                const href = (try parser.elementGetAttribute(element, "href")) orelse return;
+
+                // We cannot navigate immediately as navigating will delete the DOM tree, which holds this event's node.
+                // As such we schedule the function to be called as soon as possible.
+                // NOTE Using the page.arena assumes that the scheduling loop does not use this object after invoking the callback
+                // If that changes we may want to consider storing DelayedNavigation in the session instead.
+                const arena = self.arena;
+                const navi = try arena.create(DelayedNavigation);
+                navi.* = .{
+                    .session = self.session,
+                    .href = try arena.dupe(u8, href),
+                };
+                _ = try self.state.loop.timeout(0, &navi.navigate_node);
+            },
+            else => {},
+        }
+    }
+
+    // A navigation to href deferred to the event loop, see _windowClicked.
+    const DelayedNavigation = struct {
+        navigate_node: Loop.CallbackNode = .{ .func = DelayedNavigation.delay_navigate },
+        session: *Session,
+        href: []const u8,
+
+        fn delay_navigate(node: *Loop.CallbackNode, repeat_delay: *?u63) void {
+            _ = repeat_delay;
+            const self: *DelayedNavigation = @fieldParentPtr("navigate_node", node);
+            self.session.pageNavigate(self.href) catch |err| {
+                log.err("Delayed navigation error {}", .{err}); // TODO: should we trigger a specific event here?
+            };
+        }
+    };
+
+    // A <script> element the page knows how to evaluate.
+    const Script = struct {
+        kind: Kind,
+        is_async: bool,
+        is_defer: bool,
+        src: ?[]const u8,
+        element: *parser.Element,
+        // The javascript to load after we successfully load the script
+        onload: ?[]const u8,
+
+        // The javascript to load if we have an error executing the script
+        // For now, we ignore this, since we still have a lot of errors that we
+        // shouldn't
+        //onerror: ?[]const u8,
+
+        const Kind = enum {
+            module,
+            javascript,
+        };
+
+        // Builds a Script from the element. Returns null when the element is
+        // not a script tag, has the nomodule attribute, or has a type that
+        // parseKind does not recognize.
+        fn init(e: *parser.Element) !?Script {
+            // ignore non-script tags
+            const tag = try parser.elementHTMLGetTagType(@as(*parser.ElementHTML, @ptrCast(e)));
+            if (tag != .script) {
+                return null;
+            }
+
+            if (try parser.elementGetAttribute(e, "nomodule") != null) {
+                // these scripts should only be loaded if we don't support modules
+                // but since we do support modules, we can just skip them.
+                return null;
+            }
+
+            const kind = parseKind(try parser.elementGetAttribute(e, "type")) orelse {
+                return null;
+            };
+
+            return .{
+                .kind = kind,
+                .element = e,
+                .src = try parser.elementGetAttribute(e, "src"),
+                .onload = try parser.elementGetAttribute(e, "onload"),
+                .is_async = try parser.elementGetAttribute(e, "async") != null,
+                .is_defer = try parser.elementGetAttribute(e, "defer") != null,
+            };
+        }
+
+        // > type
+        // > Attribute is not set (default), an empty string, or a JavaScript MIME
+        // > type indicates that the script is a "classic script", containing
+        // > JavaScript code.
+        // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/script#attribute_is_not_set_default_an_empty_string_or_a_javascript_mime_type
+        //
+        // Returns null for any other type. The comparison is exact.
+        fn parseKind(script_type_: ?[]const u8) ?Kind {
+            const script_type = script_type_ orelse return .javascript;
+            if (script_type.len == 0) {
+                return .javascript;
+            }
+
+            if (std.mem.eql(u8, script_type, "application/javascript")) return .javascript;
+            if (std.mem.eql(u8, script_type, "text/javascript")) return .javascript;
+            if (std.mem.eql(u8, script_type, "module")) return .module;
+
+            return null;
+        }
+
+        // Evaluates body in the page's scope as a classic script or as a
+        // module, then runs the onload attribute if there is one.
+        // Returns error.JsErr, after logging the message, when either fails.
+        fn eval(self: *const Script, page: *Page, body: []const u8) !void {
+            var try_catch: Env.TryCatch = undefined;
+            try_catch.init(page.scope);
+            defer try_catch.deinit();
+
+            const src = self.src orelse "inline";
+            const res = switch (self.kind) {
+                .javascript => page.scope.exec(body, src),
+                .module => page.scope.module(body, src),
+            } catch {
+                if (try try_catch.err(page.arena)) |msg| {
+                    log.info("eval script {s}: {s}", .{ src, msg });
+                }
+                return error.JsErr;
+            };
+
+            if (builtin.mode == .Debug) {
+                const msg = try res.toString(page.arena);
+                log.debug("eval script {s}: {s}", .{ src, msg });
+            }
+
+            if (self.onload) |onload| {
+                _ = page.scope.exec(onload, "script_on_load") catch {
+                    if (try try_catch.err(page.arena)) |msg| {
+                        log.info("eval script onload {s}: {s}", .{ src, msg });
+                    }
+                    return error.JsErr;
+                };
+            }
+        }
+    };
+};
+
+// Why a navigation started; sent with the page_navigate notification.
+pub const NavigateReason = enum {
+    anchor,
+    address_bar,
+};
+
+const NavigateOpts = struct {
+    reason: NavigateReason = .address_bar,
+};
+
+// Seconds read from the monotonic clock.
+fn timestamp() u32 {
+    const ts = std.posix.clock_gettime(std.posix.CLOCK.MONOTONIC) catch unreachable;
+    return @intCast(ts.sec);
+}
diff --git a/src/browser/renderer.zig b/src/browser/renderer.zig new file mode 100644 index 00000000..26be82bb --- /dev/null +++ b/src/browser/renderer.zig @@ -0,0 +1,96 @@ +// Copyright (C) 2023-2024 Lightpanda (Selecy SAS) +// +// Francis Bouvier +// Pierre Tachoire +// +// This program is free software: you can
redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as
+// published by the Free Software Foundation, either version 3 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+const std = @import("std");
+
+const parser = @import("netsurf.zig");
+
+const Allocator = std.mem.Allocator;
+
+// Provides a very poor abstraction to the rest of the code. In theory, we can change
+// the FlatRenderer to a different implementation, and it'll all just work.
+pub const Renderer = FlatRenderer;
+
+// This "renderer" positions elements in a single row in an unspecified order.
+// The important thing is that elements have a consistent position/index within
+// that row, which can be turned into a rectangle.
+const FlatRenderer = struct {
+    allocator: Allocator,
+
+    // key is the address of the element (@intFromPtr)
+    // value is the index position
+    positions: std.AutoHashMapUnmanaged(u64, u32),
+
+    // given an index, get the element (stored as its address)
+    elements: std.ArrayListUnmanaged(u64),
+
+    const Element = @import("dom/element.zig").Element;
+
+    // we expect allocator to be an arena
+    pub fn init(allocator: Allocator) FlatRenderer {
+        return .{
+            .elements = .{},
+            .positions = .{},
+            .allocator = allocator,
+        };
+    }
+
+    // Returns the 1x1 rectangle of the element. The first time an element is
+    // seen it is given the next free index in the row; later calls return
+    // that same index as x.
+    pub fn getRect(self: *FlatRenderer, e: *parser.Element) !Element.DOMRect {
+        const key: u64 = @intFromPtr(e);
+        const gop = try self.positions.getOrPut(self.allocator, key);
+        if (!gop.found_existing) {
+            // getOrPut has already inserted the key with an undefined value.
+            // If recording the element fails, take the entry out again so a
+            // later call does not read that undefined index.
+            errdefer _ = self.positions.remove(key);
+
+            const index: u32 = @intCast(self.elements.items.len);
+            try self.elements.append(self.allocator, key);
+            gop.value_ptr.* = index;
+        }
+
+        return .{
+            .x = @floatFromInt(gop.value_ptr.*),
+            .y = 0.0,
+            .width = 1.0,
+            .height = 1.0,
+        };
+    }
+
+    // Returns the rectangle covering the whole row, anchored at (0, 0).
+    pub fn boundingRect(self: *const FlatRenderer) Element.DOMRect {
+        return .{
+            .x = 0.0,
+            .y = 0.0,
+            .width = @floatFromInt(self.width()),
+            .height = @floatFromInt(self.height()),
+        };
+    }
+
+    // Number of positioned elements, and never less than 1.
+    pub fn width(self: *const FlatRenderer) u32 {
+        return @max(@as(u32, @intCast(self.elements.items.len)), 1); // At least 1 pixel even if empty
+    }
+
+    pub fn height(_: *const FlatRenderer) u32 {
+        return 1;
+    }
+
+    // Returns the element whose index is x, or null when the position is
+    // outside the row (y != 0, x negative, or x past the last element).
+    pub fn getElementAtPosition(self: *const FlatRenderer, x: i32, y: i32) ?*parser.Element {
+        if (y != 0 or x < 0) {
+            return null;
+        }
+
+        const elements = self.elements.items;
+        return if (x < elements.len) @ptrFromInt(elements[@intCast(x)]) else null;
+    }
+};
diff --git a/src/browser/session.zig b/src/browser/session.zig new file mode 100644 index 00000000..d5e83257 --- /dev/null +++ b/src/browser/session.zig @@ -0,0 +1,132 @@ +// Copyright (C) 2023-2024 Lightpanda (Selecy SAS) +// +// Francis Bouvier +// Pierre Tachoire +// +// This program is free software: you can redistribute it and/or modify +// it under the
terms of the GNU Affero General Public License as
+// published by the Free Software Foundation, either version 3 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+const std = @import("std");
+
+const ArenaAllocator = std.heap.ArenaAllocator;
+
+const Env = @import("env.zig").Env;
+const Page = @import("page.zig").Page;
+const Browser = @import("browser.zig").Browser;
+
+const parser = @import("netsurf.zig");
+const storage = @import("storage/storage.zig");
+
+const log = std.log.scoped(.session);
+
+// A Session plays the role of a browser tab.
+// It owns the JS executor, the storage shed and the cookie jar shared by
+// every page opened in it. Pages are created one after the other: the
+// current page must be removed before another one is created.
+pub const Session = struct {
+    browser: *Browser,
+
+    // Used to create our Inspector and in the BrowserContext.
+    arena: ArenaAllocator,
+
+    executor: Env.Executor,
+    storage_shed: storage.Shed,
+    cookie_jar: storage.CookieJar,
+
+    page: ?Page = null,
+
+    // Initializes the session in place with a new executor from the
+    // browser's env. The session starts without a page.
+    pub fn init(self: *Session, browser: *Browser) !void {
+        const gpa = browser.app.allocator;
+
+        var exec = try browser.env.newExecutor();
+        errdefer exec.deinit();
+
+        self.* = .{
+            .browser = browser,
+            .executor = exec,
+            .arena = ArenaAllocator.init(gpa),
+            .storage_shed = storage.Shed.init(gpa),
+            .cookie_jar = storage.CookieJar.init(gpa),
+        };
+    }
+
+    // Removes the current page, if any, then releases what the session owns.
+    pub fn deinit(self: *Session) void {
+        if (self.page) |_| self.removePage();
+
+        self.arena.deinit();
+        self.cookie_jar.deinit();
+        self.storage_shed.deinit();
+        self.executor.deinit();
+    }
+
+    // Creates the session's page. There must be no current page.
+    // The returned pointer is a convenience only: the page stays owned by
+    // the session.
+    // NOTE(review): when Page.init fails, self.page is left non-null with
+    // undefined content and the netsurf arena stays initialized; confirm
+    // whether this path needs cleanup.
+    pub fn createPage(self: *Session) !*Page {
+        std.debug.assert(self.page == null);
+
+        // The netsurf memory arena has to exist this early: JS event handlers
+        // may be registered through Runtime.evaluate before the first html
+        // doc is loaded.
+        try parser.init();
+
+        // Recycle the browser-wide page arena, keeping at most 1 MiB of it.
+        const arena = &self.browser.page_arena;
+        _ = arena.reset(.{ .retain_with_limit = 1024 * 1024 });
+
+        // Page.init fills the page in place, so reserve its storage first.
+        self.page = @as(Page, undefined);
+        const new_page = &self.page.?;
+        try Page.init(new_page, arena.allocator(), self);
+
+        // start JS env
+        log.debug("start new js scope", .{});
+
+        // Tell CDP that the main page exists, so that the contexts of the
+        // other Worlds can be created as well.
+        self.browser.notification.dispatch(.page_created, new_page);
+
+        return new_page;
+    }
+
+    // Tears the current page down: loop callbacks, JS scope and netsurf arena.
+    pub fn removePage(self: *Session) void {
+        // CDP hears about it first, so that the other worlds can remove
+        // themselves before the main one.
+        self.browser.notification.dispatch(.page_remove, .{});
+
+        std.debug.assert(self.page != null);
+
+        // Drop every callback still registered on the loop.
+        self.browser.app.loop.reset();
+        self.executor.endScope();
+        self.page = null;
+
+        // Release the netsurf memory arena.
+        parser.deinit();
+    }
+
+    // Returns a pointer to the current page, or null when there is none.
+    pub fn currentPage(self: *Session) ?*Page {
+        if (self.page) |*current| return current;
+        return null;
+    }
+
+    // Replaces the current page with a new one and navigates it to
+    // url_string, resolved against the URL of the current page.
+    pub fn pageNavigate(self: *Session, url_string: []const u8) !void {
+        // Only the page calls this for now, so a page is expected to exist.
+        std.debug.assert(self.page != null);
+
+        // The URL is resolved into a stack buffer: the page arena is about to
+        // be reset, and the session arena would look like a leak after many
+        // page-to-page navigations.
+        // NOTE(review): the resolved URL presumably points into `scratch`,
+        // and Page.navigate stores it in page.url until the response has
+        // arrived; an error in between would leave page.url dangling once
+        // this function returns. Confirm.
+        var scratch: [2048]u8 = undefined;
+        var fba = std.heap.FixedBufferAllocator.init(&scratch);
+        const target = try self.page.?.url.resolve(fba.allocator(), url_string);
+
+        self.removePage();
+        const next_page = try self.createPage();
+        return next_page.navigate(target, .{ .reason = .anchor });
+    }
+};
diff --git a/src/browser/xhr/xhr.zig b/src/browser/xhr/xhr.zig index 13c05d31..4ea517cd 100644 --- a/src/browser/xhr/xhr.zig +++ b/src/browser/xhr/xhr.zig @@ -20,7 +20,6 @@ const std = @import("std"); const Allocator = std.mem.Allocator; const DOMError = @import("../netsurf.zig").DOMError; -const DOMException = @import("../dom/exceptions.zig").DOMException; const ProgressEvent = @import("progress_event.zig").ProgressEvent; const XMLHttpRequestEventTarget = @import("event_target.zig").XMLHttpRequestEventTarget; diff --git a/src/cdp/cdp.zig b/src/cdp/cdp.zig index 5c18a6c5..0f022999 100644 --- a/src/cdp/cdp.zig +++ b/src/cdp/cdp.zig @@ -24,8 +24,8 @@ const App = @import("../app.zig").App; const Env = @import("../browser/env.zig").Env; const asUint = @import("../str/parser.zig").asUint; const Browser = @import("../browser/browser.zig").Browser; -const Session = @import("../browser/browser.zig").Session; -const Page = @import("../browser/browser.zig").Page; +const Session = @import("../browser/session.zig").Session; +const Page = @import("../browser/page.zig").Page; const
Inspector = @import("../browser/env.zig").Env.Inspector; const Incrementing = @import("../id.zig").Incrementing; const Notification = @import("../notification.zig").Notification; diff --git a/src/cdp/domains/input.zig b/src/cdp/domains/input.zig index 2aaee495..b6897dc2 100644 --- a/src/cdp/domains/input.zig +++ b/src/cdp/domains/input.zig @@ -17,7 +17,7 @@ // along with this program. If not, see . const std = @import("std"); -const Page = @import("../../browser/browser.zig").Page; +const Page = @import("../../browser/page.zig").Page; pub fn processMessage(cmd: anytype) !void { const action = std.meta.stringToEnum(enum { diff --git a/src/cdp/domains/page.zig b/src/cdp/domains/page.zig index f61e2d13..75e10695 100644 --- a/src/cdp/domains/page.zig +++ b/src/cdp/domains/page.zig @@ -18,8 +18,8 @@ const std = @import("std"); const URL = @import("../../url.zig").URL; +const Page = @import("../../browser/page.zig").Page; const Notification = @import("../../notification.zig").Notification; -const Page = @import("../../browser/browser.zig").Page; pub fn processMessage(cmd: anytype) !void { const action = std.meta.stringToEnum(enum { diff --git a/src/notification.zig b/src/notification.zig index ab8c2511..397b3d62 100644 --- a/src/notification.zig +++ b/src/notification.zig @@ -1,7 +1,7 @@ const std = @import("std"); const URL = @import("url.zig").URL; -const browser = @import("browser/browser.zig"); +const page = @import("browser/page.zig"); const Allocator = std.mem.Allocator; @@ -64,7 +64,7 @@ pub const Notification = struct { const Events = union(enum) { page_remove: PageRemove, - page_created: *browser.Page, + page_created: *page.Page, page_navigate: *const PageNavigate, page_navigated: *const PageNavigated, notification_created: *Notification, @@ -76,7 +76,7 @@ pub const Notification = struct { pub const PageNavigate = struct { timestamp: u32, url: *const URL, - reason: browser.NavigateReason, + reason: page.NavigateReason, }; pub const PageNavigated = struct { diff 
--git a/src/runtime/loop.zig b/src/runtime/loop.zig index 5c31b973..ae9e1206 100644 --- a/src/runtime/loop.zig +++ b/src/runtime/loop.zig @@ -22,8 +22,6 @@ const MemoryPool = std.heap.MemoryPool; pub const IO = @import("tigerbeetle-io").IO; -const JSCallback = @import("../browser/env.zig").Env.Callback; - const log = std.log.scoped(.loop); // SingleThreaded I/O Loop based on Tigerbeetle io_uring loop. diff --git a/src/testing.zig b/src/testing.zig index 0042fce5..f11c0ab6 100644 --- a/src/testing.zig +++ b/src/testing.zig @@ -371,7 +371,7 @@ pub const JsRunner = struct { const HttpClient = @import("http/client.zig").Client; const storage = @import("browser/storage/storage.zig"); const Window = @import("browser/html/window.zig").Window; - const Renderer = @import("browser/browser.zig").Renderer; + const Renderer = @import("browser/renderer.zig").Renderer; const SessionState = @import("browser/env.zig").SessionState; url: URL,