Files
browser/src/browser/page.zig
2025-08-11 21:37:03 +08:00

1014 lines
38 KiB
Zig

// Copyright (C) 2023-2024 Lightpanda (Selecy SAS)
//
// Francis Bouvier <francis@lightpanda.io>
// Pierre Tachoire <pierre@lightpanda.io>
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
const std = @import("std");
const builtin = @import("builtin");
const Allocator = std.mem.Allocator;
const Dump = @import("dump.zig");
const State = @import("State.zig");
const Env = @import("env.zig").Env;
const Mime = @import("mime.zig").Mime;
const Session = @import("session.zig").Session;
const Renderer = @import("renderer.zig").Renderer;
const Window = @import("html/window.zig").Window;
const Walker = @import("dom/walker.zig").WalkerDepthFirst;
const Scheduler = @import("Scheduler.zig");
const HttpClient = @import("../http/Client.zig");
const ScriptManager = @import("ScriptManager.zig");
const HTMLDocument = @import("html/document.zig").HTMLDocument;
const URL = @import("../url.zig").URL;
const log = @import("../log.zig");
const parser = @import("netsurf.zig");
const storage = @import("storage/storage.zig");
const polyfill = @import("polyfill/polyfill.zig");
// Page navigates to a URL.
// You can navigate to multiple URLs with the same page, but you have to call
// end() to stop the previous navigation before starting a new one.
// The page handles all its memory in an arena allocator. The arena is reset
// when end() is called.
pub const Page = struct {
cookie_jar: *storage.CookieJar,
// Pre-configured http/client.zig used to make HTTP requests.
// @newhttp
// request_factory: RequestFactory,
session: *Session,
// An arena with a lifetime for the entire duration of the page
arena: Allocator,
// Managed by the JS runtime, meant to have a much shorter life than the
// above arena. It should only be used by WebAPIs.
call_arena: Allocator,
// Serves as the root object of our JavaScript environment
window: Window,
// The URL of the page
url: URL,
renderer: Renderer,
keydown_event_node: parser.EventNode,
window_clicked_event_node: parser.EventNode,
// Our JavaScript context for this specific page. This is what we use to
// execute any JavaScript
main_context: *Env.JsContext,
// indicates intention to navigate to another page on the next loop execution.
delayed_navigation: bool = false,
state_pool: *std.heap.MemoryPool(State),
polyfill_loader: polyfill.Loader = .{},
scheduler: Scheduler,
http_client: *HttpClient,
script_manager: ScriptManager,
mode: Mode,
load_state: LoadState = .parsing,
// Tracks how far the main resource of the page has progressed. The payload,
// when there is one, is the data specific to that stage.
const Mode = union(enum) {
// initial state, also restored by reset(): no response body seen yet
pre: void,
// the main request failed; holds the error given to pageErrorCallback
err: anyerror,
// an HTML response was fully received (set by pageDoneCallback)
parsed: void,
// an HTML response is being streamed into the parser
html: parser.Parser,
// a non-HTML response is being accumulated chunk by chunk
raw: std.ArrayListUnmanaged(u8),
// a non-HTML response was fully received; holds the complete body
raw_done: []const u8,
};
// Lifecycle of the document. Advanced by documentIsLoaded and
// documentIsComplete, and set back to .parsing by reset().
const LoadState = enum {
// the main HTML is being parsed (or downloaded)
parsing,
// the main HTML has been parsed and the JavaScript (including deferred
// scripts) have been loaded. Corresponds to the DOMContentLoaded event
load,
// the page has been loaded and all async scripts (if any) are done
// Corresponds to the load event
complete,
};
// Initializes the page in place.
//
// `self` must already sit at its final address: the script manager and the
// JS context are handed the page pointer, and the two event nodes are later
// mapped back to the page with @fieldParentPtr.
//
// `arena` backs the page, its renderer and its scheduler.
// Fails if the window, the JS context, the polyfill preload or the scheduler
// registration fails.
pub fn init(self: *Page, arena: Allocator, session: *Session) !void {
    const owner = session.browser;
    const scripts = ScriptManager.init(owner, self);

    self.* = .{
        // navigation state
        .url = URL.empty,
        .mode = .{ .pre = {} },
        .window = try Window.create(null, null),
        .session = session,
        .cookie_jar = &session.cookie_jar,

        // memory
        .arena = arena,
        // not assigned here; the field comment says the JS runtime manages it
        .call_arena = undefined,
        .state_pool = &owner.state_pool,

        // subsystems
        .renderer = Renderer.init(arena),
        .scheduler = Scheduler.init(arena),
        .http_client = owner.http_client,
        .script_manager = scripts,

        // listeners registered on the document element by pageDoneCallback
        .keydown_event_node = .{ .func = keydownCallback },
        .window_clicked_event_node = .{ .func = windowClicked },

        // filled in right below, it needs pointers into `self`
        .main_context = undefined,
    };

    const missing_global = Env.GlobalMissingCallback.init(&self.polyfill_loader);
    self.main_context = try session.executor.createJsContext(&self.window, self, self, true, missing_global);
    try polyfill.preload(self.arena, self.main_context);

    try self.scheduler.add(self, runMicrotasks, 5, .{ .name = "page.microtasks" });

    // the message loop is only pumped outside of tests
    if (comptime !builtin.is_test) {
        try self.scheduler.add(self, runMessageLoop, 5, .{ .name = "page.messageLoop" });
    }
}
// Tears the page down: flags the script manager as shutting down, aborts
// the HTTP client's transfers, then releases the script manager.
// The flag is raised first so it is already set while the abort runs.
pub fn deinit(self: *Page) void {
    const manager = &self.script_manager;
    manager.shutdown = true;
    self.http_client.abort();
    manager.deinit();
}
// Brings the page back to its pre-navigation state so it can be navigated
// again: scheduler, HTTP client and script manager are cleared, the load
// state and mode are rewound, and the browser's page arena is reset (keeping
// at most 1 MiB of capacity).
fn reset(self: *Page) void {
    const retained_bytes = 1 * 1024 * 1024;

    self.scheduler.reset();
    self.http_client.abort();
    self.script_manager.reset();

    self.load_state = .parsing;
    self.mode = .{ .pre = {} };

    _ = self.session.browser.page_arena.reset(.{ .retain_with_limit = retained_bytes });
}
// Scheduler task: runs the browser's microtasks.
// `ctx` is the *Page given to scheduler.add in init.
// Returns 5; presumably the delay in ms before the task runs again
// (confirm against Scheduler).
fn runMicrotasks(ctx: *anyopaque) ?u32 {
    const page: *Page = @ptrCast(@alignCast(ctx));
    page.session.browser.runMicrotasks();
    return 5;
}
// Scheduler task: pumps the browser's message loop.
// `ctx` is the *Page given to scheduler.add in init.
// Returns 100; presumably the delay in ms before the task runs again
// (confirm against Scheduler).
fn runMessageLoop(ctx: *anyopaque) ?u32 {
    const page: *Page = @ptrCast(@alignCast(ctx));
    page.session.browser.runMessageLoop();
    return 100;
}
// Options accepted by dump().
pub const DumpOpts = struct {
// set to include element shadowroots in the dump
page: ?*const Page = null,
// when true, dump() inserts a <base> element whose href is the page URL
// at the start of <head> before writing the document
with_base: bool = false,
// forwarded unchanged to Dump.writeHTML
exclude_scripts: bool = false,
};
// dump writes the page content into the given file.
//
// - not navigated yet: returns error.PageNotLoaded
// - failed navigation: returns the stored error
// - non-HTML content: the bytes received are written as-is; a warning is
//   logged if the body is still incomplete
// - HTML content: the document is serialized through Dump.writeHTML; a
//   warning is logged if the parser is still being fed
//
// With opts.with_base the document itself is modified (a <base> element is
// inserted) before it is written.
pub fn dump(self: *const Page, opts: DumpOpts, out: std.fs.File) !void {
    switch (self.mode) {
        .pre => return error.PageNotLoaded,
        .err => |err| return err,
        .raw_done => |data| return out.writeAll(data),
        .raw => |partial| {
            // page.wait may have timed out, print what we have
            log.warn(.http, "incomplete load", .{ .mode = "raw" });
            return out.writeAll(partial.items);
        },
        // page.wait may have timed out, serialize what we have
        .html => log.warn(.http, "incomplete load", .{ .mode = "html" }),
        .parsed => {},
    }

    // only .html and .parsed get here
    const document = parser.documentHTMLToDocument(self.window.document);

    if (opts.with_base) try self.addDOMTreeBase();

    try Dump.writeHTML(document, .{
        .page = opts.page,
        .exclude_scripts = opts.exclude_scripts,
    }, out);
}
// addDOMTreeBase inserts a <base href="..."> element, pointing at the page
// URL, as the first child of the document's first <head>.
// Does nothing when the document has no <head>.
fn addDOMTreeBase(self: *const Page) !void {
    const Node = @import("dom/node.zig").Node;

    const document = parser.documentHTMLToDocument(self.window.document);
    std.debug.assert(document.is_html);

    const heads = try parser.documentGetElementsByTagName(document, "head");
    const head = (try parser.nodeListItem(heads, 0)) orelse return;

    const base = try parser.documentCreateElement(document, "base");
    try parser.elementSetAttribute(base, "href", self.url.raw);

    const new_children = [_]Node.NodeOrText{.{ .node = parser.elementToNode(base) }};
    try Node.prepend(head, &new_children);
}
// Fetches the source of the module at `src` through the script manager's
// blocking getter. `ctx` must be the *Page.
pub fn fetchModuleSource(ctx: *anyopaque, src: [:0]const u8) !ScriptManager.BlockingResult {
    const page: *Page = @alignCast(@ptrCast(ctx));
    return page.script_manager.blockingGet(src);
}
// Runs the page for up to `wait_sec` seconds. Never fails: any error coming
// out of _wait is logged here, except JsError which _wait already logged.
pub fn wait(self: *Page, wait_sec: usize) void {
    self._wait(wait_sec) catch |err| {
        // already logged by _wait (with hopefully more context)
        if (err == error.JsError) return;

        // Errors from the http client or the ScriptManager may end up here
        // even though they are not really page-level failures. Once this
        // has been run against more real-world sites, specific errors may
        // deserve their own, more targeted, log message.
        log.err(.browser, "page wait", .{ .err = err });
    };
}
// Drives the page (network transfers + scheduled tasks) until there is
// nothing left to do or `wait_sec` seconds have elapsed.
//
// Returns:
// - normally when the page is idle, when the time budget is used up, or when
//   the next scheduled task would fire after the budget ends
// - error.JsError when a JavaScript exception was caught (already logged)
// - the stored navigation error when the main request failed
// - any error from the timer, the scheduler or the http client
fn _wait(self: *Page, wait_sec: usize) !void {
    // The remaining time is always recomputed from the time elapsed since
    // the start. Subtracting per-iteration laps truncated to whole
    // milliseconds would lose the sub-millisecond part of every iteration,
    // so a run of fast iterations would never consume the budget.
    const total_ms = wait_sec * 1000;
    var ms_remaining = total_ms;
    var timer = try std.time.Timer.start();

    var try_catch: Env.TryCatch = undefined;
    try_catch.init(self.main_context);
    defer try_catch.deinit();

    const scheduler = &self.scheduler;
    const http_client = self.http_client;

    // for debugging
    // defer self.printWaitAnalysis();

    while (true) {
        SW: switch (self.mode) {
            .pre, .raw => {
                // The main page hasn't started/finished navigating.
                // There's no JS to run, and no reason to run the scheduler.
                if (http_client.active == 0) {
                    // haven't started navigating, I guess.
                    return;
                }

                // There should only be 1 active http transfer, the main page
                std.debug.assert(http_client.active == 1);
                try http_client.tick(ms_remaining);
            },
            .html, .parsed => {
                // The HTML page was parsed. We now either have JS scripts to
                // download, or timeouts to execute, or both.

                // scheduler.run could trigger new http transfers, so do not
                // store http_client.active BEFORE this call and then use
                // it AFTER.
                const ms_to_next_task = try scheduler.runHighPriority();

                if (try_catch.hasCaught()) {
                    const msg = (try try_catch.err(self.arena)) orelse "unknown";
                    log.warn(.user_script, "page wait", .{ .err = msg, .src = "scheduler" });
                    return error.JsError;
                }

                if (http_client.active == 0) {
                    if (ms_to_next_task) |ms| {
                        // There are no HTTP transfers, so there's no point
                        // calling http_client.tick.
                        // TODO: should we just force-run the scheduler??
                        if (ms > ms_remaining) {
                            // we'd wait too long, might as well exit early.
                            return;
                        }
                        _ = try scheduler.runLowPriority();
                        // NOTE(review): the product takes the integer type
                        // of `ms`; if that is a 32-bit type this overflows
                        // past ~4.2s. Confirm against Scheduler.
                        std.time.sleep(std.time.ns_per_ms * ms);
                        break :SW;
                    }

                    // We have no active http transfer and no pending
                    // scheduled tasks. We're done
                    return;
                }

                _ = try scheduler.runLowPriority();

                // We'll block here, waiting for network IO. We know when the
                // next timeout is scheduled, and we know how long the caller
                // wants to wait for, so we can pick a good wait duration
                const ms_to_wait = @min(ms_remaining, ms_to_next_task orelse 1000);
                try http_client.tick(ms_to_wait);

                if (try_catch.hasCaught()) {
                    const msg = (try try_catch.err(self.arena)) orelse "unknown";
                    log.warn(.user_script, "page wait", .{ .err = msg, .src = "data" });
                    return error.JsError;
                }
            },
            .err => |err| return err,
            .raw_done => return,
        }

        const ms_elapsed = timer.read() / std.time.ns_per_ms;
        if (ms_elapsed >= total_ms) {
            return;
        }
        // safe: ms_elapsed < total_ms, which is a usize
        ms_remaining = total_ms - @as(usize, @intCast(ms_elapsed));
    }
}
// Debugging aid (see the commented-out defer in _wait): prints to stderr the
// page mode and load state, the active and queued HTTP requests, the
// script manager's three script lists and both scheduler queues.
fn printWaitAnalysis(self: *Page) void {
    const print = std.debug.print;
    const client = self.http_client;
    const manager = &self.script_manager;

    print("mode: {s}\n", .{@tagName(std.meta.activeTag(self.mode))});
    print("load: {s}\n", .{@tagName(self.load_state)});

    {
        print("\nactive requests: {d}\n", .{client.active});
        var cursor = client.handles.in_use.first;
        while (cursor) |entry| : (cursor = entry.next) {
            const transfer = HttpClient.Transfer.fromEasy(entry.data.conn.easy) catch |err| {
                print(" - failed to load transfer: {any}\n", .{err});
                break;
            };
            print(" - {s}\n", .{transfer});
        }
    }

    {
        print("\nqueued requests: {d}\n", .{client.queue.len});
        var cursor = client.queue.first;
        while (cursor) |entry| : (cursor = entry.next) {
            print(" - {s}\n", .{entry.data.url});
        }
    }

    {
        print("\nscripts: {d}\n", .{manager.scripts.len});
        var cursor = manager.scripts.first;
        while (cursor) |entry| : (cursor = entry.next) {
            print(" - {s} complete: {any}\n", .{ entry.data.script.url, entry.data.complete });
        }
    }

    {
        print("\ndeferreds: {d}\n", .{manager.deferreds.len});
        var cursor = manager.deferreds.first;
        while (cursor) |entry| : (cursor = entry.next) {
            print(" - {s} complete: {any}\n", .{ entry.data.script.url, entry.data.complete });
        }
    }

    // reference point for the schedule listings below
    const now = std.time.milliTimestamp();

    {
        print("\nasyncs: {d}\n", .{manager.asyncs.len});
        var cursor = manager.asyncs.first;
        while (cursor) |entry| : (cursor = entry.next) {
            print(" - {s} complete: {any}\n", .{ entry.data.script.url, entry.data.complete });
        }
    }

    // For scheduled tasks the value printed after "complete:" is task.ms
    // minus the current time, not a completion flag.
    {
        print("\nprimary schedule: {d}\n", .{self.scheduler.primary.count()});
        var tasks = self.scheduler.primary.iterator();
        while (tasks.next()) |task| {
            print(" - {s} complete: {any}\n", .{ task.name, task.ms - now });
        }
    }

    {
        print("\nsecondary schedule: {d}\n", .{self.scheduler.secondary.count()});
        var tasks = self.scheduler.secondary.iterator();
        while (tasks.next()) |task| {
            print(" - {s} complete: {any}\n", .{ task.name, task.ms - now });
        }
    }
}
// Returns the origin of the page URL as a string allocated with `arena`.
// The result is never freed here; it lives as long as `arena` does.
pub fn origin(self: *const Page, arena: Allocator) ![]const u8 {
    var out: std.ArrayListUnmanaged(u8) = .empty;
    try self.url.origin(out.writer(arena));
    return out.items;
}
// Options for requestCookie(); both values are copied unchanged into the
// returned HttpClient.RequestCookie.
const RequestCookieOpts = struct {
is_http: bool = true,
// navigate() sets this to true for the main page request
is_navigation: bool = false,
};
// Builds the cookie descriptor attached to an outgoing HTTP request: the
// page's cookie jar, the page URL as origin, and the flags from `opts`.
// The returned value points into the page (`&self.url.uri`), so it must not
// outlive it.
pub fn requestCookie(self: *const Page, opts: RequestCookieOpts) HttpClient.RequestCookie {
    const cookie: HttpClient.RequestCookie = .{
        .jar = self.cookie_jar,
        .origin = &self.url.uri,
        .is_http = opts.is_http,
        .is_navigation = opts.is_navigation,
    };
    return cookie;
}
// Starts loading `request_url` as the main document of the page.
// spec reference: https://html.spec.whatwg.org/#document-lifecycle
//
// The request is only queued on the http client here; the response is
// handled by the page*Callback functions. "about:blank" is special-cased
// and completes synchronously with an empty document.
// A page that was already navigated is reset first.
pub fn navigate(self: *Page, request_url: []const u8, opts: NavigateOpts) !void {
    // navigate can be called several times on the same page (via CDP);
    // start each navigation from a clean page.
    if (self.mode != .pre) self.reset();

    log.info(.http, "navigate", .{
        .url = request_url,
        .method = opts.method,
        .reason = opts.reason,
        .body = opts.body != null,
    });

    const is_blank = std.mem.eql(u8, "about:blank", request_url);
    if (is_blank) {
        // nothing to fetch: install an empty document
        const blank_doc = try parser.documentHTMLParseFromStr("");
        try self.setDocument(blank_doc);

        // We do not processHTMLDoc here as we know we don't have any scripts
        // This assumption may be false when CDP Page.addScriptToEvaluateOnNewDocument is implemented
        try HTMLDocument.documentIsComplete(self.window.document, self);
        return;
    }

    const url_z = try self.arena.dupeZ(u8, request_url);
    self.url = try URL.parse(url_z, null);

    const queued = self.http_client.request(.{
        .ctx = self,
        .url = url_z,
        .method = opts.method,
        .body = opts.body,
        .cookie = self.requestCookie(.{ .is_navigation = true }),
        .header_done_callback = pageHeaderDoneCallback,
        .data_callback = pageDataCallback,
        .done_callback = pageDoneCallback,
        .error_callback = pageErrorCallback,
    });
    queued catch |err| {
        log.err(.http, "navigate request", .{ .url = url_z, .err = err });
        return err;
    };

    self.session.browser.notification.dispatch(.page_navigate, &.{
        .opts = opts,
        .url = url_z,
        .timestamp = timestamp(),
    });
}
// Records `script` (or null) as the current script of the window's document.
pub fn setCurrentScript(self: *Page, script: ?*parser.Script) !void {
    try parser.documentHTMLSetCurrentScript(self.window.document, script);
}
// Moves the page from .parsing to .load and notifies the HTML document.
// Idempotent: calls made once the page has left .parsing are ignored. That
// happens when an async script dynamically adds a script after the first
// call, which makes the ScriptManager call this again.
// A failure of the document notification is logged, not returned.
pub fn documentIsLoaded(self: *Page) void {
    switch (self.load_state) {
        .parsing => {},
        .load, .complete => return,
    }

    self.load_state = .load;
    HTMLDocument.documentIsLoaded(self.window.document, self) catch |err| {
        log.err(.browser, "document is loaded", .{ .err = err });
    };
}
// Moves the page to .complete, fires the completion logic (including the
// window "load" event) and dispatches the page_navigated notification.
// Failures of the completion logic are logged, not returned; the
// notification is dispatched either way.
pub fn documentIsComplete(self: *Page) void {
    switch (self.load_state) {
        // Already done. With dynamic scripts this can legitimately be
        // called more than once: an async script evaluated after Loaded and
        // Complete may load its own non-async script which, once finished,
        // ends up here again.
        .complete => return,
        // Called directly, without documentIsLoaded first: happens when the
        // page has _only_ async scripts.
        .parsing => self.documentIsLoaded(),
        .load => {},
    }

    self.load_state = .complete;
    self._documentIsComplete() catch |err| {
        log.err(.browser, "document is complete", .{ .err = err });
    };

    self.session.browser.notification.dispatch(.page_navigated, &.{
        .url = self.url.raw,
        .timestamp = timestamp(),
    });
}
// Fallible part of documentIsComplete: notifies the HTML document, then
// creates a "load" event and dispatches it on the window. The event is
// destroyed before returning.
fn _documentIsComplete(self: *Page) !void {
    try HTMLDocument.documentIsComplete(self.window.document, self);

    const load_event = try parser.eventCreate();
    defer parser.eventDestroy(load_event);

    log.debug(.script_event, "dispatch event", .{ .type = "load", .source = "page" });
    try parser.eventInit(load_event, "load", .{});

    const window_target = parser.toEventTarget(Window, &self.window);
    _ = try parser.eventTargetDispatchEvent(window_target, load_event);
}
// HTTP callback for the main request, registered as header_done_callback.
// Replaces the page URL with the URL reported by the response header, which
// differs from the requested one after a redirect. The URL is copied into
// the page arena.
fn pageHeaderDoneCallback(transfer: *HttpClient.Transfer) !void {
    const page: *Page = @alignCast(@ptrCast(transfer.ctx));
    const header = &transfer.response_header.?;

    const final_url = try page.arena.dupe(u8, std.mem.span(header.url));
    page.url = try URL.parse(final_url, null);

    log.debug(.http, "navigate header", .{
        .url = page.url,
        .status = header.status,
        .content_type = header.contentType(),
    });
}
// HTTP callback for the main request, invoked for each chunk of the body.
// The first chunk decides how the response is handled: HTML is streamed
// into a parser, anything else is accumulated in the page arena. Later
// chunks are fed to whichever was chosen.
fn pageDataCallback(transfer: *HttpClient.Transfer, data: []const u8) !void {
    const page: *Page = @alignCast(@ptrCast(transfer.ctx));

    if (page.mode == .pre) {
        // Decided lazily: without a Content-Type header, the first chunk of
        // data is needed to sniff the type.
        const detected: ?Mime = if (transfer.response_header.?.contentType()) |ct|
            try Mime.parse(ct)
        else
            Mime.sniff(data);
        const mime: Mime = detected orelse .unknown;

        const is_html = mime.isHTML();
        log.debug(.http, "navigate first chunk", .{ .html = is_html, .len = data.len });

        if (is_html) {
            page.mode = .{ .html = try parser.Parser.init(mime.charset orelse "UTF-8") };
        } else {
            page.mode = .{ .raw = .{} };
        }
    }

    switch (page.mode) {
        .html => |*html_parser| try html_parser.process(data),
        .raw => |*body| try body.appendSlice(page.arena, data),
        // .pre was replaced above; the others are only set once the
        // transfer is over
        .pre, .parsed, .err, .raw_done => unreachable,
    }
}
// HTTP callback for the main request, invoked once the transfer completed
// successfully. `ctx` is the *Page.
//
// - no body received: the page ends up with an empty raw body
// - non-HTML body: the accumulated bytes become the final raw body
// - HTML body: the parsed document is installed on the window, the click and
//   keydown listeners are registered on the document element, and every
//   <script> element of the tree is handed to the script manager. When no
//   script is left pending, the document is completed right away.
//
// Fails with error.DocumentElementError when the document has no root
// element, or with any error from the parser or the script manager.
fn pageDoneCallback(ctx: *anyopaque) !void {
    log.debug(.http, "navigate done", .{});

    var self: *Page = @alignCast(@ptrCast(ctx));
    self.clearTransferArena();

    switch (self.mode) {
        .pre => {
            // The mode only leaves .pre in pageDataCallback, so a response
            // without a body gets here still in .pre. Handle it like an
            // empty non-HTML body instead of hitting `unreachable`.
            self.mode = .{ .raw_done = "" };
        },
        .raw => |buf| self.mode = .{ .raw_done = buf.items },
        .html => |*p| {
            // grab the document before releasing the parser and before the
            // mode (which `p` points into) is overwritten
            const html_doc = p.html_doc;
            p.deinit(); // don't need the parser anymore

            self.mode = .{ .parsed = {} };

            try self.setDocument(html_doc);
            const doc = parser.documentHTMLToDocument(html_doc);

            // we want to be notified of any dynamically added script tags
            // so that we can load the script
            parser.documentSetScriptAddedCallback(doc, self, scriptAddedCallback);

            const document_element = (try parser.documentGetDocumentElement(doc)) orelse return error.DocumentElementError;
            const root_target = parser.toEventTarget(parser.Element, document_element);
            _ = try parser.eventTargetAddEventListener(
                root_target,
                "click",
                &self.window_clicked_event_node,
                false,
            );
            _ = try parser.eventTargetAddEventListener(
                root_target,
                "keydown",
                &self.keydown_event_node,
                false,
            );

            // depth-first walk of the whole tree, collecting script elements
            const root = parser.documentToNode(doc);
            const walker = Walker{};
            var next: ?*parser.Node = null;
            while (try walker.get_next(root, next)) |node| {
                next = node;

                const tag = (try parser.nodeHTMLGetTagType(node)) orelse continue;
                if (tag != .script) {
                    // only script elements are of interest here
                    continue;
                }
                try self.script_manager.addFromElement(@ptrCast(node));
            }

            self.script_manager.staticScriptsDone();

            if (self.script_manager.isDone()) {
                // No scripts, or just inline scripts that were already
                // processed: we need to trigger this ourselves
                self.documentIsComplete();
            }
        },
        // these modes are only entered once the transfer is over
        .parsed, .err, .raw_done => unreachable,
    }
}
// HTTP callback for the main request, invoked when the transfer failed.
// `ctx` is the *Page. Logs the error, releases the transfer arena and the
// HTML parser (if one was running), and records the error as the page mode.
fn pageErrorCallback(ctx: *anyopaque, err: anyerror) void {
    log.err(.http, "navigate failed", .{ .err = err });

    const page: *Page = @alignCast(@ptrCast(ctx));
    page.clearTransferArena();

    // don't need the parser anymore
    if (page.mode == .html) page.mode.html.deinit();

    page.mode = .{ .err = err };
}
// Resets the browser's transfer arena, keeping at most 4 KiB of capacity.
//
// The transfer arena has an unusual lifetime. A delayed navigation needs
// some data to survive the switch from one page to the next: the target URL
// and, optionally, the body to POST. That data cannot live in the page
// arena, since the current page is destroyed and a new one is created; if it
// did, the page arena could not be reset between navigations.
//
// The transfer arena therefore spans one navigation. It is released when
// the main HTML transfer ends, in pageDoneCallback or pageErrorCallback. It
// has to live that long because CURLOPT_POSTFIELDS does not copy the body
// (it can, but there is no reason to) and requires it to stay valid until
// the transfer is complete.
fn clearTransferArena(self: *Page) void {
    const retained_bytes = 4 * 1024;
    _ = self.session.browser.transfer_arena.reset(.{ .retain_with_limit = retained_bytes });
}
// Installs `html_doc` as the document of the window: sets its URI to the
// page URL, swaps the window's document, selects the storage shelf of the
// page origin and updates the window location.
// Kept as a separate function because tests call it to set things up.
pub fn setDocument(self: *Page, html_doc: *parser.DocumentHTML) !void {
    try parser.documentSetDocumentURI(parser.documentHTMLToDocument(html_doc), self.url.raw);

    // TODO set the referrer to the document.
    try self.window.replaceDocument(html_doc);

    const page_origin = try self.origin(self.arena);
    const shelf = try self.session.storage_shed.getOrPut(page_origin);
    self.window.setStorageShelf(shelf);

    const location_url = try self.url.toWebApi(self.arena);
    try self.window.replaceLocation(.{ .url = location_url });
}
// A mouse input delivered to the page through mouseEvent().
pub const MouseEvent = struct {
// position used to look up the element under the pointer and copied into
// the dispatched click event
x: i32,
y: i32,
type: Type,
const Type = enum {
// the only type mouseEvent() reacts to
pressed,
// ignored by mouseEvent()
released,
};
};
// Turns a mouse press into a bubbling, cancelable "click" event dispatched
// on the element found at the given position. Releases, and presses that
// hit no element, are ignored.
pub fn mouseEvent(self: *Page, me: MouseEvent) !void {
    switch (me.type) {
        .pressed => {},
        .released => return,
    }

    const target = self.renderer.getElementAtPosition(me.x, me.y) orelse return;

    const click = try parser.mouseEventCreate();
    defer parser.mouseEventDestroy(click);

    try parser.mouseEventInit(click, "click", .{
        .bubbles = true,
        .cancelable = true,
        .x = me.x,
        .y = me.y,
    });
    _ = try parser.elementDispatchEvent(target, @ptrCast(click));
}
// Listener registered for "click" on the document element. Recovers the
// page from the embedded event node and delegates to _windowClicked;
// errors are logged since a listener cannot return them.
fn windowClicked(node: *parser.EventNode, event: *parser.Event) void {
    const page: *Page = @fieldParentPtr("window_clicked_event_node", node);
    page._windowClicked(event) catch |err| {
        log.err(.browser, "click handler error", .{ .err = err });
    };
}
// Default actions for a click, based on the tag of the event target:
// - <a>: navigate to its href, if it has one
// - <input type=submit> / <button type=submit>: submit the owning form
// - <button type=reset>: reset the owning form
// Anything else is ignored.
fn _windowClicked(self: *Page, event: *parser.Event) !void {
    const target = (try parser.eventTarget(event)) orelse return;
    const node = parser.eventTargetToNode(target);
    const tag = (try parser.nodeHTMLGetTagType(node)) orelse return;

    // only meaningful for the element tags handled below
    const element: *parser.Element = @ptrCast(node);

    switch (tag) {
        .a => {
            const href = (try parser.elementGetAttribute(element, "href")) orelse return;
            try self.navigateFromWebAPI(href, .{});
        },
        .input => {
            const input_type = try parser.inputGetType(@ptrCast(element));
            if (std.ascii.eqlIgnoreCase(input_type, "submit")) {
                return self.elementSubmitForm(element);
            }
        },
        .button => {
            const button_type = try parser.buttonGetType(@ptrCast(element));
            if (std.ascii.eqlIgnoreCase(button_type, "submit")) {
                return self.elementSubmitForm(element);
            }
            if (std.ascii.eqlIgnoreCase(button_type, "reset")) {
                if (try self.formForElement(element)) |form| {
                    return parser.formElementReset(form);
                }
            }
        },
        else => {},
    }
}
// A keyboard input delivered to the page through keyboardEvent(). Every
// field except `type` is copied into the dispatched DOM keyboard event.
pub const KeyboardEvent = struct {
type: Type,
key: []const u8,
code: []const u8,
// modifier keys held while the key was pressed
alt: bool,
ctrl: bool,
meta: bool,
shift: bool,
const Type = enum {
keydown,
};
};
// Turns a key press into a bubbling, cancelable "keydown" event dispatched
// on the document's active element. Does nothing when there is no active
// element.
pub fn keyboardEvent(self: *Page, kbe: KeyboardEvent) !void {
    if (kbe.type != .keydown) return;

    const Document = @import("dom/document.zig").Document;
    const focused = (try Document.getActiveElement(@ptrCast(self.window.document), self)) orelse return;

    const keydown = try parser.keyboardEventCreate();
    defer parser.keyboardEventDestroy(keydown);

    try parser.keyboardEventInit(keydown, "keydown", .{
        .bubbles = true,
        .cancelable = true,
        .key = kbe.key,
        .code = kbe.code,
        .alt = kbe.alt,
        .ctrl = kbe.ctrl,
        .meta = kbe.meta,
        .shift = kbe.shift,
    });
    _ = try parser.elementDispatchEvent(focused, @ptrCast(keydown));
}
// Listener registered for "keydown" on the document element. Recovers the
// page from the embedded event node and delegates to _keydownCallback;
// errors are logged since a listener cannot return them.
fn keydownCallback(node: *parser.EventNode, event: *parser.Event) void {
    const page: *Page = @fieldParentPtr("keydown_event_node", node);
    page._keydownCallback(event) catch |err| {
        log.err(.browser, "keydown handler error", .{ .err = err });
    };
}
// Default actions for a keydown, based on the tag of the event target:
// - <input type="text">: "Enter" submits the owning form, any other key is
//   appended to the value
// - <textarea>: the key is appended to the value, "Enter" as a newline
// "Dead" keys and every other target are ignored. New values are allocated
// in the page arena.
fn _keydownCallback(self: *Page, event: *parser.Event) !void {
    const target = (try parser.eventTarget(event)) orelse return;
    const node = parser.eventTargetToNode(target);
    const tag = (try parser.nodeHTMLGetTagType(node)) orelse return;

    const keyboard_event: *parser.KeyboardEvent = @ptrCast(event);
    const key = try parser.keyboardEventGetKey(keyboard_event);
    if (std.mem.eql(u8, key, "Dead")) return;

    const is_enter = std.mem.eql(u8, key, "Enter");

    switch (tag) {
        .input => {
            const element: *parser.Element = @ptrCast(node);
            const input_type = try parser.inputGetType(@ptrCast(element));
            if (!std.mem.eql(u8, input_type, "text")) return;

            if (is_enter) {
                const form = (try self.formForElement(element)) orelse return;
                return self.submitForm(@ptrCast(form), null);
            }

            const current = try parser.inputGetValue(@ptrCast(element));
            const updated = try std.mem.concat(self.arena, u8, &.{ current, key });
            try parser.inputSetValue(@ptrCast(element), updated);
        },
        .textarea => {
            const current = try parser.textareaGetValue(@ptrCast(node));
            const typed: []const u8 = if (is_enter) "\n" else key;
            const updated = try std.mem.concat(self.arena, u8, &.{ current, typed });
            try parser.textareaSetValue(@ptrCast(node), updated);
        },
        else => {},
    }
}
// Queues a navigation requested from inside the page (link click, form
// submission, script).
//
// We cannot navigate immediately as navigating will delete the DOM tree,
// which holds this event's node. As such the navigation is recorded on the
// session (`queued_navigation`) to be performed as soon as possible.
// The page.arena is safe to use here, but the transfer_arena exists
// specifically for this type of lifetime.
//
// `url` is resolved against the current page URL. Only the first request is
// kept: calls made while a navigation is already queued are ignored.
// On success, in-flight HTTP transfers are aborted and JS execution is
// terminated. On failure (the URL could not be built) the page is left
// untouched.
pub fn navigateFromWebAPI(self: *Page, url: []const u8, opts: NavigateOpts) !void {
    const session = self.session;

    if (session.queued_navigation != null) {
        // It might seem like this should never happen. And it might not,
        // BUT..consider the case where we have script like:
        //   top.location = X;
        //   top.location = Y;
        // Will the 2nd top.location execute? You'd think not, since,
        // when we're in this function for the 1st, we'll call:
        //   session.executor.terminateExecution();
        // But, this doesn't seem guaranteed to stop on the current line.
        // My best guess is that v8 groups executes in chunks (how they are
        // chunked, I can't guess) and always executes them together.
        return;
    }

    log.debug(.browser, "delayed navigation", .{
        .url = url,
        .reason = opts.reason,
    });

    // Build the target first: this is the only step that can fail, and it
    // must fail before any state is changed. Otherwise delayed_navigation
    // would stay set without a queued navigation, and scriptAddedCallback
    // would skip every dynamically added script from then on.
    const target_url = try URL.stitch(session.transfer_arena, url, self.url.raw, .{ .alloc = .always });

    self.delayed_navigation = true;
    session.queued_navigation = .{
        .opts = opts,
        .url = target_url,
    };

    self.http_client.abort();

    // In v8, this throws an exception which JS code cannot catch.
    session.executor.terminateExecution();
}
// Returns the State attached to `node`, creating it first if needed. A new
// State comes from the page's state pool, is default-initialized and is
// stored as the node's embedder data.
pub fn getOrCreateNodeState(self: *Page, node: *parser.Node) !*State {
    return self.getNodeState(node) orelse fresh: {
        const state = try self.state_pool.create();
        state.* = .{};
        parser.nodeSetEmbedderData(node, state);
        break :fresh state;
    };
}
// Returns the State stored as embedder data on `node`, or null when the
// node has none.
pub fn getNodeState(_: *const Page, node: *parser.Node) ?*State {
    const raw = parser.nodeGetEmbedderData(node) orelse return null;
    return @alignCast(@ptrCast(raw));
}
// Submits `form` by queuing a navigation with reason .form.
//
// The form data (including `submitter`, if any) is serialized according to
// the form's enctype attribute. With method "post" (case-insensitive) it is
// sent as the body of a POST to the action; with any other method it is
// appended to the action as a query string. A form without an action
// targets the current page URL.
// The serialized data and the final URL are allocated in the session's
// transfer arena.
pub fn submitForm(self: *Page, form: *parser.Form, submitter: ?*parser.ElementHTML) !void {
    const FormData = @import("xhr/form_data.zig").FormData;

    const allocator = self.session.transfer_arena;
    const form_element: *parser.Element = @alignCast(@ptrCast(form));

    var form_data = try FormData.fromForm(form, submitter, self);

    const enctype = try parser.elementGetAttribute(form_element, "enctype");
    var encoded: std.ArrayListUnmanaged(u8) = .empty;
    try form_data.write(enctype, encoded.writer(allocator));

    const method = (try parser.elementGetAttribute(form_element, "method")) orelse "";
    const action = (try parser.elementGetAttribute(form_element, "action")) orelse self.url.raw;

    var opts = NavigateOpts{
        .reason = .form,
    };

    const is_post = std.ascii.eqlIgnoreCase(method, "post");
    if (is_post) {
        opts.method = .POST;
        opts.body = encoded.items;
    }
    const target = if (is_post)
        action
    else
        try URL.concatQueryString(allocator, action, encoded.items);

    try self.navigateFromWebAPI(target, opts);
}
// Reports whether `node` belongs to the page's document tree, i.e. whether
// its root node is the window's document.
pub fn isNodeAttached(self: *const Page, node: *parser.Node) !bool {
    const document = parser.documentHTMLToDocument(self.window.document);
    const document_node = parser.documentToNode(document);
    const node_root = try parser.nodeGetRootNode(node);
    return document_node == node_root;
}
// Submits the form owning `element`, with `element` as the submitter.
// Does nothing when no form can be found for the element.
fn elementSubmitForm(self: *Page, element: *parser.Element) !void {
    if (try self.formForElement(element)) |form| {
        return self.submitForm(@ptrCast(form), @ptrCast(element));
    }
}
// Finds the form `element` belongs to:
// - a disabled element has no form (null)
// - with a `form` attribute, the element with that id is used, provided it
//   exists and is a <form>; otherwise null
// - without one, the closest <form> matched by Element._closest is used
fn formForElement(self: *Page, element: *parser.Element) !?*parser.Form {
    const disabled = try parser.elementGetAttribute(element, "disabled");
    if (disabled != null) return null;

    const form_id = (try parser.elementGetAttribute(element, "form")) orelse {
        // no explicit owner: look for an enclosing form
        const Element = @import("dom/element.zig").Element;
        const closest = (try Element._closest(element, "form", self)) orelse return null;
        return @ptrCast(closest);
    };

    const document = parser.documentHTMLToDocument(self.window.document);
    const candidate = (try parser.documentGetElementById(document, form_id)) orelse return null;
    if (try parser.elementTag(@ptrCast(candidate)) != .form) {
        return null;
    }
    return @ptrCast(candidate);
}
// Returns the stack trace of the main JS context, in Debug builds only.
// Every other build mode always gets null.
pub fn stackTrace(self: *Page) !?[]const u8 {
    if (comptime builtin.mode != .Debug) {
        return null;
    }
    return self.main_context.stackTrace();
}
};
// Why a navigation was started. Only logged and passed along with the
// navigation options in this file.
pub const NavigateReason = enum {
anchor,
// default of NavigateOpts.reason
address_bar,
// set by Page.submitForm
form,
script,
};
// Options of a navigation, see Page.navigate and Page.navigateFromWebAPI.
pub const NavigateOpts = struct {
// not read in this file; presumably set by the CDP layer (confirm there)
cdp_id: ?i64 = null,
reason: NavigateReason = .address_bar,
// HTTP method of the main request
method: HttpClient.Method = .GET,
// request body, if any. Not copied by navigate: it must stay valid until
// the transfer is over (see the comment on Page.clearTransferArena)
body: ?[]const u8 = null,
};
// Returns the seconds part of the monotonic clock. Only meaningful relative
// to another value from the same clock; it is not a wall-clock time.
fn timestamp() u32 {
    const now = std.posix.clock_gettime(std.posix.CLOCK.MONOTONIC) catch unreachable;
    const seconds = now.sec;
    return @intCast(seconds);
}
// Called by libdom each time a script tag is added to the DOM; `ctx` is the
// *Page registered in pageDoneCallback and `element` is guaranteed to be a
// script element.
// The tag may have no src, and may carry any attribute (`nomodule`, `defer`,
// `async`). `Script.init` returns null on `nomodule`, so that case is
// covered. Since inline <script> tags are only executed after the document
// is loaded, async and defer scripts can be executed immediately.
// Errors from the script manager are logged; nothing is reported to libdom.
pub export fn scriptAddedCallback(ctx: ?*anyopaque, element: ?*parser.Element) callconv(.C) void {
    const page: *Page = @alignCast(@ptrCast(ctx.?));

    // a navigation to another page is pending: don't run this script
    if (!page.delayed_navigation) {
        page.script_manager.addFromElement(element.?) catch |err| {
            log.warn(.browser, "dynamcic script", .{ .err = err });
        };
    }
}