Compare commits

..

11 Commits

Author SHA1 Message Date
Karl Seguin
492fd86bad Expand the lifetime of the XHR reference
We need to take the self-reference to the XHR object as soon as the request is
made. Previously, we were waiting until we got the start callback, but v8 could
(and does) drop the reference before that happens. Unfortunately, that means
we can no longer use _transfer == null to tell if we own a reference or not, so
a new boolean was added.
2026-03-31 16:46:31 +08:00
Adrià Arrufat
55666ab7cd Merge pull request #2049 from lightpanda-io/refactor/getNodeDetails-param-order
Some checks are pending
e2e-test / zig build release (push) Waiting to run
e2e-test / demo-scripts (push) Blocked by required conditions
e2e-test / wba-demo-scripts (push) Blocked by required conditions
e2e-test / wba-test (push) Blocked by required conditions
e2e-test / cdp-and-hyperfine-bench (push) Blocked by required conditions
e2e-test / perf-fmt (push) Blocked by required conditions
e2e-test / browser fetch (push) Blocked by required conditions
zig-test / zig fmt (push) Waiting to run
zig-test / zig test using v8 in debug mode (push) Waiting to run
zig-test / zig test (push) Waiting to run
zig-test / perf-fmt (push) Blocked by required conditions
SemanticTree: reorder getNodeDetails params
2026-03-31 07:58:28 +02:00
Adrià Arrufat
008235222b SemanticTree: reorder getNodeDetails params 2026-03-31 07:29:33 +02:00
Adrià Arrufat
b03dbc77ab Merge pull request #2048 from lightpanda-io/feature/semantic-tree-checked-state
SemanticTree: add checked state to node data and output
2026-03-31 07:08:36 +02:00
Adrià Arrufat
fc057a3bb3 SemanticTree: add checked state to node data and output 2026-03-31 06:43:01 +02:00
Adrià Arrufat
16f17ead9a Merge pull request #2045 from lightpanda-io/semantic-tree-node-details
SemanticTree: Add nodeDetails tool
2026-03-31 05:28:24 +02:00
Adrià Arrufat
367d20d39f SemanticTree: simplify lp.String.wrap calls 2026-03-31 05:20:32 +02:00
Karl Seguin
a6a8752787 Merge pull request #2032 from lightpanda-io/cdp_navigation_event_order
Some checks failed
e2e-test / zig build release (push) Has been cancelled
e2e-test / demo-scripts (push) Has been cancelled
e2e-test / wba-demo-scripts (push) Has been cancelled
e2e-test / wba-test (push) Has been cancelled
e2e-test / cdp-and-hyperfine-bench (push) Has been cancelled
e2e-test / perf-fmt (push) Has been cancelled
e2e-test / browser fetch (push) Has been cancelled
zig-test / zig fmt (push) Has been cancelled
zig-test / zig test using v8 in debug mode (push) Has been cancelled
zig-test / zig test (push) Has been cancelled
zig-test / perf-fmt (push) Has been cancelled
Improve/Fix CDP navigation event order
2026-03-31 07:12:14 +08:00
Karl Seguin
568fa25add Remove DOMContentLoaded and Loaded events from page_navigated
These were moved to their own distinct events, and should have been removed from
here.
2026-03-31 06:56:01 +08:00
Karl Seguin
752184b12b Improve/Fix CDP navigation event order
These changes all better align with chrome's event ordering/timing.

There are two big changes. The first is that our internal page_navigated event,
which is kind of our heavy hitter, is sent once the header is received as
opposed to (much later) on document load. The main goal of this internal event
is to trigger the "Page.frameNavigated" CDP event which is meant to happen
once the URL is committed, which _is_ on header response.

To accommodate this earlier trigger, new explicit events for DOMContentLoaded
and load have be added.

This drastically changes the flow of events as things go from:
Start Page Navigation
Response Received
  Start Frame Navigation
  Response Received
  End Frame Navigation
End Page Navigation
context clear + reset
DOMContentLoaded
Loaded

TO:
Start Page Navigation
Response Received
End Page Navigation
context clear + reset
Start Frame Navigation
Response Received
End Frame Navigation
DOMContentLoaded
Loaded

So not only does it remove the nesting, but it ensures that the context are
cleared and reset once the main page's navigation is locked in, and before any
frame is created.
2026-03-31 06:56:00 +08:00
Adrià Arrufat
9c8fe9b20f SemanticTree: Add nodeDetails tool
Adds a tool to retrieve detailed node metadata and updates the
semantic tree to track and display the disabled state of elements.
2026-03-30 16:38:23 +02:00
21 changed files with 560 additions and 1256 deletions

View File

@@ -55,7 +55,7 @@ pub fn init(allocator: Allocator, config: *const Config) !*App {
.arena_pool = undefined,
};
app.network = try Network.init(allocator, app, config);
app.network = try Network.init(allocator, config);
errdefer app.network.deinit();
app.platform = try Platform.init();

View File

@@ -157,13 +157,6 @@ pub fn userAgentSuffix(self: *const Config) ?[]const u8 {
};
}
pub fn cacheDir(self: *const Config) ?[]const u8 {
return switch (self.mode) {
inline .serve, .fetch, .mcp => |opts| opts.common.cache_dir,
else => null,
};
}
pub fn cdpTimeout(self: *const Config) usize {
return switch (self.mode) {
.serve => |opts| if (opts.timeout > 604_800) 604_800_000 else @as(usize, opts.timeout) * 1000,
@@ -273,7 +266,6 @@ pub const Common = struct {
log_format: ?log.Format = null,
log_filter_scopes: ?[]log.Scope = null,
user_agent_suffix: ?[]const u8 = null,
cache_dir: ?[]const u8 = null,
web_bot_auth_key_file: ?[]const u8 = null,
web_bot_auth_keyid: ?[]const u8 = null,
@@ -1008,14 +1000,5 @@ fn parseCommonArg(
return true;
}
if (std.mem.eql(u8, "--cache_dir", opt)) {
const str = args.next() orelse {
log.fatal(.app, "missing argument value", .{ .arg = "--cache_dir" });
return error.InvalidArgument;
};
common.cache_dir = try allocator.dupe(u8, str);
return true;
}
return false;
}

View File

@@ -74,6 +74,8 @@ const EventListeners = struct {
page_network_idle: List = .{},
page_network_almost_idle: List = .{},
page_frame_created: List = .{},
page_dom_content_loaded: List = .{},
page_loaded: List = .{},
http_request_fail: List = .{},
http_request_start: List = .{},
http_request_intercept: List = .{},
@@ -91,6 +93,8 @@ const Events = union(enum) {
page_network_idle: *const PageNetworkIdle,
page_network_almost_idle: *const PageNetworkAlmostIdle,
page_frame_created: *const PageFrameCreated,
page_dom_content_loaded: *const PageDOMContentLoaded,
page_loaded: *const PageLoaded,
http_request_fail: *const RequestFail,
http_request_start: *const RequestStart,
http_request_intercept: *const RequestIntercept,
@@ -137,6 +141,18 @@ pub const PageFrameCreated = struct {
timestamp: u64,
};
pub const PageDOMContentLoaded = struct {
req_id: u32,
frame_id: u32,
timestamp: u64,
};
pub const PageLoaded = struct {
req_id: u32,
frame_id: u32,
timestamp: u64,
};
pub const RequestStart = struct {
transfer: *Transfer,
};

View File

@@ -95,9 +95,11 @@ const NodeData = struct {
name: ?[]const u8,
value: ?[]const u8,
options: ?[]OptionData = null,
checked: ?bool = null,
xpath: []const u8,
is_interactive: bool,
node_name: []const u8,
interactive: bool,
disabled: bool,
tag_name: []const u8,
};
const WalkContext = struct {
@@ -148,16 +150,21 @@ fn walk(
const role = try axn.getRole();
var is_interactive = false;
var is_disabled = false;
var value: ?[]const u8 = null;
var options: ?[]OptionData = null;
var node_name: []const u8 = "text";
var checked: ?bool = null;
var tag_name: []const u8 = "text";
if (node.is(Element)) |el| {
node_name = el.getTagNameLower();
tag_name = el.getTagNameLower();
if (el.is(Element.Html.Input)) |input| {
value = input.getValue();
if (el.getAttributeSafe(comptime lp.String.wrap("list"))) |list_id| {
if (input._input_type == .checkbox or input._input_type == .radio) {
checked = input.getChecked();
}
if (el.getAttributeSafe(comptime .wrap("list"))) |list_id| {
options = try extractDataListOptions(list_id, self.page, self.arena);
}
} else if (el.is(Element.Html.TextArea)) |textarea| {
@@ -172,8 +179,10 @@ fn walk(
is_interactive = true;
}
}
is_disabled = el.isDisabled();
} else if (node._type == .document or node._type == .document_fragment) {
node_name = "root";
tag_name = "root";
}
const initial_xpath_len = ctx.xpath_buffer.items.len;
@@ -234,9 +243,11 @@ fn walk(
.name = name,
.value = value,
.options = options,
.checked = checked,
.xpath = xpath,
.is_interactive = is_interactive,
.node_name = node_name,
.interactive = is_interactive,
.disabled = is_disabled,
.tag_name = tag_name,
};
if (should_visit) {
@@ -335,7 +346,7 @@ const JsonVisitor = struct {
try self.jw.write(data.id);
try self.jw.objectField("nodeName");
try self.jw.write(data.node_name);
try self.jw.write(data.tag_name);
try self.jw.objectField("xpath");
try self.jw.write(data.xpath);
@@ -345,7 +356,12 @@ const JsonVisitor = struct {
try self.jw.write(1);
try self.jw.objectField("isInteractive");
try self.jw.write(data.is_interactive);
try self.jw.write(data.interactive);
if (data.disabled) {
try self.jw.objectField("isDisabled");
try self.jw.write(true);
}
try self.jw.objectField("role");
try self.jw.write(data.role);
@@ -373,6 +389,11 @@ const JsonVisitor = struct {
try self.jw.endObject();
}
if (data.checked) |checked| {
try self.jw.objectField("checked");
try self.jw.write(checked);
}
if (data.options) |options| {
try self.jw.objectField("options");
try self.jw.beginArray();
@@ -459,6 +480,9 @@ const TextVisitor = struct {
const is_text_only = std.mem.eql(u8, data.role, "StaticText") or std.mem.eql(u8, data.role, "none") or std.mem.eql(u8, data.role, "generic");
try self.writer.print("{d}", .{data.id});
if (data.interactive) {
try self.writer.writeAll(if (data.disabled) " [i:disabled]" else " [i]");
}
if (!is_text_only) {
try self.writer.print(" {s}", .{data.role});
}
@@ -472,6 +496,14 @@ const TextVisitor = struct {
}
}
if (data.checked) |c| {
if (c) {
try self.writer.writeAll(" [checked]");
} else {
try self.writer.writeAll(" [unchecked]");
}
}
if (data.options) |options| {
try self.writer.writeAll(" options=[");
for (options, 0..) |opt, i| {
@@ -509,6 +541,182 @@ const TextVisitor = struct {
}
};
pub const NodeDetails = struct {
backendNodeId: CDPNode.Id,
tag_name: []const u8,
role: []const u8,
name: ?[]const u8,
interactive: bool,
disabled: bool,
value: ?[]const u8 = null,
input_type: ?[]const u8 = null,
placeholder: ?[]const u8 = null,
href: ?[]const u8 = null,
id: ?[]const u8 = null,
class: ?[]const u8 = null,
checked: ?bool = null,
options: ?[]OptionData = null,
pub fn jsonStringify(self: *const NodeDetails, jw: anytype) !void {
try jw.beginObject();
try jw.objectField("backendNodeId");
try jw.write(self.backendNodeId);
try jw.objectField("tagName");
try jw.write(self.tag_name);
try jw.objectField("role");
try jw.write(self.role);
if (self.name) |n| {
try jw.objectField("name");
try jw.write(n);
}
try jw.objectField("isInteractive");
try jw.write(self.interactive);
if (self.disabled) {
try jw.objectField("isDisabled");
try jw.write(true);
}
if (self.value) |v| {
try jw.objectField("value");
try jw.write(v);
}
if (self.input_type) |v| {
try jw.objectField("inputType");
try jw.write(v);
}
if (self.placeholder) |v| {
try jw.objectField("placeholder");
try jw.write(v);
}
if (self.href) |v| {
try jw.objectField("href");
try jw.write(v);
}
if (self.id) |v| {
try jw.objectField("id");
try jw.write(v);
}
if (self.class) |v| {
try jw.objectField("class");
try jw.write(v);
}
if (self.checked) |c| {
try jw.objectField("checked");
try jw.write(c);
}
if (self.options) |opts| {
try jw.objectField("options");
try jw.beginArray();
for (opts) |opt| {
try jw.beginObject();
try jw.objectField("value");
try jw.write(opt.value);
try jw.objectField("text");
try jw.write(opt.text);
if (opt.selected) {
try jw.objectField("selected");
try jw.write(true);
}
try jw.endObject();
}
try jw.endArray();
}
try jw.endObject();
}
};
pub fn getNodeDetails(
arena: std.mem.Allocator,
node: *Node,
registry: *CDPNode.Registry,
page: *Page,
) !NodeDetails {
const cdp_node = try registry.register(node);
const axn = AXNode.fromNode(node);
const role = try axn.getRole();
const name = try axn.getName(page, arena);
var is_interactive = false;
var is_disabled = false;
var tag_name: []const u8 = "text";
var value: ?[]const u8 = null;
var input_type: ?[]const u8 = null;
var placeholder: ?[]const u8 = null;
var href: ?[]const u8 = null;
var id_attr: ?[]const u8 = null;
var class_attr: ?[]const u8 = null;
var checked: ?bool = null;
var options: ?[]OptionData = null;
if (node.is(Element)) |el| {
tag_name = el.getTagNameLower();
is_disabled = el.isDisabled();
id_attr = el.getAttributeSafe(comptime .wrap("id"));
class_attr = el.getAttributeSafe(comptime .wrap("class"));
placeholder = el.getAttributeSafe(comptime .wrap("placeholder"));
if (el.getAttributeSafe(comptime .wrap("href"))) |h| {
const URL = lp.URL;
href = URL.resolve(arena, page.base(), h, .{ .encode = true }) catch h;
}
if (el.is(Element.Html.Input)) |input| {
value = input.getValue();
input_type = input._input_type.toString();
if (input._input_type == .checkbox or input._input_type == .radio) {
checked = input.getChecked();
}
if (el.getAttributeSafe(comptime .wrap("list"))) |list_id| {
options = try extractDataListOptions(list_id, page, arena);
}
} else if (el.is(Element.Html.TextArea)) |textarea| {
value = textarea.getValue();
} else if (el.is(Element.Html.Select)) |select| {
value = select.getValue(page);
options = try extractSelectOptions(el.asNode(), page, arena);
}
if (el.is(Element.Html)) |html_el| {
const listener_targets = try interactive.buildListenerTargetMap(page, arena);
var pointer_events_cache: Element.PointerEventsCache = .empty;
if (interactive.classifyInteractivity(page, el, html_el, listener_targets, &pointer_events_cache) != null) {
is_interactive = true;
}
}
}
return .{
.backendNodeId = cdp_node.id,
.tag_name = tag_name,
.role = role,
.name = name,
.interactive = is_interactive,
.disabled = is_disabled,
.value = value,
.input_type = input_type,
.placeholder = placeholder,
.href = href,
.id = id_attr,
.class = class_attr,
.checked = checked,
.options = options,
};
}
const testing = @import("testing.zig");
test "SemanticTree backendDOMNodeId" {

View File

@@ -32,9 +32,6 @@ const CookieJar = @import("webapi/storage/Cookie.zig").Jar;
const http = @import("../network/http.zig");
const Runtime = @import("../network/Runtime.zig");
const Robots = @import("../network/Robots.zig");
const Cache = @import("../network/cache/Cache.zig");
const CacheMetadata = Cache.CachedMetadata;
const CachedResponse = Cache.CachedResponse;
const IS_DEBUG = builtin.mode == .Debug;
@@ -314,81 +311,7 @@ pub fn request(self: *Client, req: Request) !void {
return self.fetchRobotsThenProcessRequest(robots_url, req);
}
fn serveFromCache(req: Request, cached: *const CachedResponse) !void {
const response = Response.fromCached(req.ctx, cached);
defer switch (cached.data) {
.buffer => |_| {},
.file => |f| f.file.close(),
};
if (req.start_callback) |cb| {
try cb(response);
}
const proceed = try req.header_callback(response);
if (!proceed) {
req.error_callback(req.ctx, error.Abort);
return;
}
switch (cached.data) {
.buffer => |data| {
if (data.len > 0) {
try req.data_callback(response, data);
}
},
.file => |f| {
const file = f.file;
var buf: [1024]u8 = undefined;
var file_reader = file.reader(&buf);
try file_reader.seekTo(f.offset);
const reader = &file_reader.interface;
var read_buf: [1024]u8 = undefined;
var remaining = f.len;
while (remaining > 0) {
const read_len = @min(read_buf.len, remaining);
const n = try reader.readSliceShort(read_buf[0..read_len]);
if (n == 0) break;
remaining -= n;
try req.data_callback(response, read_buf[0..n]);
}
},
}
try req.done_callback(req.ctx);
}
fn processRequest(self: *Client, req: Request) !void {
if (self.network.cache) |*cache| {
if (req.method == .GET) {
const arena = try self.network.app.arena_pool.acquire(.{ .debug = "HttpClient.processRequest.cache" });
defer self.network.app.arena_pool.release(arena);
var iter = req.headers.iterator();
const req_header_list = try iter.collect(arena);
if (cache.get(arena, .{
.url = req.url,
.timestamp = std.time.timestamp(),
.request_headers = req_header_list.items,
})) |cached| {
log.debug(.browser, "http.cache.get", .{
.url = req.url,
.found = true,
.metadata = cached.metadata,
});
defer req.headers.deinit();
return serveFromCache(req, &cached);
} else {
log.debug(.browser, "http.cache.get", .{ .url = req.url, .found = false });
}
}
}
const transfer = try self.makeTransfer(req);
transfer.req.notification.dispatch(.http_request_start, &.{ .transfer = transfer });
@@ -476,10 +399,8 @@ fn fetchRobotsThenProcessRequest(self: *Client, robots_url: [:0]const u8, req: R
try entry.value_ptr.append(self.allocator, req);
}
fn robotsHeaderCallback(response: Response) !bool {
const ctx: *RobotsRequestContext = @ptrCast(@alignCast(response.ctx));
// Robots callbacks only happen on real live requests.
const transfer = response.inner.transfer;
fn robotsHeaderCallback(transfer: *Transfer) !bool {
const ctx: *RobotsRequestContext = @ptrCast(@alignCast(transfer.ctx));
if (transfer.response_header) |hdr| {
log.debug(.browser, "robots status", .{ .status = hdr.status, .robots_url = ctx.robots_url });
@@ -493,8 +414,8 @@ fn robotsHeaderCallback(response: Response) !bool {
return true;
}
fn robotsDataCallback(response: Response, data: []const u8) !void {
const ctx: *RobotsRequestContext = @ptrCast(@alignCast(response.ctx));
fn robotsDataCallback(transfer: *Transfer, data: []const u8) !void {
const ctx: *RobotsRequestContext = @ptrCast(@alignCast(transfer.ctx));
try ctx.buffer.appendSlice(ctx.client.allocator, data);
}
@@ -713,43 +634,13 @@ fn makeTransfer(self: *Client, req: Request) !*Transfer {
.id = id,
.url = req.url,
.req = req,
.ctx = req.ctx,
.client = self,
.max_response_size = self.network.config.httpMaxResponseSize(),
};
return transfer;
}
fn requestFailed(transfer: *Transfer, err: anyerror, comptime execute_callback: bool) void {
if (transfer._notified_fail) {
// we can force a failed request within a callback, which will eventually
// result in this being called again in the more general loop. We do this
// because we can raise a more specific error inside a callback in some cases
return;
}
transfer._notified_fail = true;
transfer.req.notification.dispatch(.http_request_fail, &.{
.transfer = transfer,
.err = err,
});
if (execute_callback) {
transfer.req.error_callback(transfer.req.ctx, err);
} else if (transfer.req.shutdown_callback) |cb| {
cb(transfer.req.ctx);
}
}
// Same restriction as changeProxy. Should be ok since this is only called on
// BrowserContext deinit.
pub fn restoreOriginalProxy(self: *Client) !void {
try self.ensureNoActiveConnection();
self.http_proxy = self.network.config.httpProxy();
self.use_proxy = self.http_proxy != null;
}
fn makeRequest(self: *Client, conn: *http.Connection, transfer: *Transfer) anyerror!void {
{
// Reset per-response state for retries (auth challenge, queue).
@@ -783,7 +674,7 @@ fn makeRequest(self: *Client, conn: *http.Connection, transfer: *Transfer) anyer
self.active += 1;
if (transfer.req.start_callback) |cb| {
cb(Response.fromTransfer(transfer)) catch |err| {
cb(transfer) catch |err| {
transfer.deinit();
return err;
};
@@ -851,10 +742,7 @@ fn processOneMessage(self: *Client, msg: http.Handles.MultiMessage, transfer: *T
// TODO give a way to configure the number of auth retries.
if (transfer._auth_challenge != null and transfer._tries < 10) {
var wait_for_interception = false;
transfer.req.notification.dispatch(
.http_request_auth_required,
&.{ .transfer = transfer, .wait_for_interception = &wait_for_interception },
);
transfer.req.notification.dispatch(.http_request_auth_required, &.{ .transfer = transfer, .wait_for_interception = &wait_for_interception });
if (wait_for_interception) {
self.intercepted += 1;
if (comptime IS_DEBUG) {
@@ -953,11 +841,10 @@ fn processOneMessage(self: *Client, msg: http.Handles.MultiMessage, transfer: *T
}
}
const body = transfer._stream_buffer.items;
// Replay buffered body through user's data_callback.
if (transfer._stream_buffer.items.len > 0) {
try transfer.req.data_callback(Response.fromTransfer(transfer), body);
const body = transfer._stream_buffer.items;
try transfer.req.data_callback(transfer, body);
transfer.req.notification.dispatch(.http_response_data, &.{
.data = body,
@@ -974,21 +861,7 @@ fn processOneMessage(self: *Client, msg: http.Handles.MultiMessage, transfer: *T
// will load more resources.
transfer.releaseConn();
try transfer.req.done_callback(transfer.req.ctx);
if (transfer._pending_cache_metadata) |metadata| {
const cache = &self.network.cache.?;
// TODO: Support Vary Keying
const cache_key = transfer.req.url;
log.debug(.browser, "http cache", .{ .key = cache_key, .metadata = metadata });
cache.put(metadata, body) catch |err| {
log.warn(.http, "cache put failed", .{ .err = err });
};
log.debug(.browser, "http.cache.put", .{ .url = transfer.req.url });
}
try transfer.req.done_callback(transfer.ctx);
transfer.req.notification.dispatch(.http_request_done, &.{
.transfer = transfer,
});
@@ -1066,9 +939,9 @@ pub const Request = struct {
// arbitrary data that can be associated with this request
ctx: *anyopaque = undefined,
start_callback: ?*const fn (response: Response) anyerror!void = null,
header_callback: *const fn (response: Response) anyerror!bool,
data_callback: *const fn (response: Response, data: []const u8) anyerror!void,
start_callback: ?*const fn (transfer: *Transfer) anyerror!void = null,
header_callback: *const fn (transfer: *Transfer) anyerror!bool,
data_callback: *const fn (transfer: *Transfer, data: []const u8) anyerror!void,
done_callback: *const fn (ctx: *anyopaque) anyerror!void,
error_callback: *const fn (ctx: *anyopaque, err: anyerror) void,
shutdown_callback: ?*const fn (ctx: *anyopaque) void = null,
@@ -1094,84 +967,16 @@ pub const Request = struct {
};
};
pub const Response = struct {
ctx: *anyopaque,
inner: union(enum) {
transfer: *Transfer,
cached: *const CachedResponse,
},
pub fn fromTransfer(transfer: *Transfer) Response {
return .{ .ctx = transfer.req.ctx, .inner = .{ .transfer = transfer } };
}
pub fn fromCached(ctx: *anyopaque, resp: *const CachedResponse) Response {
return .{ .ctx = ctx, .inner = .{ .cached = resp } };
}
pub fn status(self: Response) ?u16 {
return switch (self.inner) {
.transfer => |t| if (t.response_header) |rh| rh.status else null,
.cached => |c| c.metadata.status,
};
}
pub fn contentType(self: Response) ?[]const u8 {
return switch (self.inner) {
.transfer => |t| if (t.response_header) |*rh| rh.contentType() else null,
.cached => |c| c.metadata.content_type,
};
}
pub fn contentLength(self: Response) ?u32 {
return switch (self.inner) {
.transfer => |t| t.getContentLength(),
.cached => |c| switch (c.data) {
.buffer => |buf| @intCast(buf.len),
.file => |f| @intCast(f.len),
},
};
}
pub fn redirectCount(self: Response) ?u32 {
return switch (self.inner) {
.transfer => |t| if (t.response_header) |rh| rh.redirect_count else null,
.cached => 0,
};
}
pub fn url(self: Response) [:0]const u8 {
return switch (self.inner) {
.transfer => |t| t.url,
.cached => |c| c.metadata.url,
};
}
pub fn headerIterator(self: Response) HeaderIterator {
return switch (self.inner) {
.transfer => |t| t.responseHeaderIterator(),
.cached => |c| HeaderIterator{ .list = .{ .list = c.metadata.headers } },
};
}
pub fn abort(self: Response, err: anyerror) void {
switch (self.inner) {
.transfer => |t| t.abort(err),
.cached => {},
}
}
};
pub const Transfer = struct {
arena: ArenaAllocator,
id: u32 = 0,
req: Request,
url: [:0]const u8,
ctx: *anyopaque, // copied from req.ctx to make it easier for callback handlers
client: *Client,
// total bytes received in the response, including the response status line,
// the headers, and the [encoded] body.
bytes_received: usize = 0,
_pending_cache_metadata: ?CacheMetadata = null,
aborted: bool = false,
@@ -1260,7 +1065,7 @@ pub const Transfer = struct {
// as abort (doesn't send a notification, doesn't invoke an error callback)
fn kill(self: *Transfer) void {
if (self.req.shutdown_callback) |cb| {
cb(self.req.ctx);
cb(self.ctx);
}
if (self._performing or self.client.performing) {
@@ -1296,7 +1101,7 @@ pub const Transfer = struct {
});
if (execute_callback) {
self.req.error_callback(self.req.ctx, err);
self.req.error_callback(self.ctx, err);
} else if (self.req.shutdown_callback) |cb| {
cb(self.ctx);
}
@@ -1543,64 +1348,15 @@ pub const Transfer = struct {
}
}
const proceed = transfer.req.header_callback(Response.fromTransfer(transfer)) catch |err| {
log.err(.http, "header_callback", .{ .err = err, .req = transfer });
return err;
};
if (transfer.client.network.cache != null and transfer.req.method == .GET) {
const rh = &transfer.response_header.?;
const allocator = transfer.arena.allocator();
const vary = if (conn.getResponseHeader("vary", 0)) |h| h.value else null;
const maybe_cm = try Cache.tryCache(
allocator,
std.time.timestamp(),
transfer.url,
rh.status,
rh.contentType(),
if (conn.getResponseHeader("cache-control", 0)) |h| h.value else null,
vary,
if (conn.getResponseHeader("age", 0)) |h| h.value else null,
conn.getResponseHeader("set-cookie", 0) != null,
conn.getResponseHeader("authorization", 0) != null,
);
if (maybe_cm) |cm| {
transfer._pending_cache_metadata = cm;
var iter = transfer.responseHeaderIterator();
var header_list = try iter.collect(allocator);
const end_of_response = header_list.items.len;
transfer._pending_cache_metadata.?.headers = header_list.items[0..end_of_response];
if (vary) |vary_str| {
var req_it = transfer.req.headers.iterator();
while (req_it.next()) |hdr| {
var vary_iter = std.mem.splitScalar(u8, vary_str, ',');
while (vary_iter.next()) |part| {
const name = std.mem.trim(u8, part, &std.ascii.whitespace);
if (std.ascii.eqlIgnoreCase(hdr.name, name)) {
try header_list.append(allocator, .{
.name = try allocator.dupe(u8, hdr.name),
.value = try allocator.dupe(u8, hdr.value),
});
}
}
}
transfer._pending_cache_metadata.?.vary_headers = header_list.items[end_of_response..];
}
}
}
transfer.req.notification.dispatch(.http_response_header_done, &.{
.transfer = transfer,
});
const proceed = transfer.req.header_callback(transfer) catch |err| {
log.err(.http, "header_callback", .{ .err = err, .req = transfer });
return err;
};
return proceed and transfer.aborted == false;
}
@@ -1699,7 +1455,7 @@ pub const Transfer = struct {
fn _fulfill(transfer: *Transfer, status: u16, headers: []const http.Header, body: ?[]const u8) !void {
const req = &transfer.req;
if (req.start_callback) |cb| {
try cb(Response.fromTransfer(transfer));
try cb(transfer);
}
transfer.response_header = .{
@@ -1718,13 +1474,13 @@ pub const Transfer = struct {
}
lp.assert(transfer._header_done_called == false, "Transfer.fulfill header_done_called", .{});
if (try req.header_callback(Response.fromTransfer(transfer)) == false) {
if (try req.header_callback(transfer) == false) {
transfer.abort(error.Abort);
return;
}
if (body) |b| {
try req.data_callback(Response.fromTransfer(transfer), b);
try req.data_callback(transfer, b);
}
try req.done_callback(req.ctx);
@@ -1761,10 +1517,10 @@ pub const Transfer = struct {
};
const Noop = struct {
fn headerCallback(_: Response) !bool {
fn headerCallback(_: *Transfer) !bool {
return true;
}
fn dataCallback(_: Response, _: []const u8) !void {}
fn dataCallback(_: *Transfer, _: []const u8) !void {}
fn doneCallback(_: *anyopaque) !void {}
fn errorCallback(_: *anyopaque, _: anyerror) void {}
};

View File

@@ -27,9 +27,6 @@ charset: [41]u8 = default_charset,
charset_len: usize = default_charset_len,
is_default_charset: bool = true,
type_buf: [127]u8 = @splat(0),
sub_type_buf: [127]u8 = @splat(0),
/// String "UTF-8" continued by null characters.
const default_charset = .{ 'U', 'T', 'F', '-', '8' } ++ .{0} ** 36;
const default_charset_len = 5;
@@ -64,10 +61,7 @@ pub const ContentType = union(ContentTypeEnum) {
image_webp: void,
application_json: void,
unknown: void,
other: struct {
type: []const u8,
sub_type: []const u8,
},
other: struct { type: []const u8, sub_type: []const u8 },
};
pub fn contentTypeString(mime: *const Mime) []const u8 {
@@ -118,18 +112,17 @@ fn parseCharset(value: []const u8) error{ CharsetTooBig, Invalid }![]const u8 {
return value;
}
pub fn parse(input: []const u8) !Mime {
pub fn parse(input: []u8) !Mime {
if (input.len > 255) {
return error.TooBig;
}
var buf: [255]u8 = undefined;
const normalized = std.ascii.lowerString(&buf, std.mem.trim(u8, input, &std.ascii.whitespace));
// Zig's trim API is broken. The return type is always `[]const u8`,
// even if the input type is `[]u8`. @constCast is safe here.
var normalized = @constCast(std.mem.trim(u8, input, &std.ascii.whitespace));
_ = std.ascii.lowerString(normalized, normalized);
var mime = Mime{ .content_type = undefined };
const content_type, const type_len = try parseContentType(normalized, &mime.type_buf, &mime.sub_type_buf);
const content_type, const type_len = try parseContentType(normalized);
if (type_len >= normalized.len) {
return .{ .content_type = content_type };
}
@@ -170,12 +163,13 @@ pub fn parse(input: []const u8) !Mime {
}
}
mime.params = params;
mime.charset = charset;
mime.charset_len = charset_len;
mime.content_type = content_type;
mime.is_default_charset = !has_explicit_charset;
return mime;
return .{
.params = params,
.charset = charset,
.charset_len = charset_len,
.content_type = content_type,
.is_default_charset = !has_explicit_charset,
};
}
/// Prescan the first 1024 bytes of an HTML document for a charset declaration.
@@ -401,7 +395,7 @@ pub fn isText(mime: *const Mime) bool {
}
// we expect value to be lowercase
fn parseContentType(value: []const u8, type_buf: []u8, sub_type_buf: []u8) !struct { ContentType, usize } {
fn parseContentType(value: []const u8) !struct { ContentType, usize } {
const end = std.mem.indexOfScalarPos(u8, value, 0, ';') orelse value.len;
const type_name = trimRight(value[0..end]);
const attribute_start = end + 1;
@@ -450,18 +444,10 @@ fn parseContentType(value: []const u8, type_buf: []u8, sub_type_buf: []u8) !stru
return error.Invalid;
}
@memcpy(type_buf[0..main_type.len], main_type);
@memcpy(sub_type_buf[0..sub_type.len], sub_type);
return .{
.{
.other = .{
.type = type_buf[0..main_type.len],
.sub_type = sub_type_buf[0..sub_type.len],
},
},
attribute_start,
};
return .{ .{ .other = .{
.type = main_type,
.sub_type = sub_type,
} }, attribute_start };
}
const VALID_CODEPOINTS = blk: {
@@ -475,13 +461,6 @@ const VALID_CODEPOINTS = blk: {
break :blk v;
};
pub fn typeString(self: *const Mime) []const u8 {
return switch (self.content_type) {
.other => |o| o.type[0..o.type_len],
else => "",
};
}
fn validType(value: []const u8) bool {
for (value) |b| {
if (VALID_CODEPOINTS[b] == false) {

View File

@@ -487,7 +487,6 @@ pub fn navigate(self: *Page, request_url: [:0]const u8, opts: NavigateOpts) !voi
return error.InjectBlankFailed;
};
}
self.documentIsComplete();
session.notification.dispatch(.page_navigate, &.{
.frame_id = self._frame_id,
@@ -519,6 +518,8 @@ pub fn navigate(self: *Page, request_url: [:0]const u8, opts: NavigateOpts) !voi
// force next request id manually b/c we won't create a real req.
_ = session.browser.http_client.incrReqId();
self.documentIsComplete();
return;
}
@@ -738,6 +739,12 @@ pub fn _documentIsLoaded(self: *Page) !void {
self.document.asEventTarget(),
event,
);
self._session.notification.dispatch(.page_dom_content_loaded, &.{
.frame_id = self._frame_id,
.req_id = self._req_id,
.timestamp = timestamp(.monotonic),
});
}
pub fn scriptsCompletedLoading(self: *Page) void {
@@ -796,19 +803,6 @@ pub fn documentIsComplete(self: *Page) void {
self._documentIsComplete() catch |err| {
log.err(.page, "document is complete", .{ .err = err, .type = self._type, .url = self.url });
};
if (self._navigated_options) |no| {
// _navigated_options will be null in special short-circuit cases, like
// "navigating" to about:blank, in which case this notification has
// already been sent
self._session.notification.dispatch(.page_navigated, &.{
.frame_id = self._frame_id,
.req_id = self._req_id,
.opts = no,
.url = self.url,
.timestamp = timestamp(.monotonic),
});
}
}
fn _documentIsComplete(self: *Page) !void {
@@ -827,6 +821,12 @@ fn _documentIsComplete(self: *Page) !void {
try self._event_manager.dispatchDirect(window_target, event, self.window._on_load, .{ .inject_target = false, .context = "page load" });
}
self._session.notification.dispatch(.page_loaded, &.{
.frame_id = self._frame_id,
.req_id = self._req_id,
.timestamp = timestamp(.monotonic),
});
if (self._event_manager.hasDirectListeners(window_target, "pageshow", self.window._on_pageshow)) {
const pageshow_event = (try PageTransitionEvent.initTrusted(comptime .wrap("pageshow"), .{}, self)).asEvent();
try self._event_manager.dispatchDirect(window_target, pageshow_event, self.window._on_pageshow, .{ .context = "page show" });
@@ -854,10 +854,12 @@ fn notifyParentLoadComplete(self: *Page) void {
parent.iframeCompletedLoading(self.iframe.?);
}
fn pageHeaderDoneCallback(response: HttpClient.Response) !bool {
var self: *Page = @ptrCast(@alignCast(response.ctx));
fn pageHeaderDoneCallback(transfer: *HttpClient.Transfer) !bool {
var self: *Page = @ptrCast(@alignCast(transfer.ctx));
const response_url = response.url();
const header = &transfer.response_header.?;
const response_url = std.mem.span(header.url);
if (std.mem.eql(u8, response_url, self.url) == false) {
// would be different than self.url in the case of a redirect
self.url = try self.arena.dupeZ(u8, response_url);
@@ -871,23 +873,36 @@ fn pageHeaderDoneCallback(response: HttpClient.Response) !bool {
if (comptime IS_DEBUG) {
log.debug(.page, "navigate header", .{
.url = self.url,
.status = response.status(),
.content_type = response.contentType(),
.status = header.status,
.content_type = header.contentType(),
.type = self._type,
});
}
if (self._navigated_options) |no| {
// _navigated_options will be null in special short-circuit cases, like
// "navigating" to about:blank, in which case this notification has
// already been sent
self._session.notification.dispatch(.page_navigated, &.{
.frame_id = self._frame_id,
.req_id = self._req_id,
.opts = no,
.url = self.url,
.timestamp = timestamp(.monotonic),
});
}
return true;
}
fn pageDataCallback(response: HttpClient.Response, data: []const u8) !void {
var self: *Page = @ptrCast(@alignCast(response.ctx));
fn pageDataCallback(transfer: *HttpClient.Transfer, data: []const u8) !void {
var self: *Page = @ptrCast(@alignCast(transfer.ctx));
if (self._parse_state == .pre) {
// we lazily do this, because we might need the first chunk of data
// to sniff the content type
var mime: Mime = blk: {
if (response.contentType()) |ct| {
if (transfer.response_header.?.contentType()) |ct| {
break :blk try Mime.parse(ct);
}
break :blk Mime.sniff(data);

View File

@@ -694,86 +694,82 @@ pub const Script = struct {
self.manager.page.releaseArena(self.arena);
}
fn startCallback(response: HttpClient.Response) !void {
log.debug(.http, "script fetch start", .{ .req = response });
fn startCallback(transfer: *HttpClient.Transfer) !void {
log.debug(.http, "script fetch start", .{ .req = transfer });
}
fn headerCallback(response: HttpClient.Response) !bool {
const self: *Script = @ptrCast(@alignCast(response.ctx));
self.status = response.status().?;
if (response.status() != 200) {
fn headerCallback(transfer: *HttpClient.Transfer) !bool {
const self: *Script = @ptrCast(@alignCast(transfer.ctx));
const header = &transfer.response_header.?;
self.status = header.status;
if (header.status != 200) {
log.info(.http, "script header", .{
.req = response,
.status = response.status(),
.content_type = response.contentType(),
.req = transfer,
.status = header.status,
.content_type = header.contentType(),
});
return false;
}
if (comptime IS_DEBUG) {
log.debug(.http, "script header", .{
.req = response,
.status = response.status(),
.content_type = response.contentType(),
.req = transfer,
.status = header.status,
.content_type = header.contentType(),
});
}
switch (response.inner) {
.transfer => |transfer| {
// temp debug, trying to figure out why the next assert sometimes
// fails. Is the buffer just corrupt or is headerCallback really
// being called twice?
lp.assert(self.header_callback_called == false, "ScriptManager.Header recall", .{
.m = @tagName(std.meta.activeTag(self.mode)),
.a1 = self.debug_transfer_id,
.a2 = self.debug_transfer_tries,
.a3 = self.debug_transfer_aborted,
.a4 = self.debug_transfer_bytes_received,
.a5 = self.debug_transfer_notified_fail,
.a7 = self.debug_transfer_intercept_state,
.a8 = self.debug_transfer_auth_challenge,
.a9 = self.debug_transfer_easy_id,
.b1 = transfer.id,
.b2 = transfer._tries,
.b3 = transfer.aborted,
.b4 = transfer.bytes_received,
.b5 = transfer._notified_fail,
.b7 = @intFromEnum(transfer._intercept_state),
.b8 = transfer._auth_challenge != null,
.b9 = if (transfer._conn) |c| @intFromPtr(c._easy) else 0,
});
self.header_callback_called = true;
self.debug_transfer_id = transfer.id;
self.debug_transfer_tries = transfer._tries;
self.debug_transfer_aborted = transfer.aborted;
self.debug_transfer_bytes_received = transfer.bytes_received;
self.debug_transfer_notified_fail = transfer._notified_fail;
self.debug_transfer_intercept_state = @intFromEnum(transfer._intercept_state);
self.debug_transfer_auth_challenge = transfer._auth_challenge != null;
self.debug_transfer_easy_id = if (transfer._conn) |c| @intFromPtr(c._easy) else 0;
},
else => {},
{
// temp debug, trying to figure out why the next assert sometimes
// fails. Is the buffer just corrupt or is headerCallback really
// being called twice?
lp.assert(self.header_callback_called == false, "ScriptManager.Header recall", .{
.m = @tagName(std.meta.activeTag(self.mode)),
.a1 = self.debug_transfer_id,
.a2 = self.debug_transfer_tries,
.a3 = self.debug_transfer_aborted,
.a4 = self.debug_transfer_bytes_received,
.a5 = self.debug_transfer_notified_fail,
.a7 = self.debug_transfer_intercept_state,
.a8 = self.debug_transfer_auth_challenge,
.a9 = self.debug_transfer_easy_id,
.b1 = transfer.id,
.b2 = transfer._tries,
.b3 = transfer.aborted,
.b4 = transfer.bytes_received,
.b5 = transfer._notified_fail,
.b7 = @intFromEnum(transfer._intercept_state),
.b8 = transfer._auth_challenge != null,
.b9 = if (transfer._conn) |c| @intFromPtr(c._easy) else 0,
});
self.header_callback_called = true;
self.debug_transfer_id = transfer.id;
self.debug_transfer_tries = transfer._tries;
self.debug_transfer_aborted = transfer.aborted;
self.debug_transfer_bytes_received = transfer.bytes_received;
self.debug_transfer_notified_fail = transfer._notified_fail;
self.debug_transfer_intercept_state = @intFromEnum(transfer._intercept_state);
self.debug_transfer_auth_challenge = transfer._auth_challenge != null;
self.debug_transfer_easy_id = if (transfer._conn) |c| @intFromPtr(c._easy) else 0;
}
lp.assert(self.source.remote.capacity == 0, "ScriptManager.Header buffer", .{ .capacity = self.source.remote.capacity });
var buffer: std.ArrayList(u8) = .empty;
if (response.contentLength()) |cl| {
if (transfer.getContentLength()) |cl| {
try buffer.ensureTotalCapacity(self.arena, cl);
}
self.source = .{ .remote = buffer };
return true;
}
fn dataCallback(response: HttpClient.Response, data: []const u8) !void {
const self: *Script = @ptrCast(@alignCast(response.ctx));
self._dataCallback(response, data) catch |err| {
log.err(.http, "SM.dataCallback", .{ .err = err, .transfer = response, .len = data.len });
fn dataCallback(transfer: *HttpClient.Transfer, data: []const u8) !void {
const self: *Script = @ptrCast(@alignCast(transfer.ctx));
self._dataCallback(transfer, data) catch |err| {
log.err(.http, "SM.dataCallback", .{ .err = err, .transfer = transfer, .len = data.len });
return err;
};
}
fn _dataCallback(self: *Script, _: HttpClient.Response, data: []const u8) !void {
fn _dataCallback(self: *Script, _: *HttpClient.Transfer, data: []const u8) !void {
try self.source.remote.appendSlice(self.arena, data);
}

View File

@@ -127,16 +127,16 @@ fn handleBlobUrl(url: []const u8, resolver: js.PromiseResolver, page: *Page) !js
return resolver.promise();
}
fn httpStartCallback(response: HttpClient.Response) !void {
const self: *Fetch = @ptrCast(@alignCast(response.ctx));
fn httpStartCallback(transfer: *HttpClient.Transfer) !void {
const self: *Fetch = @ptrCast(@alignCast(transfer.ctx));
if (comptime IS_DEBUG) {
log.debug(.http, "request start", .{ .url = self._url, .source = "fetch" });
}
self._response._http_response = response;
self._response._transfer = transfer;
}
fn httpHeaderDoneCallback(response: HttpClient.Response) !bool {
const self: *Fetch = @ptrCast(@alignCast(response.ctx));
fn httpHeaderDoneCallback(transfer: *HttpClient.Transfer) !bool {
const self: *Fetch = @ptrCast(@alignCast(transfer.ctx));
if (self._signal) |signal| {
if (signal._aborted) {
@@ -145,24 +145,25 @@ fn httpHeaderDoneCallback(response: HttpClient.Response) !bool {
}
const arena = self._response._arena;
if (response.contentLength()) |cl| {
if (transfer.getContentLength()) |cl| {
try self._buf.ensureTotalCapacity(arena, cl);
}
const res = self._response;
const header = transfer.response_header.?;
if (comptime IS_DEBUG) {
log.debug(.http, "request header", .{
.source = "fetch",
.url = self._url,
.status = response.status(),
.status = header.status,
});
}
res._status = response.status().?;
res._status_text = std.http.Status.phrase(@enumFromInt(response.status().?)) orelse "";
res._url = try arena.dupeZ(u8, response.url());
res._is_redirected = response.redirectCount().? > 0;
res._status = header.status;
res._status_text = std.http.Status.phrase(@enumFromInt(header.status)) orelse "";
res._url = try arena.dupeZ(u8, std.mem.span(header.url));
res._is_redirected = header.redirect_count > 0;
// Determine response type based on origin comparison
const page_origin = URL.getOrigin(arena, self._page.url) catch null;
@@ -182,7 +183,7 @@ fn httpHeaderDoneCallback(response: HttpClient.Response) !bool {
res._type = .basic;
}
var it = response.headerIterator();
var it = transfer.responseHeaderIterator();
while (it.next()) |hdr| {
try res._headers.append(hdr.name, hdr.value, self._page);
}
@@ -190,8 +191,8 @@ fn httpHeaderDoneCallback(response: HttpClient.Response) !bool {
return true;
}
fn httpDataCallback(response: HttpClient.Response, data: []const u8) !void {
const self: *Fetch = @ptrCast(@alignCast(response.ctx));
fn httpDataCallback(transfer: *HttpClient.Transfer, data: []const u8) !void {
const self: *Fetch = @ptrCast(@alignCast(transfer.ctx));
// Check if aborted
if (self._signal) |signal| {
@@ -206,7 +207,7 @@ fn httpDataCallback(response: HttpClient.Response, data: []const u8) !void {
fn httpDoneCallback(ctx: *anyopaque) !void {
const self: *Fetch = @ptrCast(@alignCast(ctx));
var response = self._response;
response._http_response = null;
response._transfer = null;
response._body = self._buf.items;
log.info(.http, "request complete", .{
@@ -229,7 +230,7 @@ fn httpErrorCallback(ctx: *anyopaque, _: anyerror) void {
const self: *Fetch = @ptrCast(@alignCast(ctx));
var response = self._response;
response._http_response = null;
response._transfer = null;
// the response is only passed on v8 on success, if we're here, it's safe to
// clear this. (defer since `self is in the response's arena).
@@ -255,7 +256,7 @@ fn httpShutdownCallback(ctx: *anyopaque) void {
if (self._owns_response) {
var response = self._response;
response._http_response = null;
response._transfer = null;
response.deinit(self._page._session);
// Do not access `self` after this point: the Fetch struct was
// allocated from response._arena which has been released.

View File

@@ -48,7 +48,7 @@ _type: Type,
_status_text: []const u8,
_url: [:0]const u8,
_is_redirected: bool,
_http_response: ?HttpClient.Response = null,
_transfer: ?*HttpClient.Transfer = null,
const InitOpts = struct {
status: u16 = 200,
@@ -81,9 +81,9 @@ pub fn init(body_: ?[]const u8, opts_: ?InitOpts, page: *Page) !*Response {
}
pub fn deinit(self: *Response, session: *Session) void {
if (self._http_response) |resp| {
resp.abort(error.Abort);
self._http_response = null;
if (self._transfer) |transfer| {
transfer.abort(error.Abort);
self._transfer = null;
}
session.releaseArena(self._arena);
}
@@ -191,7 +191,7 @@ pub fn clone(self: *const Response, page: *Page) !*Response {
._type = self._type,
._is_redirected = self._is_redirected,
._headers = try Headers.init(.{ .obj = self._headers }, page),
._http_response = null,
._transfer = null,
};
return cloned;
}

View File

@@ -43,7 +43,8 @@ _rc: lp.RC(u8) = .{},
_page: *Page,
_proto: *XMLHttpRequestEventTarget,
_arena: Allocator,
_http_response: ?HttpClient.Response = null,
_transfer: ?*HttpClient.Transfer = null,
_has_ref: bool = false,
_url: [:0]const u8 = "",
_method: net_http.Method = .GET,
@@ -99,9 +100,9 @@ pub fn init(page: *Page) !*XMLHttpRequest {
}
pub fn deinit(self: *XMLHttpRequest, session: *Session) void {
if (self._http_response) |resp| {
resp.abort(error.Abort);
self._http_response = null;
if (self._transfer) |transfer| {
transfer.abort(error.Abort);
self._transfer = null;
}
if (self._on_ready_state_change) |func| {
@@ -136,6 +137,14 @@ pub fn deinit(self: *XMLHttpRequest, session: *Session) void {
session.releaseArena(self._arena);
}
fn releaseSelfRef(self: *XMLHttpRequest) void {
if (self._has_ref == false) {
return;
}
self.releaseRef(self._page._session);
self._has_ref = false;
}
pub fn releaseRef(self: *XMLHttpRequest, session: *Session) void {
self._rc.release(self, session);
}
@@ -175,9 +184,9 @@ pub fn setWithCredentials(self: *XMLHttpRequest, value: bool) !void {
// TODO: url should be a union, as it can be multiple things
pub fn open(self: *XMLHttpRequest, method_: []const u8, url: [:0]const u8) !void {
// Abort any in-progress request
if (self._http_response) |transfer| {
if (self._transfer) |transfer| {
transfer.abort(error.Abort);
self._http_response = null;
self._transfer = null;
}
// Reset internal state
@@ -252,6 +261,8 @@ pub fn send(self: *XMLHttpRequest, body_: ?[]const u8) !void {
.error_callback = httpErrorCallback,
.shutdown_callback = httpShutdownCallback,
});
self.acquireRef();
self._has_ref = true;
}
fn handleBlobUrl(self: *XMLHttpRequest, page: *Page) !void {
@@ -387,33 +398,34 @@ pub fn getResponseXML(self: *XMLHttpRequest, page: *Page) !?*Node.Document {
};
}
fn httpStartCallback(response: HttpClient.Response) !void {
const self: *XMLHttpRequest = @ptrCast(@alignCast(response.ctx));
fn httpStartCallback(transfer: *HttpClient.Transfer) !void {
const self: *XMLHttpRequest = @ptrCast(@alignCast(transfer.ctx));
if (comptime IS_DEBUG) {
log.debug(.http, "request start", .{ .method = self._method, .url = self._url, .source = "xhr" });
}
self._http_response = response;
self.acquireRef();
self._transfer = transfer;
}
fn httpHeaderCallback(response: HttpClient.Response, header: net_http.Header) !void {
const self: *XMLHttpRequest = @ptrCast(@alignCast(response.ctx));
fn httpHeaderCallback(transfer: *HttpClient.Transfer, header: net_http.Header) !void {
const self: *XMLHttpRequest = @ptrCast(@alignCast(transfer.ctx));
const joined = try std.fmt.allocPrint(self._arena, "{s}: {s}", .{ header.name, header.value });
try self._response_headers.append(self._arena, joined);
}
fn httpHeaderDoneCallback(response: HttpClient.Response) !bool {
const self: *XMLHttpRequest = @ptrCast(@alignCast(response.ctx));
fn httpHeaderDoneCallback(transfer: *HttpClient.Transfer) !bool {
const self: *XMLHttpRequest = @ptrCast(@alignCast(transfer.ctx));
const header = &transfer.response_header.?;
if (comptime IS_DEBUG) {
log.debug(.http, "request header", .{
.source = "xhr",
.url = self._url,
.status = response.status(),
.status = header.status,
});
}
if (response.contentType()) |ct| {
if (header.contentType()) |ct| {
self._response_mime = Mime.parse(ct) catch |e| {
log.info(.http, "invalid content type", .{
.content_Type = ct,
@@ -424,18 +436,18 @@ fn httpHeaderDoneCallback(response: HttpClient.Response) !bool {
};
}
var it = response.headerIterator();
var it = transfer.responseHeaderIterator();
while (it.next()) |hdr| {
const joined = try std.fmt.allocPrint(self._arena, "{s}: {s}", .{ hdr.name, hdr.value });
try self._response_headers.append(self._arena, joined);
}
self._response_status = response.status().?;
if (response.contentLength()) |cl| {
self._response_status = header.status;
if (transfer.getContentLength()) |cl| {
self._response_len = cl;
try self._response_data.ensureTotalCapacity(self._arena, cl);
}
self._response_url = try self._arena.dupeZ(u8, response.url());
self._response_url = try self._arena.dupeZ(u8, std.mem.span(header.url));
const page = self._page;
@@ -450,8 +462,8 @@ fn httpHeaderDoneCallback(response: HttpClient.Response) !bool {
return true;
}
fn httpDataCallback(response: HttpClient.Response, data: []const u8) !void {
const self: *XMLHttpRequest = @ptrCast(@alignCast(response.ctx));
fn httpDataCallback(transfer: *HttpClient.Transfer, data: []const u8) !void {
const self: *XMLHttpRequest = @ptrCast(@alignCast(transfer.ctx));
try self._response_data.appendSlice(self._arena, data);
const page = self._page;
@@ -474,7 +486,7 @@ fn httpDoneCallback(ctx: *anyopaque) !void {
// Not that the request is done, the http/client will free the transfer
// object. It isn't safe to keep it around.
self._http_response = null;
self._transfer = null;
const page = self._page;
@@ -497,24 +509,24 @@ fn httpErrorCallback(ctx: *anyopaque, err: anyerror) void {
const self: *XMLHttpRequest = @ptrCast(@alignCast(ctx));
// http client will close it after an error, it isn't safe to keep around
self.handleError(err);
if (self._http_response != null) {
self._http_response = null;
self.releaseRef(self._page._session);
if (self._transfer != null) {
self._transfer = null;
}
self.releaseSelfRef();
}
fn httpShutdownCallback(ctx: *anyopaque) void {
const self: *XMLHttpRequest = @ptrCast(@alignCast(ctx));
self._http_response = null;
self._transfer = null;
}
pub fn abort(self: *XMLHttpRequest) void {
self.handleError(error.Abort);
if (self._http_response) |resp| {
self._http_response = null;
resp.abort(error.Abort);
self.releaseRef(self._page._session);
if (self._transfer) |transfer| {
self._transfer = null;
transfer.abort(error.Abort);
}
self.releaseSelfRef();
}
fn handleError(self: *XMLHttpRequest, err: anyerror) void {

View File

@@ -432,6 +432,8 @@ pub fn BrowserContext(comptime CDP_T: type) type {
try notification.register(.page_navigate, self, onPageNavigate);
try notification.register(.page_navigated, self, onPageNavigated);
try notification.register(.page_frame_created, self, onPageFrameCreated);
try notification.register(.page_dom_content_loaded, self, onPageDOMContentLoaded);
try notification.register(.page_loaded, self, onPageLoaded);
}
pub fn deinit(self: *Self) void {
@@ -607,6 +609,16 @@ pub fn BrowserContext(comptime CDP_T: type) type {
return @import("domains/page.zig").pageFrameCreated(self, msg);
}
pub fn onPageDOMContentLoaded(ctx: *anyopaque, msg: *const Notification.PageDOMContentLoaded) !void {
const self: *Self = @ptrCast(@alignCast(ctx));
return @import("domains/page.zig").pageDOMContentLoaded(self, msg);
}
pub fn onPageLoaded(ctx: *anyopaque, msg: *const Notification.PageLoaded) !void {
const self: *Self = @ptrCast(@alignCast(ctx));
return @import("domains/page.zig").pageLoaded(self, msg);
}
pub fn onPageNetworkIdle(ctx: *anyopaque, msg: *const Notification.PageNetworkIdle) !void {
const self: *Self = @ptrCast(@alignCast(ctx));
return @import("domains/page.zig").pageNetworkIdle(self, msg);

View File

@@ -30,6 +30,7 @@ pub fn processMessage(cmd: anytype) !void {
getMarkdown,
getSemanticTree,
getInteractiveElements,
getNodeDetails,
getStructuredData,
detectForms,
clickNode,
@@ -42,6 +43,7 @@ pub fn processMessage(cmd: anytype) !void {
.getMarkdown => return getMarkdown(cmd),
.getSemanticTree => return getSemanticTree(cmd),
.getInteractiveElements => return getInteractiveElements(cmd),
.getNodeDetails => return getNodeDetails(cmd),
.getStructuredData => return getStructuredData(cmd),
.detectForms => return detectForms(cmd),
.clickNode => return clickNode(cmd),
@@ -141,6 +143,24 @@ fn getInteractiveElements(cmd: anytype) !void {
}, .{});
}
fn getNodeDetails(cmd: anytype) !void {
const Params = struct {
backendNodeId: Node.Id,
};
const params = (try cmd.params(Params)) orelse return error.InvalidParam;
const bc = cmd.browser_context orelse return error.NoBrowserContext;
const page = bc.session.currentPage() orelse return error.PageNotLoaded;
const node = (bc.node_registry.lookup_by_id.get(params.backendNodeId) orelse return error.InvalidNodeId).dom;
const details = SemanticTree.getNodeDetails(cmd.arena, node, &bc.node_registry, page) catch return error.InternalError;
return cmd.sendResult(.{
.nodeDetails = details,
}, .{});
}
fn getStructuredData(cmd: anytype) !void {
const bc = cmd.browser_context orelse return error.NoBrowserContext;
const page = bc.session.currentPage() orelse return error.PageNotLoaded;

View File

@@ -420,7 +420,6 @@ pub fn pageNavigated(arena: Allocator, bc: anytype, event: *const Notification.P
// things, but no session.
const session_id = bc.session_id orelse return;
const timestamp = event.timestamp;
const frame_id = &id.toFrameId(event.frame_id);
const loader_id = &id.toLoaderId(event.req_id);
@@ -472,9 +471,9 @@ pub fn pageNavigated(arena: Allocator, bc: anytype, event: *const Notification.P
const page = bc.session.currentPage() orelse return error.PageNotLoaded;
// When we actually recreated the context we should have the inspector send
// this event, see: resetContextGroup Sending this event will tell the
// client that the context ids they had are invalid and the context shouls
// be dropped The client will expect us to send new contextCreated events,
// this event, see: resetContextGroup. Sending this event will tell the
// client that the context ids they had are invalid and the context should
// be dropped. The client will expect us to send new contextCreated events,
// such that the client has new id's for the active contexts.
// Only send executionContextsCleared for main frame navigations. For child
// frames (iframes), clearing all contexts would destroy the main frame's
@@ -484,6 +483,18 @@ pub fn pageNavigated(arena: Allocator, bc: anytype, event: *const Notification.P
try cdp.sendEvent("Runtime.executionContextsCleared", null, .{ .session_id = session_id });
}
// frameNavigated event
try cdp.sendEvent("Page.frameNavigated", .{
.type = "Navigation",
.frame = Frame{
.id = frame_id,
.url = event.url,
.loaderId = loader_id,
.securityOrigin = bc.security_origin,
.secureContextType = bc.secure_context_type,
},
}, .{ .session_id = session_id });
{
const aux_data = try std.fmt.allocPrint(arena, "{{\"isDefault\":true,\"type\":\"default\",\"frameId\":\"{s}\",\"loaderId\":\"{s}\"}}", .{ frame_id, loader_id });
@@ -554,18 +565,22 @@ pub fn pageNavigated(arena: Allocator, bc: anytype, event: *const Notification.P
// chromedp client expects to receive the events is this order.
// see https://github.com/chromedp/chromedp/issues/1558
try cdp.sendEvent("DOM.documentUpdated", null, .{ .session_id = session_id });
}
pub fn pageDOMContentLoaded(bc: anytype, event: *const Notification.PageDOMContentLoaded) !void {
const session_id = bc.session_id orelse return;
const timestamp = event.timestamp;
var cdp = bc.cdp;
// domContentEventFired event
// TODO: partially hard coded
try cdp.sendEvent(
"Page.domContentEventFired",
.{ .timestamp = timestamp },
.{ .session_id = session_id },
);
// lifecycle DOMContentLoaded event
// TODO: partially hard coded
if (bc.page_life_cycle_events) {
const frame_id = &id.toFrameId(event.frame_id);
const loader_id = &id.toLoaderId(event.req_id);
try cdp.sendEvent("Page.lifecycleEvent", LifecycleEvent{
.timestamp = timestamp,
.name = "DOMContentLoaded",
@@ -573,16 +588,23 @@ pub fn pageNavigated(arena: Allocator, bc: anytype, event: *const Notification.P
.loaderId = loader_id,
}, .{ .session_id = session_id });
}
}
pub fn pageLoaded(bc: anytype, event: *const Notification.PageLoaded) !void {
const session_id = bc.session_id orelse return;
const timestamp = event.timestamp;
var cdp = bc.cdp;
const frame_id = &id.toFrameId(event.frame_id);
// loadEventFired event
try cdp.sendEvent(
"Page.loadEventFired",
.{ .timestamp = timestamp },
.{ .session_id = session_id },
);
// lifecycle DOMContentLoaded event
if (bc.page_life_cycle_events) {
const loader_id = &id.toLoaderId(event.req_id);
try cdp.sendEvent("Page.lifecycleEvent", LifecycleEvent{
.timestamp = timestamp,
.name = "load",
@@ -591,7 +613,6 @@ pub fn pageNavigated(arena: Allocator, bc: anytype, event: *const Notification.P
}, .{ .session_id = session_id });
}
// frameStoppedLoading
return cdp.sendEvent("Page.frameStoppedLoading", .{
.frameId = frame_id,
}, .{ .session_id = session_id });

View File

@@ -237,6 +237,10 @@ pub fn RC(comptime T: type) type {
session.releaseArena(kv.value.arena);
}
}
pub fn format(self: @This(), writer: *std.Io.Writer) !void {
return writer.print("{d}", .{self._refs});
}
};
}

View File

@@ -39,7 +39,6 @@ pub const Scope = enum {
telemetry,
unknown_prop,
mcp,
cache,
};
const Opts = struct {

View File

@@ -75,6 +75,19 @@ pub const tool_list = [_]protocol.Tool{
\\}
),
},
.{
.name = "nodeDetails",
.description = "Get detailed information about a specific node by its backend node ID. Returns tag, role, name, interactivity, disabled state, value, input type, placeholder, href, checked state, and select options.",
.inputSchema = protocol.minify(
\\{
\\ "type": "object",
\\ "properties": {
\\ "backendNodeId": { "type": "integer", "description": "The backend node ID of the element to inspect." }
\\ },
\\ "required": ["backendNodeId"]
\\}
),
},
.{
.name = "interactiveElements",
.description = "Extract interactive elements from the opened page. If a url is provided, it navigates to that url first.",
@@ -256,6 +269,7 @@ const ToolAction = enum {
navigate,
markdown,
links,
nodeDetails,
interactiveElements,
structuredData,
detectForms,
@@ -272,6 +286,7 @@ const tool_map = std.StaticStringMap(ToolAction).initComptime(.{
.{ "navigate", .navigate },
.{ "markdown", .markdown },
.{ "links", .links },
.{ "nodeDetails", .nodeDetails },
.{ "interactiveElements", .interactiveElements },
.{ "structuredData", .structuredData },
.{ "detectForms", .detectForms },
@@ -305,6 +320,7 @@ pub fn handleCall(server: *Server, arena: std.mem.Allocator, req: protocol.Reque
.goto, .navigate => try handleGoto(server, arena, req.id.?, call_params.arguments),
.markdown => try handleMarkdown(server, arena, req.id.?, call_params.arguments),
.links => try handleLinks(server, arena, req.id.?, call_params.arguments),
.nodeDetails => try handleNodeDetails(server, arena, req.id.?, call_params.arguments),
.interactiveElements => try handleInteractiveElements(server, arena, req.id.?, call_params.arguments),
.structuredData => try handleStructuredData(server, arena, req.id.?, call_params.arguments),
.detectForms => try handleDetectForms(server, arena, req.id.?, call_params.arguments),
@@ -373,6 +389,32 @@ fn handleSemanticTree(server: *Server, arena: std.mem.Allocator, id: std.json.Va
};
}
fn handleNodeDetails(server: *Server, arena: std.mem.Allocator, id: std.json.Value, arguments: ?std.json.Value) !void {
const Params = struct {
backendNodeId: CDPNode.Id,
};
const args = try parseArgs(Params, arena, arguments, server, id, "nodeDetails");
_ = server.session.currentPage() orelse {
return server.sendError(id, .PageNotLoaded, "Page not loaded");
};
const node = server.node_registry.lookup_by_id.get(args.backendNodeId) orelse {
return server.sendError(id, .InvalidParams, "Node not found");
};
const page = server.session.currentPage().?;
const details = lp.SemanticTree.getNodeDetails(arena, node.dom, &server.node_registry, page) catch {
return server.sendError(id, .InternalError, "Failed to get node details");
};
var aw: std.Io.Writer.Allocating = .init(arena);
try std.json.Stringify.value(&details, .{}, &aw.writer);
const content = [_]protocol.TextContent([]const u8){.{ .text = aw.written() }};
try server.sendResult(id, protocol.CallToolResult([]const u8){ .content = &content });
}
fn handleInteractiveElements(server: *Server, arena: std.mem.Allocator, id: std.json.Value, arguments: ?std.json.Value) !void {
const args = try parseArgsOrDefault(UrlParams, arena, arguments, server, id);
const page = try ensurePage(server, id, args.url);

View File

@@ -29,9 +29,7 @@ const libcurl = @import("../sys/libcurl.zig");
const net_http = @import("http.zig");
const RobotStore = @import("Robots.zig").RobotStore;
const WebBotAuth = @import("WebBotAuth.zig");
const Cache = @import("cache/Cache.zig");
const App = @import("../App.zig");
const Runtime = @This();
const Listener = struct {
@@ -47,12 +45,10 @@ const MAX_TICK_CALLBACKS = 16;
allocator: Allocator,
app: *App,
config: *const Config,
ca_blob: ?net_http.Blob,
robot_store: RobotStore,
web_bot_auth: ?WebBotAuth,
cache: ?Cache,
connections: []net_http.Connection,
available: std.DoublyLinkedList = .{},
@@ -204,7 +200,7 @@ fn globalDeinit() void {
libcurl.curl_global_cleanup();
}
pub fn init(allocator: Allocator, app: *App, config: *const Config) !Runtime {
pub fn init(allocator: Allocator, config: *const Config) !Runtime {
globalInit(allocator);
errdefer globalDeinit();
@@ -237,11 +233,6 @@ pub fn init(allocator: Allocator, app: *App, config: *const Config) !Runtime {
else
null;
const cache = if (config.cacheDir()) |cache_dir_path|
Cache{ .kind = .{ .fs = try .init(cache_dir_path) } }
else
null;
return .{
.allocator = allocator,
.config = config,
@@ -253,10 +244,8 @@ pub fn init(allocator: Allocator, app: *App, config: *const Config) !Runtime {
.available = available,
.connections = connections,
.app = app,
.robot_store = RobotStore.init(allocator),
.web_bot_auth = web_bot_auth,
.cache = cache,
};
}
@@ -289,8 +278,6 @@ pub fn deinit(self: *Runtime) void {
wba.deinit(self.allocator);
}
if (self.cache) |*cache| cache.deinit();
globalDeinit();
}

View File

@@ -1,156 +0,0 @@
// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
//
// Francis Bouvier <francis@lightpanda.io>
// Pierre Tachoire <pierre@lightpanda.io>
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
const std = @import("std");
const Http = @import("../http.zig");
const FsCache = @import("FsCache.zig");
/// A browser-wide cache for resources across the network.
/// This mostly conforms to RFC9111 with regards to caching behavior.
pub const Cache = @This();
kind: union(enum) {
fs: FsCache,
},
pub fn deinit(self: *Cache) void {
return switch (self.kind) {
inline else => |*c| c.deinit(),
};
}
pub fn get(self: *Cache, arena: std.mem.Allocator, req: CacheRequest) ?CachedResponse {
return switch (self.kind) {
inline else => |*c| c.get(arena, req),
};
}
pub fn put(self: *Cache, metadata: CachedMetadata, body: []const u8) !void {
return switch (self.kind) {
inline else => |*c| c.put(metadata, body),
};
}
pub const CacheControl = struct {
max_age: u64,
pub fn parse(value: []const u8) ?CacheControl {
var cc: CacheControl = .{ .max_age = undefined };
var max_age_set = false;
var max_s_age_set = false;
var is_public = false;
var iter = std.mem.splitScalar(u8, value, ',');
while (iter.next()) |part| {
const directive = std.mem.trim(u8, part, &std.ascii.whitespace);
if (std.ascii.eqlIgnoreCase(directive, "no-store")) {
return null;
} else if (std.ascii.eqlIgnoreCase(directive, "no-cache")) {
return null;
} else if (std.ascii.eqlIgnoreCase(directive, "public")) {
is_public = true;
} else if (std.ascii.startsWithIgnoreCase(directive, "max-age=")) {
if (!max_s_age_set) {
if (std.fmt.parseInt(u64, directive[8..], 10) catch null) |max_age| {
cc.max_age = max_age;
max_age_set = true;
}
}
} else if (std.ascii.startsWithIgnoreCase(directive, "s-maxage=")) {
if (std.fmt.parseInt(u64, directive[9..], 10) catch null) |max_age| {
cc.max_age = max_age;
max_age_set = true;
max_s_age_set = true;
}
}
}
if (!max_age_set) return null;
if (!is_public) return null;
if (cc.max_age == 0) return null;
return cc;
}
};
pub const CachedMetadata = struct {
url: [:0]const u8,
content_type: []const u8,
status: u16,
stored_at: i64,
age_at_store: u64,
cache_control: CacheControl,
/// Response Headers
headers: []const Http.Header,
/// These are Request Headers used by Vary.
vary_headers: []const Http.Header,
};
pub const CacheRequest = struct {
url: []const u8,
timestamp: i64,
request_headers: []const Http.Header,
};
pub const CachedData = union(enum) {
buffer: []const u8,
file: struct {
file: std.fs.File,
offset: usize,
len: usize,
},
};
pub const CachedResponse = struct {
metadata: CachedMetadata,
data: CachedData,
};
pub fn tryCache(
arena: std.mem.Allocator,
timestamp: i64,
url: [:0]const u8,
status: u16,
content_type: ?[]const u8,
cache_control: ?[]const u8,
vary: ?[]const u8,
age: ?[]const u8,
has_set_cookie: bool,
has_authorization: bool,
) !?CachedMetadata {
if (status != 200) return null;
if (has_set_cookie) return null;
if (has_authorization) return null;
if (vary) |v| if (std.mem.eql(u8, v, "*")) return null;
const cc = CacheControl.parse(cache_control orelse return null) orelse return null;
return .{
.url = url,
.content_type = if (content_type) |ct| try arena.dupe(u8, ct) else "application/octet-stream",
.status = status,
.stored_at = timestamp,
.age_at_store = if (age) |a| std.fmt.parseInt(u64, a, 10) catch 0 else 0,
.cache_control = cc,
.headers = &.{},
.vary_headers = &.{},
};
}

View File

@@ -1,580 +0,0 @@
// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
//
// Francis Bouvier <francis@lightpanda.io>
// Pierre Tachoire <pierre@lightpanda.io>
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
const std = @import("std");
const log = @import("../../log.zig");
const Cache = @import("Cache.zig");
const Http = @import("../http.zig");
const CacheRequest = Cache.CacheRequest;
const CachedMetadata = Cache.CachedMetadata;
const CachedResponse = Cache.CachedResponse;
const CACHE_VERSION: usize = 1;
const LOCK_STRIPES = 16;
comptime {
std.debug.assert(std.math.isPowerOfTwo(LOCK_STRIPES));
}
pub const FsCache = @This();
dir: std.fs.Dir,
locks: [LOCK_STRIPES]std.Thread.Mutex = .{std.Thread.Mutex{}} ** LOCK_STRIPES,
const CacheMetadataJson = struct {
version: usize,
metadata: CachedMetadata,
};
fn getLockPtr(self: *FsCache, key: *const [HASHED_KEY_LEN]u8) *std.Thread.Mutex {
const lock_idx = std.hash.Wyhash.hash(0, key[0..]) & (LOCK_STRIPES - 1);
return &self.locks[lock_idx];
}
const BODY_LEN_HEADER_LEN = 8;
const HASHED_KEY_LEN = 64;
const HASHED_PATH_LEN = HASHED_KEY_LEN + 6;
const HASHED_TMP_PATH_LEN = HASHED_PATH_LEN + 4;
fn hashKey(key: []const u8) [HASHED_KEY_LEN]u8 {
var digest: [std.crypto.hash.sha2.Sha256.digest_length]u8 = undefined;
std.crypto.hash.sha2.Sha256.hash(key, &digest, .{});
var hex: [HASHED_KEY_LEN]u8 = undefined;
_ = std.fmt.bufPrint(&hex, "{s}", .{std.fmt.bytesToHex(&digest, .lower)}) catch unreachable;
return hex;
}
fn cachePath(hashed_key: *const [HASHED_KEY_LEN]u8) [HASHED_PATH_LEN]u8 {
var path: [HASHED_PATH_LEN]u8 = undefined;
_ = std.fmt.bufPrint(&path, "{s}.cache", .{hashed_key}) catch unreachable;
return path;
}
fn cacheTmpPath(hashed_key: *const [HASHED_KEY_LEN]u8) [HASHED_TMP_PATH_LEN]u8 {
var path: [HASHED_TMP_PATH_LEN]u8 = undefined;
_ = std.fmt.bufPrint(&path, "{s}.cache.tmp", .{hashed_key}) catch unreachable;
return path;
}
pub fn init(path: []const u8) !FsCache {
const cwd = std.fs.cwd();
cwd.makeDir(path) catch |err| switch (err) {
error.PathAlreadyExists => {},
else => return err,
};
const dir = try cwd.openDir(path, .{ .iterate = true });
return .{ .dir = dir };
}
pub fn deinit(self: *FsCache) void {
self.dir.close();
}
pub fn get(self: *FsCache, arena: std.mem.Allocator, req: CacheRequest) ?Cache.CachedResponse {
const hashed_key = hashKey(req.url);
const cache_p = cachePath(&hashed_key);
const lock = self.getLockPtr(&hashed_key);
lock.lock();
defer lock.unlock();
const file = self.dir.openFile(&cache_p, .{ .mode = .read_only }) catch |e| {
switch (e) {
std.fs.File.OpenError.FileNotFound => {
log.debug(.cache, "miss", .{ .url = req.url, .hash = &hashed_key });
},
else => |err| {
log.warn(.cache, "open file err", .{ .url = req.url, .err = err });
},
}
return null;
};
var cleanup = false;
defer if (cleanup) {
file.close();
self.dir.deleteFile(&cache_p) catch |e| {
log.err(.cache, "clean fail", .{ .url = req.url, .file = &cache_p, .err = e });
};
};
var file_buf: [1024]u8 = undefined;
var len_buf: [BODY_LEN_HEADER_LEN]u8 = undefined;
var file_reader = file.reader(&file_buf);
const file_reader_iface = &file_reader.interface;
file_reader_iface.readSliceAll(&len_buf) catch |e| {
log.warn(.cache, "read header", .{ .url = req.url, .err = e });
cleanup = true;
return null;
};
const body_len = std.mem.readInt(u64, &len_buf, .little);
// Now we read metadata.
file_reader.seekTo(body_len + BODY_LEN_HEADER_LEN) catch |e| {
log.warn(.cache, "seek metadata", .{ .url = req.url, .err = e });
cleanup = true;
return null;
};
var json_reader = std.json.Reader.init(arena, file_reader_iface);
const cache_file: CacheMetadataJson = std.json.parseFromTokenSourceLeaky(
CacheMetadataJson,
arena,
&json_reader,
.{ .allocate = .alloc_always },
) catch |e| {
log.warn(.cache, "metadata parse", .{ .url = req.url, .err = e });
cleanup = true;
return null;
};
if (cache_file.version != CACHE_VERSION) {
log.warn(.cache, "version", .{ .url = req.url, .expected = CACHE_VERSION, .got = cache_file.version });
cleanup = true;
return null;
}
const metadata = cache_file.metadata;
// Check entry expiration.
const now = req.timestamp;
const age = (now - metadata.stored_at) + @as(i64, @intCast(metadata.age_at_store));
if (age < 0 or @as(u64, @intCast(age)) >= metadata.cache_control.max_age) {
log.debug(.cache, "expired", .{ .url = req.url });
cleanup = true;
return null;
}
// If we have Vary headers, ensure they are present & matching.
for (metadata.vary_headers) |vary_hdr| {
const name = vary_hdr.name;
const value = vary_hdr.value;
const incoming = for (req.request_headers) |h| {
if (std.ascii.eqlIgnoreCase(h.name, name)) break h.value;
} else "";
if (!std.ascii.eqlIgnoreCase(value, incoming)) {
log.debug(.cache, "vary mismatch", .{ .url = req.url, .header = name });
return null;
}
}
// On the case of a hash collision.
if (!std.ascii.eqlIgnoreCase(metadata.url, req.url)) {
log.warn(.cache, "collision", .{ .url = req.url, .expected = metadata.url, .got = req.url });
cleanup = true;
return null;
}
return .{
.metadata = metadata,
.data = .{
.file = .{
.file = file,
.offset = BODY_LEN_HEADER_LEN,
.len = body_len,
},
},
};
}
pub fn put(self: *FsCache, meta: CachedMetadata, body: []const u8) !void {
const hashed_key = hashKey(meta.url);
const cache_p = cachePath(&hashed_key);
const cache_tmp_p = cacheTmpPath(&hashed_key);
const lock = self.getLockPtr(&hashed_key);
lock.lock();
defer lock.unlock();
const file = try self.dir.createFile(&cache_tmp_p, .{ .truncate = true });
defer file.close();
var writer_buf: [1024]u8 = undefined;
var file_writer = file.writer(&writer_buf);
var file_writer_iface = &file_writer.interface;
var len_buf: [8]u8 = undefined;
std.mem.writeInt(u64, &len_buf, body.len, .little);
try file_writer_iface.writeAll(&len_buf);
try file_writer_iface.writeAll(body);
try std.json.Stringify.value(
CacheMetadataJson{ .version = CACHE_VERSION, .metadata = meta },
.{ .whitespace = .minified },
file_writer_iface,
);
try file_writer_iface.flush();
try self.dir.rename(&cache_tmp_p, &cache_p);
}
const testing = std.testing;
fn setupCache() !struct { tmp: testing.TmpDir, cache: Cache } {
var tmp = testing.tmpDir(.{});
errdefer tmp.cleanup();
const path = try tmp.dir.realpathAlloc(testing.allocator, ".");
defer testing.allocator.free(path);
return .{
.tmp = tmp,
.cache = Cache{ .kind = .{ .fs = try FsCache.init(path) } },
};
}
test "FsCache: basic put and get" {
var setup = try setupCache();
defer {
setup.cache.deinit();
setup.tmp.cleanup();
}
const cache = &setup.cache;
var arena = std.heap.ArenaAllocator.init(testing.allocator);
defer arena.deinit();
const now = std.time.timestamp();
const meta = CachedMetadata{
.url = "https://example.com",
.content_type = "text/html",
.status = 200,
.stored_at = now,
.age_at_store = 0,
.cache_control = .{ .max_age = 600 },
.headers = &.{},
.vary_headers = &.{},
};
const body = "hello world";
try cache.put(meta, body);
const result = cache.get(
arena.allocator(),
.{
.url = "https://example.com",
.timestamp = now,
.request_headers = &.{},
},
) orelse return error.CacheMiss;
const f = result.data.file;
const file = f.file;
defer file.close();
var buf: [64]u8 = undefined;
var file_reader = file.reader(&buf);
try file_reader.seekTo(f.offset);
const read_buf = try file_reader.interface.readAlloc(testing.allocator, f.len);
defer testing.allocator.free(read_buf);
try testing.expectEqualStrings(body, read_buf);
}
test "FsCache: get expiration" {
var setup = try setupCache();
defer {
setup.cache.deinit();
setup.tmp.cleanup();
}
const cache = &setup.cache;
var arena = std.heap.ArenaAllocator.init(testing.allocator);
defer arena.deinit();
const now = 5000;
const max_age = 1000;
const meta = CachedMetadata{
.url = "https://example.com",
.content_type = "text/html",
.status = 200,
.stored_at = now,
.age_at_store = 900,
.cache_control = .{ .max_age = max_age },
.headers = &.{},
.vary_headers = &.{},
};
const body = "hello world";
try cache.put(meta, body);
const result = cache.get(
arena.allocator(),
.{
.url = "https://example.com",
.timestamp = now + 50,
.request_headers = &.{},
},
) orelse return error.CacheMiss;
result.data.file.file.close();
try testing.expectEqual(null, cache.get(
arena.allocator(),
.{
.url = "https://example.com",
.timestamp = now + 200,
.request_headers = &.{},
},
));
try testing.expectEqual(null, cache.get(
arena.allocator(),
.{
.url = "https://example.com",
.timestamp = now,
.request_headers = &.{},
},
));
}
test "FsCache: put override" {
var setup = try setupCache();
defer {
setup.cache.deinit();
setup.tmp.cleanup();
}
const cache = &setup.cache;
var arena = std.heap.ArenaAllocator.init(testing.allocator);
defer arena.deinit();
{
const now = 5000;
const max_age = 1000;
const meta = CachedMetadata{
.url = "https://example.com",
.content_type = "text/html",
.status = 200,
.stored_at = now,
.age_at_store = 900,
.cache_control = .{ .max_age = max_age },
.headers = &.{},
.vary_headers = &.{},
};
const body = "hello world";
try cache.put(meta, body);
const result = cache.get(
arena.allocator(),
.{
.url = "https://example.com",
.timestamp = now,
.request_headers = &.{},
},
) orelse return error.CacheMiss;
const f = result.data.file;
const file = f.file;
defer file.close();
var buf: [64]u8 = undefined;
var file_reader = file.reader(&buf);
try file_reader.seekTo(f.offset);
const read_buf = try file_reader.interface.readAlloc(testing.allocator, f.len);
defer testing.allocator.free(read_buf);
try testing.expectEqualStrings(body, read_buf);
}
{
const now = 10000;
const max_age = 2000;
const meta = CachedMetadata{
.url = "https://example.com",
.content_type = "text/html",
.status = 200,
.stored_at = now,
.age_at_store = 0,
.cache_control = .{ .max_age = max_age },
.headers = &.{},
.vary_headers = &.{},
};
const body = "goodbye world";
try cache.put(meta, body);
const result = cache.get(
arena.allocator(),
.{
.url = "https://example.com",
.timestamp = now,
.request_headers = &.{},
},
) orelse return error.CacheMiss;
const f = result.data.file;
const file = f.file;
defer file.close();
var buf: [64]u8 = undefined;
var file_reader = file.reader(&buf);
try file_reader.seekTo(f.offset);
const read_buf = try file_reader.interface.readAlloc(testing.allocator, f.len);
defer testing.allocator.free(read_buf);
try testing.expectEqualStrings(body, read_buf);
}
}
test "FsCache: garbage file" {
var setup = try setupCache();
defer {
setup.cache.deinit();
setup.tmp.cleanup();
}
const hashed_key = hashKey("https://example.com");
const cache_p = cachePath(&hashed_key);
const file = try setup.cache.kind.fs.dir.createFile(&cache_p, .{});
try file.writeAll("this is not a valid cache file !@#$%");
file.close();
var arena = std.heap.ArenaAllocator.init(testing.allocator);
defer arena.deinit();
try testing.expectEqual(
null,
setup.cache.get(arena.allocator(), .{
.url = "https://example.com",
.timestamp = 5000,
.request_headers = &.{},
}),
);
}
test "FsCache: vary hit and miss" {
var setup = try setupCache();
defer {
setup.cache.deinit();
setup.tmp.cleanup();
}
const cache = &setup.cache;
var arena = std.heap.ArenaAllocator.init(testing.allocator);
defer arena.deinit();
const now = std.time.timestamp();
const meta = CachedMetadata{
.url = "https://example.com",
.content_type = "text/html",
.status = 200,
.stored_at = now,
.age_at_store = 0,
.cache_control = .{ .max_age = 600 },
.headers = &.{},
.vary_headers = &.{
.{ .name = "Accept-Encoding", .value = "gzip" },
},
};
try cache.put(meta, "hello world");
const result = cache.get(arena.allocator(), .{
.url = "https://example.com",
.timestamp = now,
.request_headers = &.{
.{ .name = "Accept-Encoding", .value = "gzip" },
},
}) orelse return error.CacheMiss;
result.data.file.file.close();
try testing.expectEqual(null, cache.get(arena.allocator(), .{
.url = "https://example.com",
.timestamp = now,
.request_headers = &.{
.{ .name = "Accept-Encoding", .value = "br" },
},
}));
try testing.expectEqual(null, cache.get(arena.allocator(), .{
.url = "https://example.com",
.timestamp = now,
.request_headers = &.{},
}));
const result2 = cache.get(arena.allocator(), .{
.url = "https://example.com",
.timestamp = now,
.request_headers = &.{
.{ .name = "Accept-Encoding", .value = "gzip" },
},
}) orelse return error.CacheMiss;
result2.data.file.file.close();
}
test "FsCache: vary multiple headers" {
var setup = try setupCache();
defer {
setup.cache.deinit();
setup.tmp.cleanup();
}
const cache = &setup.cache;
var arena = std.heap.ArenaAllocator.init(testing.allocator);
defer arena.deinit();
const now = std.time.timestamp();
const meta = CachedMetadata{
.url = "https://example.com",
.content_type = "text/html",
.status = 200,
.stored_at = now,
.age_at_store = 0,
.cache_control = .{ .max_age = 600 },
.headers = &.{},
.vary_headers = &.{
.{ .name = "Accept-Encoding", .value = "gzip" },
.{ .name = "Accept-Language", .value = "en" },
},
};
try cache.put(meta, "hello world");
const result = cache.get(arena.allocator(), .{
.url = "https://example.com",
.timestamp = now,
.request_headers = &.{
.{ .name = "Accept-Encoding", .value = "gzip" },
.{ .name = "Accept-Language", .value = "en" },
},
}) orelse return error.CacheMiss;
result.data.file.file.close();
try testing.expectEqual(null, cache.get(arena.allocator(), .{
.url = "https://example.com",
.timestamp = now,
.request_headers = &.{
.{ .name = "Accept-Encoding", .value = "gzip" },
.{ .name = "Accept-Language", .value = "fr" },
},
}));
}

View File

@@ -79,7 +79,7 @@ pub const Headers = struct {
self.headers = updated_headers;
}
pub fn parseHeader(header_str: []const u8) ?Header {
fn parseHeader(header_str: []const u8) ?Header {
const colon_pos = std.mem.indexOfScalar(u8, header_str, ':') orelse return null;
const name = std.mem.trim(u8, header_str[0..colon_pos], " \t");
@@ -88,9 +88,22 @@ pub const Headers = struct {
return .{ .name = name, .value = value };
}
pub fn iterator(self: Headers) HeaderIterator {
return .{ .curl_slist = .{ .header = self.headers } };
pub fn iterator(self: *Headers) Iterator {
return .{
.header = self.headers,
};
}
const Iterator = struct {
header: [*c]libcurl.CurlSList,
pub fn next(self: *Iterator) ?Header {
const h = self.header orelse return null;
self.header = h.*.next;
return parseHeader(std.mem.span(@as([*:0]const u8, @ptrCast(h.*.data))));
}
};
};
// In normal cases, the header iterator comes from the curl linked list.
@@ -99,7 +112,6 @@ pub const Headers = struct {
// This union, is an iterator that exposes the same API for either case.
pub const HeaderIterator = union(enum) {
curl: CurlHeaderIterator,
curl_slist: CurlSListIterator,
list: ListHeaderIterator,
pub fn next(self: *HeaderIterator) ?Header {
@@ -108,19 +120,6 @@ pub const HeaderIterator = union(enum) {
}
}
pub fn collect(self: *HeaderIterator, allocator: std.mem.Allocator) !std.ArrayList(Header) {
var list: std.ArrayList(Header) = .empty;
while (self.next()) |hdr| {
try list.append(allocator, .{
.name = try allocator.dupe(u8, hdr.name),
.value = try allocator.dupe(u8, hdr.value),
});
}
return list;
}
const CurlHeaderIterator = struct {
conn: *const Connection,
prev: ?*libcurl.CurlHeader = null,
@@ -137,16 +136,6 @@ pub const HeaderIterator = union(enum) {
}
};
const CurlSListIterator = struct {
header: [*c]libcurl.CurlSList,
pub fn next(self: *CurlSListIterator) ?Header {
const h = self.header orelse return null;
self.header = h.*.next;
return Headers.parseHeader(std.mem.span(@as([*:0]const u8, @ptrCast(h.*.data))));
}
};
const ListHeaderIterator = struct {
index: usize = 0,
list: []const Header,