Compare commits

..

1 Commits

Author SHA1 Message Date
Karl Seguin
c3c465347d Provide a failing callback to ValueSerializer for host objects
V8 needs our help when serializing host (e.g. a Zig dom instance) objects. We
don't currently have this implemented, so this provides the callback that throws
an error. Our wrapper returns Nothing when no callback is provided, which v8
doesn't allow (via assertion).
2026-03-31 16:13:55 +08:00
16 changed files with 220 additions and 1217 deletions

View File

@@ -55,7 +55,7 @@ pub fn init(allocator: Allocator, config: *const Config) !*App {
.arena_pool = undefined,
};
app.network = try Network.init(allocator, app, config);
app.network = try Network.init(allocator, config);
errdefer app.network.deinit();
app.platform = try Platform.init();

View File

@@ -157,13 +157,6 @@ pub fn userAgentSuffix(self: *const Config) ?[]const u8 {
};
}
pub fn cacheDir(self: *const Config) ?[]const u8 {
return switch (self.mode) {
inline .serve, .fetch, .mcp => |opts| opts.common.cache_dir,
else => null,
};
}
pub fn cdpTimeout(self: *const Config) usize {
return switch (self.mode) {
.serve => |opts| if (opts.timeout > 604_800) 604_800_000 else @as(usize, opts.timeout) * 1000,
@@ -273,7 +266,6 @@ pub const Common = struct {
log_format: ?log.Format = null,
log_filter_scopes: ?[]log.Scope = null,
user_agent_suffix: ?[]const u8 = null,
cache_dir: ?[]const u8 = null,
web_bot_auth_key_file: ?[]const u8 = null,
web_bot_auth_keyid: ?[]const u8 = null,
@@ -1008,14 +1000,5 @@ fn parseCommonArg(
return true;
}
if (std.mem.eql(u8, "--cache_dir", opt)) {
const str = args.next() orelse {
log.fatal(.app, "missing argument value", .{ .arg = "--cache_dir" });
return error.InvalidArgument;
};
common.cache_dir = try allocator.dupe(u8, str);
return true;
}
return false;
}

View File

@@ -32,9 +32,6 @@ const CookieJar = @import("webapi/storage/Cookie.zig").Jar;
const http = @import("../network/http.zig");
const Runtime = @import("../network/Runtime.zig");
const Robots = @import("../network/Robots.zig");
const Cache = @import("../network/cache/Cache.zig");
const CacheMetadata = Cache.CachedMetadata;
const CachedResponse = Cache.CachedResponse;
const IS_DEBUG = builtin.mode == .Debug;
@@ -314,81 +311,7 @@ pub fn request(self: *Client, req: Request) !void {
return self.fetchRobotsThenProcessRequest(robots_url, req);
}
fn serveFromCache(req: Request, cached: *const CachedResponse) !void {
const response = Response.fromCached(req.ctx, cached);
defer switch (cached.data) {
.buffer => |_| {},
.file => |f| f.file.close(),
};
if (req.start_callback) |cb| {
try cb(response);
}
const proceed = try req.header_callback(response);
if (!proceed) {
req.error_callback(req.ctx, error.Abort);
return;
}
switch (cached.data) {
.buffer => |data| {
if (data.len > 0) {
try req.data_callback(response, data);
}
},
.file => |f| {
const file = f.file;
var buf: [1024]u8 = undefined;
var file_reader = file.reader(&buf);
try file_reader.seekTo(f.offset);
const reader = &file_reader.interface;
var read_buf: [1024]u8 = undefined;
var remaining = f.len;
while (remaining > 0) {
const read_len = @min(read_buf.len, remaining);
const n = try reader.readSliceShort(read_buf[0..read_len]);
if (n == 0) break;
remaining -= n;
try req.data_callback(response, read_buf[0..n]);
}
},
}
try req.done_callback(req.ctx);
}
fn processRequest(self: *Client, req: Request) !void {
if (self.network.cache) |*cache| {
if (req.method == .GET) {
const arena = try self.network.app.arena_pool.acquire(.{ .debug = "HttpClient.processRequest.cache" });
defer self.network.app.arena_pool.release(arena);
var iter = req.headers.iterator();
const req_header_list = try iter.collect(arena);
if (cache.get(arena, .{
.url = req.url,
.timestamp = std.time.timestamp(),
.request_headers = req_header_list.items,
})) |cached| {
log.debug(.browser, "http.cache.get", .{
.url = req.url,
.found = true,
.metadata = cached.metadata,
});
defer req.headers.deinit();
return serveFromCache(req, &cached);
} else {
log.debug(.browser, "http.cache.get", .{ .url = req.url, .found = false });
}
}
}
const transfer = try self.makeTransfer(req);
transfer.req.notification.dispatch(.http_request_start, &.{ .transfer = transfer });
@@ -476,10 +399,8 @@ fn fetchRobotsThenProcessRequest(self: *Client, robots_url: [:0]const u8, req: R
try entry.value_ptr.append(self.allocator, req);
}
fn robotsHeaderCallback(response: Response) !bool {
const ctx: *RobotsRequestContext = @ptrCast(@alignCast(response.ctx));
// Robots callbacks only happen on real live requests.
const transfer = response.inner.transfer;
fn robotsHeaderCallback(transfer: *Transfer) !bool {
const ctx: *RobotsRequestContext = @ptrCast(@alignCast(transfer.ctx));
if (transfer.response_header) |hdr| {
log.debug(.browser, "robots status", .{ .status = hdr.status, .robots_url = ctx.robots_url });
@@ -493,8 +414,8 @@ fn robotsHeaderCallback(response: Response) !bool {
return true;
}
fn robotsDataCallback(response: Response, data: []const u8) !void {
const ctx: *RobotsRequestContext = @ptrCast(@alignCast(response.ctx));
fn robotsDataCallback(transfer: *Transfer, data: []const u8) !void {
const ctx: *RobotsRequestContext = @ptrCast(@alignCast(transfer.ctx));
try ctx.buffer.appendSlice(ctx.client.allocator, data);
}
@@ -713,43 +634,13 @@ fn makeTransfer(self: *Client, req: Request) !*Transfer {
.id = id,
.url = req.url,
.req = req,
.ctx = req.ctx,
.client = self,
.max_response_size = self.network.config.httpMaxResponseSize(),
};
return transfer;
}
fn requestFailed(transfer: *Transfer, err: anyerror, comptime execute_callback: bool) void {
if (transfer._notified_fail) {
// we can force a failed request within a callback, which will eventually
// result in this being called again in the more general loop. We do this
// because we can raise a more specific error inside a callback in some cases
return;
}
transfer._notified_fail = true;
transfer.req.notification.dispatch(.http_request_fail, &.{
.transfer = transfer,
.err = err,
});
if (execute_callback) {
transfer.req.error_callback(transfer.req.ctx, err);
} else if (transfer.req.shutdown_callback) |cb| {
cb(transfer.req.ctx);
}
}
// Same restriction as changeProxy. Should be ok since this is only called on
// BrowserContext deinit.
pub fn restoreOriginalProxy(self: *Client) !void {
try self.ensureNoActiveConnection();
self.http_proxy = self.network.config.httpProxy();
self.use_proxy = self.http_proxy != null;
}
fn makeRequest(self: *Client, conn: *http.Connection, transfer: *Transfer) anyerror!void {
{
// Reset per-response state for retries (auth challenge, queue).
@@ -783,7 +674,7 @@ fn makeRequest(self: *Client, conn: *http.Connection, transfer: *Transfer) anyer
self.active += 1;
if (transfer.req.start_callback) |cb| {
cb(Response.fromTransfer(transfer)) catch |err| {
cb(transfer) catch |err| {
transfer.deinit();
return err;
};
@@ -851,10 +742,7 @@ fn processOneMessage(self: *Client, msg: http.Handles.MultiMessage, transfer: *T
// TODO give a way to configure the number of auth retries.
if (transfer._auth_challenge != null and transfer._tries < 10) {
var wait_for_interception = false;
transfer.req.notification.dispatch(
.http_request_auth_required,
&.{ .transfer = transfer, .wait_for_interception = &wait_for_interception },
);
transfer.req.notification.dispatch(.http_request_auth_required, &.{ .transfer = transfer, .wait_for_interception = &wait_for_interception });
if (wait_for_interception) {
self.intercepted += 1;
if (comptime IS_DEBUG) {
@@ -953,11 +841,10 @@ fn processOneMessage(self: *Client, msg: http.Handles.MultiMessage, transfer: *T
}
}
const body = transfer._stream_buffer.items;
// Replay buffered body through user's data_callback.
if (transfer._stream_buffer.items.len > 0) {
try transfer.req.data_callback(Response.fromTransfer(transfer), body);
const body = transfer._stream_buffer.items;
try transfer.req.data_callback(transfer, body);
transfer.req.notification.dispatch(.http_response_data, &.{
.data = body,
@@ -974,21 +861,7 @@ fn processOneMessage(self: *Client, msg: http.Handles.MultiMessage, transfer: *T
// will load more resources.
transfer.releaseConn();
try transfer.req.done_callback(transfer.req.ctx);
if (transfer._pending_cache_metadata) |metadata| {
const cache = &self.network.cache.?;
// TODO: Support Vary Keying
const cache_key = transfer.req.url;
log.debug(.browser, "http cache", .{ .key = cache_key, .metadata = metadata });
cache.put(metadata, body) catch |err| {
log.warn(.http, "cache put failed", .{ .err = err });
};
log.debug(.browser, "http.cache.put", .{ .url = transfer.req.url });
}
try transfer.req.done_callback(transfer.ctx);
transfer.req.notification.dispatch(.http_request_done, &.{
.transfer = transfer,
});
@@ -1066,9 +939,9 @@ pub const Request = struct {
// arbitrary data that can be associated with this request
ctx: *anyopaque = undefined,
start_callback: ?*const fn (response: Response) anyerror!void = null,
header_callback: *const fn (response: Response) anyerror!bool,
data_callback: *const fn (response: Response, data: []const u8) anyerror!void,
start_callback: ?*const fn (transfer: *Transfer) anyerror!void = null,
header_callback: *const fn (transfer: *Transfer) anyerror!bool,
data_callback: *const fn (transfer: *Transfer, data: []const u8) anyerror!void,
done_callback: *const fn (ctx: *anyopaque) anyerror!void,
error_callback: *const fn (ctx: *anyopaque, err: anyerror) void,
shutdown_callback: ?*const fn (ctx: *anyopaque) void = null,
@@ -1094,84 +967,16 @@ pub const Request = struct {
};
};
pub const Response = struct {
ctx: *anyopaque,
inner: union(enum) {
transfer: *Transfer,
cached: *const CachedResponse,
},
pub fn fromTransfer(transfer: *Transfer) Response {
return .{ .ctx = transfer.req.ctx, .inner = .{ .transfer = transfer } };
}
pub fn fromCached(ctx: *anyopaque, resp: *const CachedResponse) Response {
return .{ .ctx = ctx, .inner = .{ .cached = resp } };
}
pub fn status(self: Response) ?u16 {
return switch (self.inner) {
.transfer => |t| if (t.response_header) |rh| rh.status else null,
.cached => |c| c.metadata.status,
};
}
pub fn contentType(self: Response) ?[]const u8 {
return switch (self.inner) {
.transfer => |t| if (t.response_header) |*rh| rh.contentType() else null,
.cached => |c| c.metadata.content_type,
};
}
pub fn contentLength(self: Response) ?u32 {
return switch (self.inner) {
.transfer => |t| t.getContentLength(),
.cached => |c| switch (c.data) {
.buffer => |buf| @intCast(buf.len),
.file => |f| @intCast(f.len),
},
};
}
pub fn redirectCount(self: Response) ?u32 {
return switch (self.inner) {
.transfer => |t| if (t.response_header) |rh| rh.redirect_count else null,
.cached => 0,
};
}
pub fn url(self: Response) [:0]const u8 {
return switch (self.inner) {
.transfer => |t| t.url,
.cached => |c| c.metadata.url,
};
}
pub fn headerIterator(self: Response) HeaderIterator {
return switch (self.inner) {
.transfer => |t| t.responseHeaderIterator(),
.cached => |c| HeaderIterator{ .list = .{ .list = c.metadata.headers } },
};
}
pub fn abort(self: Response, err: anyerror) void {
switch (self.inner) {
.transfer => |t| t.abort(err),
.cached => {},
}
}
};
pub const Transfer = struct {
arena: ArenaAllocator,
id: u32 = 0,
req: Request,
url: [:0]const u8,
ctx: *anyopaque, // copied from req.ctx to make it easier for callback handlers
client: *Client,
// total bytes received in the response, including the response status line,
// the headers, and the [encoded] body.
bytes_received: usize = 0,
_pending_cache_metadata: ?CacheMetadata = null,
aborted: bool = false,
@@ -1260,7 +1065,7 @@ pub const Transfer = struct {
// as abort (doesn't send a notification, doesn't invoke an error callback)
fn kill(self: *Transfer) void {
if (self.req.shutdown_callback) |cb| {
cb(self.req.ctx);
cb(self.ctx);
}
if (self._performing or self.client.performing) {
@@ -1296,7 +1101,7 @@ pub const Transfer = struct {
});
if (execute_callback) {
self.req.error_callback(self.req.ctx, err);
self.req.error_callback(self.ctx, err);
} else if (self.req.shutdown_callback) |cb| {
cb(self.ctx);
}
@@ -1543,60 +1348,11 @@ pub const Transfer = struct {
}
}
const proceed = transfer.req.header_callback(Response.fromTransfer(transfer)) catch |err| {
const proceed = transfer.req.header_callback(transfer) catch |err| {
log.err(.http, "header_callback", .{ .err = err, .req = transfer });
return err;
};
if (transfer.client.network.cache != null and transfer.req.method == .GET) {
const rh = &transfer.response_header.?;
const allocator = transfer.arena.allocator();
const vary = if (conn.getResponseHeader("vary", 0)) |h| h.value else null;
const maybe_cm = try Cache.tryCache(
allocator,
std.time.timestamp(),
transfer.url,
rh.status,
rh.contentType(),
if (conn.getResponseHeader("cache-control", 0)) |h| h.value else null,
vary,
if (conn.getResponseHeader("age", 0)) |h| h.value else null,
conn.getResponseHeader("set-cookie", 0) != null,
conn.getResponseHeader("authorization", 0) != null,
);
if (maybe_cm) |cm| {
transfer._pending_cache_metadata = cm;
var iter = transfer.responseHeaderIterator();
var header_list = try iter.collect(allocator);
const end_of_response = header_list.items.len;
transfer._pending_cache_metadata.?.headers = header_list.items[0..end_of_response];
if (vary) |vary_str| {
var req_it = transfer.req.headers.iterator();
while (req_it.next()) |hdr| {
var vary_iter = std.mem.splitScalar(u8, vary_str, ',');
while (vary_iter.next()) |part| {
const name = std.mem.trim(u8, part, &std.ascii.whitespace);
if (std.ascii.eqlIgnoreCase(hdr.name, name)) {
try header_list.append(allocator, .{
.name = try allocator.dupe(u8, hdr.name),
.value = try allocator.dupe(u8, hdr.value),
});
}
}
}
transfer._pending_cache_metadata.?.vary_headers = header_list.items[end_of_response..];
}
}
}
transfer.req.notification.dispatch(.http_response_header_done, &.{
.transfer = transfer,
});
@@ -1699,7 +1455,7 @@ pub const Transfer = struct {
fn _fulfill(transfer: *Transfer, status: u16, headers: []const http.Header, body: ?[]const u8) !void {
const req = &transfer.req;
if (req.start_callback) |cb| {
try cb(Response.fromTransfer(transfer));
try cb(transfer);
}
transfer.response_header = .{
@@ -1718,13 +1474,13 @@ pub const Transfer = struct {
}
lp.assert(transfer._header_done_called == false, "Transfer.fulfill header_done_called", .{});
if (try req.header_callback(Response.fromTransfer(transfer)) == false) {
if (try req.header_callback(transfer) == false) {
transfer.abort(error.Abort);
return;
}
if (body) |b| {
try req.data_callback(Response.fromTransfer(transfer), b);
try req.data_callback(transfer, b);
}
try req.done_callback(req.ctx);
@@ -1761,10 +1517,10 @@ pub const Transfer = struct {
};
const Noop = struct {
fn headerCallback(_: Response) !bool {
fn headerCallback(_: *Transfer) !bool {
return true;
}
fn dataCallback(_: Response, _: []const u8) !void {}
fn dataCallback(_: *Transfer, _: []const u8) !void {}
fn doneCallback(_: *anyopaque) !void {}
fn errorCallback(_: *anyopaque, _: anyerror) void {}
};

View File

@@ -27,9 +27,6 @@ charset: [41]u8 = default_charset,
charset_len: usize = default_charset_len,
is_default_charset: bool = true,
type_buf: [127]u8 = @splat(0),
sub_type_buf: [127]u8 = @splat(0),
/// String "UTF-8" continued by null characters.
const default_charset = .{ 'U', 'T', 'F', '-', '8' } ++ .{0} ** 36;
const default_charset_len = 5;
@@ -64,10 +61,7 @@ pub const ContentType = union(ContentTypeEnum) {
image_webp: void,
application_json: void,
unknown: void,
other: struct {
type: []const u8,
sub_type: []const u8,
},
other: struct { type: []const u8, sub_type: []const u8 },
};
pub fn contentTypeString(mime: *const Mime) []const u8 {
@@ -118,18 +112,17 @@ fn parseCharset(value: []const u8) error{ CharsetTooBig, Invalid }![]const u8 {
return value;
}
pub fn parse(input: []const u8) !Mime {
pub fn parse(input: []u8) !Mime {
if (input.len > 255) {
return error.TooBig;
}
var buf: [255]u8 = undefined;
const normalized = std.ascii.lowerString(&buf, std.mem.trim(u8, input, &std.ascii.whitespace));
// Zig's trim API is broken. The return type is always `[]const u8`,
// even if the input type is `[]u8`. @constCast is safe here.
var normalized = @constCast(std.mem.trim(u8, input, &std.ascii.whitespace));
_ = std.ascii.lowerString(normalized, normalized);
var mime = Mime{ .content_type = undefined };
const content_type, const type_len = try parseContentType(normalized, &mime.type_buf, &mime.sub_type_buf);
const content_type, const type_len = try parseContentType(normalized);
if (type_len >= normalized.len) {
return .{ .content_type = content_type };
}
@@ -170,12 +163,13 @@ pub fn parse(input: []const u8) !Mime {
}
}
mime.params = params;
mime.charset = charset;
mime.charset_len = charset_len;
mime.content_type = content_type;
mime.is_default_charset = !has_explicit_charset;
return mime;
return .{
.params = params,
.charset = charset,
.charset_len = charset_len,
.content_type = content_type,
.is_default_charset = !has_explicit_charset,
};
}
/// Prescan the first 1024 bytes of an HTML document for a charset declaration.
@@ -401,7 +395,7 @@ pub fn isText(mime: *const Mime) bool {
}
// we expect value to be lowercase
fn parseContentType(value: []const u8, type_buf: []u8, sub_type_buf: []u8) !struct { ContentType, usize } {
fn parseContentType(value: []const u8) !struct { ContentType, usize } {
const end = std.mem.indexOfScalarPos(u8, value, 0, ';') orelse value.len;
const type_name = trimRight(value[0..end]);
const attribute_start = end + 1;
@@ -450,18 +444,10 @@ fn parseContentType(value: []const u8, type_buf: []u8, sub_type_buf: []u8) !stru
return error.Invalid;
}
@memcpy(type_buf[0..main_type.len], main_type);
@memcpy(sub_type_buf[0..sub_type.len], sub_type);
return .{
.{
.other = .{
.type = type_buf[0..main_type.len],
.sub_type = sub_type_buf[0..sub_type.len],
},
},
attribute_start,
};
return .{ .{ .other = .{
.type = main_type,
.sub_type = sub_type,
} }, attribute_start };
}
const VALID_CODEPOINTS = blk: {
@@ -475,13 +461,6 @@ const VALID_CODEPOINTS = blk: {
break :blk v;
};
pub fn typeString(self: *const Mime) []const u8 {
return switch (self.content_type) {
.other => |o| o.type[0..o.type_len],
else => "",
};
}
fn validType(value: []const u8) bool {
for (value) |b| {
if (VALID_CODEPOINTS[b] == false) {

View File

@@ -854,10 +854,12 @@ fn notifyParentLoadComplete(self: *Page) void {
parent.iframeCompletedLoading(self.iframe.?);
}
fn pageHeaderDoneCallback(response: HttpClient.Response) !bool {
var self: *Page = @ptrCast(@alignCast(response.ctx));
fn pageHeaderDoneCallback(transfer: *HttpClient.Transfer) !bool {
var self: *Page = @ptrCast(@alignCast(transfer.ctx));
const response_url = response.url();
const header = &transfer.response_header.?;
const response_url = std.mem.span(header.url);
if (std.mem.eql(u8, response_url, self.url) == false) {
// would be different than self.url in the case of a redirect
self.url = try self.arena.dupeZ(u8, response_url);
@@ -871,8 +873,8 @@ fn pageHeaderDoneCallback(response: HttpClient.Response) !bool {
if (comptime IS_DEBUG) {
log.debug(.page, "navigate header", .{
.url = self.url,
.status = response.status(),
.content_type = response.contentType(),
.status = header.status,
.content_type = header.contentType(),
.type = self._type,
});
}
@@ -880,14 +882,14 @@ fn pageHeaderDoneCallback(response: HttpClient.Response) !bool {
return true;
}
fn pageDataCallback(response: HttpClient.Response, data: []const u8) !void {
var self: *Page = @ptrCast(@alignCast(response.ctx));
fn pageDataCallback(transfer: *HttpClient.Transfer, data: []const u8) !void {
var self: *Page = @ptrCast(@alignCast(transfer.ctx));
if (self._parse_state == .pre) {
// we lazily do this, because we might need the first chunk of data
// to sniff the content type
var mime: Mime = blk: {
if (response.contentType()) |ct| {
if (transfer.response_header.?.contentType()) |ct| {
break :blk try Mime.parse(ct);
}
break :blk Mime.sniff(data);

View File

@@ -694,86 +694,82 @@ pub const Script = struct {
self.manager.page.releaseArena(self.arena);
}
fn startCallback(response: HttpClient.Response) !void {
log.debug(.http, "script fetch start", .{ .req = response });
fn startCallback(transfer: *HttpClient.Transfer) !void {
log.debug(.http, "script fetch start", .{ .req = transfer });
}
fn headerCallback(response: HttpClient.Response) !bool {
const self: *Script = @ptrCast(@alignCast(response.ctx));
self.status = response.status().?;
if (response.status() != 200) {
fn headerCallback(transfer: *HttpClient.Transfer) !bool {
const self: *Script = @ptrCast(@alignCast(transfer.ctx));
const header = &transfer.response_header.?;
self.status = header.status;
if (header.status != 200) {
log.info(.http, "script header", .{
.req = response,
.status = response.status(),
.content_type = response.contentType(),
.req = transfer,
.status = header.status,
.content_type = header.contentType(),
});
return false;
}
if (comptime IS_DEBUG) {
log.debug(.http, "script header", .{
.req = response,
.status = response.status(),
.content_type = response.contentType(),
.req = transfer,
.status = header.status,
.content_type = header.contentType(),
});
}
switch (response.inner) {
.transfer => |transfer| {
// temp debug, trying to figure out why the next assert sometimes
// fails. Is the buffer just corrupt or is headerCallback really
// being called twice?
lp.assert(self.header_callback_called == false, "ScriptManager.Header recall", .{
.m = @tagName(std.meta.activeTag(self.mode)),
.a1 = self.debug_transfer_id,
.a2 = self.debug_transfer_tries,
.a3 = self.debug_transfer_aborted,
.a4 = self.debug_transfer_bytes_received,
.a5 = self.debug_transfer_notified_fail,
.a7 = self.debug_transfer_intercept_state,
.a8 = self.debug_transfer_auth_challenge,
.a9 = self.debug_transfer_easy_id,
.b1 = transfer.id,
.b2 = transfer._tries,
.b3 = transfer.aborted,
.b4 = transfer.bytes_received,
.b5 = transfer._notified_fail,
.b7 = @intFromEnum(transfer._intercept_state),
.b8 = transfer._auth_challenge != null,
.b9 = if (transfer._conn) |c| @intFromPtr(c._easy) else 0,
});
self.header_callback_called = true;
self.debug_transfer_id = transfer.id;
self.debug_transfer_tries = transfer._tries;
self.debug_transfer_aborted = transfer.aborted;
self.debug_transfer_bytes_received = transfer.bytes_received;
self.debug_transfer_notified_fail = transfer._notified_fail;
self.debug_transfer_intercept_state = @intFromEnum(transfer._intercept_state);
self.debug_transfer_auth_challenge = transfer._auth_challenge != null;
self.debug_transfer_easy_id = if (transfer._conn) |c| @intFromPtr(c._easy) else 0;
},
else => {},
{
// temp debug, trying to figure out why the next assert sometimes
// fails. Is the buffer just corrupt or is headerCallback really
// being called twice?
lp.assert(self.header_callback_called == false, "ScriptManager.Header recall", .{
.m = @tagName(std.meta.activeTag(self.mode)),
.a1 = self.debug_transfer_id,
.a2 = self.debug_transfer_tries,
.a3 = self.debug_transfer_aborted,
.a4 = self.debug_transfer_bytes_received,
.a5 = self.debug_transfer_notified_fail,
.a7 = self.debug_transfer_intercept_state,
.a8 = self.debug_transfer_auth_challenge,
.a9 = self.debug_transfer_easy_id,
.b1 = transfer.id,
.b2 = transfer._tries,
.b3 = transfer.aborted,
.b4 = transfer.bytes_received,
.b5 = transfer._notified_fail,
.b7 = @intFromEnum(transfer._intercept_state),
.b8 = transfer._auth_challenge != null,
.b9 = if (transfer._conn) |c| @intFromPtr(c._easy) else 0,
});
self.header_callback_called = true;
self.debug_transfer_id = transfer.id;
self.debug_transfer_tries = transfer._tries;
self.debug_transfer_aborted = transfer.aborted;
self.debug_transfer_bytes_received = transfer.bytes_received;
self.debug_transfer_notified_fail = transfer._notified_fail;
self.debug_transfer_intercept_state = @intFromEnum(transfer._intercept_state);
self.debug_transfer_auth_challenge = transfer._auth_challenge != null;
self.debug_transfer_easy_id = if (transfer._conn) |c| @intFromPtr(c._easy) else 0;
}
lp.assert(self.source.remote.capacity == 0, "ScriptManager.Header buffer", .{ .capacity = self.source.remote.capacity });
var buffer: std.ArrayList(u8) = .empty;
if (response.contentLength()) |cl| {
if (transfer.getContentLength()) |cl| {
try buffer.ensureTotalCapacity(self.arena, cl);
}
self.source = .{ .remote = buffer };
return true;
}
fn dataCallback(response: HttpClient.Response, data: []const u8) !void {
const self: *Script = @ptrCast(@alignCast(response.ctx));
self._dataCallback(response, data) catch |err| {
log.err(.http, "SM.dataCallback", .{ .err = err, .transfer = response, .len = data.len });
fn dataCallback(transfer: *HttpClient.Transfer, data: []const u8) !void {
const self: *Script = @ptrCast(@alignCast(transfer.ctx));
self._dataCallback(transfer, data) catch |err| {
log.err(.http, "SM.dataCallback", .{ .err = err, .transfer = transfer, .len = data.len });
return err;
};
}
fn _dataCallback(self: *Script, _: HttpClient.Response, data: []const u8) !void {
fn _dataCallback(self: *Script, _: *HttpClient.Transfer, data: []const u8) !void {
try self.source.remote.appendSlice(self.arena, data);
}

View File

@@ -245,15 +245,37 @@ pub fn toJson(self: Value, allocator: Allocator) ![]u8 {
return js.String.toSliceWithAlloc(.{ .local = local, .handle = str_handle }, allocator);
}
// Currently does not support host objects (Blob, File, etc.) or transferables
// which require delegate callbacks to be implemented.
// Throws a DataCloneError for host objects (Blob, File, etc.) that cannot be serialized.
// Does not support transferables which require additional delegate callbacks.
pub fn structuredClone(self: Value) !Value {
const local = self.local;
const v8_context = local.handle;
const v8_isolate = local.isolate.handle;
const SerializerDelegate = struct {
// Called when V8 encounters a host object it doesn't know how to serialize.
// Returns false to indicate the object cannot be cloned, and throws a DataCloneError.
// V8 asserts has_exception() after this returns false, so we must throw here.
fn writeHostObject(_: ?*anyopaque, isolate: ?*v8.Isolate, _: ?*const v8.Object) callconv(.c) v8.MaybeBool {
const iso = isolate orelse return .{ .has_value = true, .value = false };
const message = v8.v8__String__NewFromUtf8(iso, "The object cannot be cloned.", v8.kNormal, -1);
const error_value = v8.v8__Exception__Error(message) orelse return .{ .has_value = true, .value = false };
_ = v8.v8__Isolate__ThrowException(iso, error_value);
return .{ .has_value = true, .value = false };
}
// Called by V8 to report serialization errors. The exception should already be thrown.
fn throwDataCloneError(_: ?*anyopaque, _: ?*const v8.String) callconv(.c) void {}
};
const size, const data = blk: {
const serializer = v8.v8__ValueSerializer__New(v8_isolate, null) orelse return error.JsException;
const serializer = v8.v8__ValueSerializer__New(v8_isolate, &.{
.data = null,
.get_shared_array_buffer_id = null,
.write_host_object = SerializerDelegate.writeHostObject,
.throw_data_clone_error = SerializerDelegate.throwDataCloneError,
}) orelse return error.JsException;
defer v8.v8__ValueSerializer__DELETE(serializer);
var write_result: v8.MaybeBool = undefined;

View File

@@ -1,5 +1,6 @@
<!DOCTYPE html>
<script src="../testing.js"></script>
<body></body>
<script id=window>
testing.expectEqual(window, globalThis);
@@ -260,6 +261,28 @@
}
testing.expectEqual(true, threw);
}
// Host objects (DOM elements) cannot be cloned - should throw, not crash
{
let threw = false;
try {
structuredClone(document.body);
} catch (err) {
threw = true;
}
testing.expectEqual(true, threw);
}
// Objects containing host objects cannot be cloned - should throw, not crash
{
let threw = false;
try {
structuredClone({ element: document.body });
} catch (err) {
threw = true;
}
testing.expectEqual(true, threw);
}
</script>
<script id=cached_getter_wrong_this>

View File

@@ -127,16 +127,16 @@ fn handleBlobUrl(url: []const u8, resolver: js.PromiseResolver, page: *Page) !js
return resolver.promise();
}
fn httpStartCallback(response: HttpClient.Response) !void {
const self: *Fetch = @ptrCast(@alignCast(response.ctx));
fn httpStartCallback(transfer: *HttpClient.Transfer) !void {
const self: *Fetch = @ptrCast(@alignCast(transfer.ctx));
if (comptime IS_DEBUG) {
log.debug(.http, "request start", .{ .url = self._url, .source = "fetch" });
}
self._response._http_response = response;
self._response._transfer = transfer;
}
fn httpHeaderDoneCallback(response: HttpClient.Response) !bool {
const self: *Fetch = @ptrCast(@alignCast(response.ctx));
fn httpHeaderDoneCallback(transfer: *HttpClient.Transfer) !bool {
const self: *Fetch = @ptrCast(@alignCast(transfer.ctx));
if (self._signal) |signal| {
if (signal._aborted) {
@@ -145,24 +145,25 @@ fn httpHeaderDoneCallback(response: HttpClient.Response) !bool {
}
const arena = self._response._arena;
if (response.contentLength()) |cl| {
if (transfer.getContentLength()) |cl| {
try self._buf.ensureTotalCapacity(arena, cl);
}
const res = self._response;
const header = transfer.response_header.?;
if (comptime IS_DEBUG) {
log.debug(.http, "request header", .{
.source = "fetch",
.url = self._url,
.status = response.status(),
.status = header.status,
});
}
res._status = response.status().?;
res._status_text = std.http.Status.phrase(@enumFromInt(response.status().?)) orelse "";
res._url = try arena.dupeZ(u8, response.url());
res._is_redirected = response.redirectCount().? > 0;
res._status = header.status;
res._status_text = std.http.Status.phrase(@enumFromInt(header.status)) orelse "";
res._url = try arena.dupeZ(u8, std.mem.span(header.url));
res._is_redirected = header.redirect_count > 0;
// Determine response type based on origin comparison
const page_origin = URL.getOrigin(arena, self._page.url) catch null;
@@ -182,7 +183,7 @@ fn httpHeaderDoneCallback(response: HttpClient.Response) !bool {
res._type = .basic;
}
var it = response.headerIterator();
var it = transfer.responseHeaderIterator();
while (it.next()) |hdr| {
try res._headers.append(hdr.name, hdr.value, self._page);
}
@@ -190,8 +191,8 @@ fn httpHeaderDoneCallback(response: HttpClient.Response) !bool {
return true;
}
fn httpDataCallback(response: HttpClient.Response, data: []const u8) !void {
const self: *Fetch = @ptrCast(@alignCast(response.ctx));
fn httpDataCallback(transfer: *HttpClient.Transfer, data: []const u8) !void {
const self: *Fetch = @ptrCast(@alignCast(transfer.ctx));
// Check if aborted
if (self._signal) |signal| {
@@ -206,7 +207,7 @@ fn httpDataCallback(response: HttpClient.Response, data: []const u8) !void {
fn httpDoneCallback(ctx: *anyopaque) !void {
const self: *Fetch = @ptrCast(@alignCast(ctx));
var response = self._response;
response._http_response = null;
response._transfer = null;
response._body = self._buf.items;
log.info(.http, "request complete", .{
@@ -229,7 +230,7 @@ fn httpErrorCallback(ctx: *anyopaque, _: anyerror) void {
const self: *Fetch = @ptrCast(@alignCast(ctx));
var response = self._response;
response._http_response = null;
response._transfer = null;
// the response is only passed on v8 on success, if we're here, it's safe to
// clear this. (defer since `self is in the response's arena).
@@ -255,7 +256,7 @@ fn httpShutdownCallback(ctx: *anyopaque) void {
if (self._owns_response) {
var response = self._response;
response._http_response = null;
response._transfer = null;
response.deinit(self._page._session);
// Do not access `self` after this point: the Fetch struct was
// allocated from response._arena which has been released.

View File

@@ -48,7 +48,7 @@ _type: Type,
_status_text: []const u8,
_url: [:0]const u8,
_is_redirected: bool,
_http_response: ?HttpClient.Response = null,
_transfer: ?*HttpClient.Transfer = null,
const InitOpts = struct {
status: u16 = 200,
@@ -81,9 +81,9 @@ pub fn init(body_: ?[]const u8, opts_: ?InitOpts, page: *Page) !*Response {
}
pub fn deinit(self: *Response, session: *Session) void {
if (self._http_response) |resp| {
resp.abort(error.Abort);
self._http_response = null;
if (self._transfer) |transfer| {
transfer.abort(error.Abort);
self._transfer = null;
}
session.releaseArena(self._arena);
}
@@ -191,7 +191,7 @@ pub fn clone(self: *const Response, page: *Page) !*Response {
._type = self._type,
._is_redirected = self._is_redirected,
._headers = try Headers.init(.{ .obj = self._headers }, page),
._http_response = null,
._transfer = null,
};
return cloned;
}

View File

@@ -43,7 +43,7 @@ _rc: lp.RC(u8) = .{},
_page: *Page,
_proto: *XMLHttpRequestEventTarget,
_arena: Allocator,
_http_response: ?HttpClient.Response = null,
_transfer: ?*HttpClient.Transfer = null,
_url: [:0]const u8 = "",
_method: net_http.Method = .GET,
@@ -99,9 +99,9 @@ pub fn init(page: *Page) !*XMLHttpRequest {
}
pub fn deinit(self: *XMLHttpRequest, session: *Session) void {
if (self._http_response) |resp| {
resp.abort(error.Abort);
self._http_response = null;
if (self._transfer) |transfer| {
transfer.abort(error.Abort);
self._transfer = null;
}
if (self._on_ready_state_change) |func| {
@@ -175,9 +175,9 @@ pub fn setWithCredentials(self: *XMLHttpRequest, value: bool) !void {
// TODO: url should be a union, as it can be multiple things
pub fn open(self: *XMLHttpRequest, method_: []const u8, url: [:0]const u8) !void {
// Abort any in-progress request
if (self._http_response) |transfer| {
if (self._transfer) |transfer| {
transfer.abort(error.Abort);
self._http_response = null;
self._transfer = null;
}
// Reset internal state
@@ -387,33 +387,35 @@ pub fn getResponseXML(self: *XMLHttpRequest, page: *Page) !?*Node.Document {
};
}
fn httpStartCallback(response: HttpClient.Response) !void {
const self: *XMLHttpRequest = @ptrCast(@alignCast(response.ctx));
fn httpStartCallback(transfer: *HttpClient.Transfer) !void {
const self: *XMLHttpRequest = @ptrCast(@alignCast(transfer.ctx));
if (comptime IS_DEBUG) {
log.debug(.http, "request start", .{ .method = self._method, .url = self._url, .source = "xhr" });
}
self._http_response = response;
self._transfer = transfer;
self.acquireRef();
}
fn httpHeaderCallback(response: HttpClient.Response, header: net_http.Header) !void {
const self: *XMLHttpRequest = @ptrCast(@alignCast(response.ctx));
fn httpHeaderCallback(transfer: *HttpClient.Transfer, header: net_http.Header) !void {
const self: *XMLHttpRequest = @ptrCast(@alignCast(transfer.ctx));
const joined = try std.fmt.allocPrint(self._arena, "{s}: {s}", .{ header.name, header.value });
try self._response_headers.append(self._arena, joined);
}
fn httpHeaderDoneCallback(response: HttpClient.Response) !bool {
const self: *XMLHttpRequest = @ptrCast(@alignCast(response.ctx));
fn httpHeaderDoneCallback(transfer: *HttpClient.Transfer) !bool {
const self: *XMLHttpRequest = @ptrCast(@alignCast(transfer.ctx));
const header = &transfer.response_header.?;
if (comptime IS_DEBUG) {
log.debug(.http, "request header", .{
.source = "xhr",
.url = self._url,
.status = response.status(),
.status = header.status,
});
}
if (response.contentType()) |ct| {
if (header.contentType()) |ct| {
self._response_mime = Mime.parse(ct) catch |e| {
log.info(.http, "invalid content type", .{
.content_Type = ct,
@@ -424,18 +426,18 @@ fn httpHeaderDoneCallback(response: HttpClient.Response) !bool {
};
}
var it = response.headerIterator();
var it = transfer.responseHeaderIterator();
while (it.next()) |hdr| {
const joined = try std.fmt.allocPrint(self._arena, "{s}: {s}", .{ hdr.name, hdr.value });
try self._response_headers.append(self._arena, joined);
}
self._response_status = response.status().?;
if (response.contentLength()) |cl| {
self._response_status = header.status;
if (transfer.getContentLength()) |cl| {
self._response_len = cl;
try self._response_data.ensureTotalCapacity(self._arena, cl);
}
self._response_url = try self._arena.dupeZ(u8, response.url());
self._response_url = try self._arena.dupeZ(u8, std.mem.span(header.url));
const page = self._page;
@@ -450,8 +452,8 @@ fn httpHeaderDoneCallback(response: HttpClient.Response) !bool {
return true;
}
fn httpDataCallback(response: HttpClient.Response, data: []const u8) !void {
const self: *XMLHttpRequest = @ptrCast(@alignCast(response.ctx));
fn httpDataCallback(transfer: *HttpClient.Transfer, data: []const u8) !void {
const self: *XMLHttpRequest = @ptrCast(@alignCast(transfer.ctx));
try self._response_data.appendSlice(self._arena, data);
const page = self._page;
@@ -474,7 +476,7 @@ fn httpDoneCallback(ctx: *anyopaque) !void {
// Not that the request is done, the http/client will free the transfer
// object. It isn't safe to keep it around.
self._http_response = null;
self._transfer = null;
const page = self._page;
@@ -497,22 +499,22 @@ fn httpErrorCallback(ctx: *anyopaque, err: anyerror) void {
const self: *XMLHttpRequest = @ptrCast(@alignCast(ctx));
// http client will close it after an error, it isn't safe to keep around
self.handleError(err);
if (self._http_response != null) {
self._http_response = null;
if (self._transfer != null) {
self._transfer = null;
self.releaseRef(self._page._session);
}
}
fn httpShutdownCallback(ctx: *anyopaque) void {
const self: *XMLHttpRequest = @ptrCast(@alignCast(ctx));
self._http_response = null;
self._transfer = null;
}
pub fn abort(self: *XMLHttpRequest) void {
self.handleError(error.Abort);
if (self._http_response) |resp| {
self._http_response = null;
resp.abort(error.Abort);
if (self._transfer) |transfer| {
self._transfer = null;
transfer.abort(error.Abort);
self.releaseRef(self._page._session);
}
}

View File

@@ -39,7 +39,6 @@ pub const Scope = enum {
telemetry,
unknown_prop,
mcp,
cache,
};
const Opts = struct {

View File

@@ -29,9 +29,7 @@ const libcurl = @import("../sys/libcurl.zig");
const net_http = @import("http.zig");
const RobotStore = @import("Robots.zig").RobotStore;
const WebBotAuth = @import("WebBotAuth.zig");
const Cache = @import("cache/Cache.zig");
const App = @import("../App.zig");
const Runtime = @This();
const Listener = struct {
@@ -47,12 +45,10 @@ const MAX_TICK_CALLBACKS = 16;
allocator: Allocator,
app: *App,
config: *const Config,
ca_blob: ?net_http.Blob,
robot_store: RobotStore,
web_bot_auth: ?WebBotAuth,
cache: ?Cache,
connections: []net_http.Connection,
available: std.DoublyLinkedList = .{},
@@ -204,7 +200,7 @@ fn globalDeinit() void {
libcurl.curl_global_cleanup();
}
pub fn init(allocator: Allocator, app: *App, config: *const Config) !Runtime {
pub fn init(allocator: Allocator, config: *const Config) !Runtime {
globalInit(allocator);
errdefer globalDeinit();
@@ -237,11 +233,6 @@ pub fn init(allocator: Allocator, app: *App, config: *const Config) !Runtime {
else
null;
const cache = if (config.cacheDir()) |cache_dir_path|
Cache{ .kind = .{ .fs = try .init(cache_dir_path) } }
else
null;
return .{
.allocator = allocator,
.config = config,
@@ -253,10 +244,8 @@ pub fn init(allocator: Allocator, app: *App, config: *const Config) !Runtime {
.available = available,
.connections = connections,
.app = app,
.robot_store = RobotStore.init(allocator),
.web_bot_auth = web_bot_auth,
.cache = cache,
};
}
@@ -289,8 +278,6 @@ pub fn deinit(self: *Runtime) void {
wba.deinit(self.allocator);
}
if (self.cache) |*cache| cache.deinit();
globalDeinit();
}

View File

@@ -1,156 +0,0 @@
// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
//
// Francis Bouvier <francis@lightpanda.io>
// Pierre Tachoire <pierre@lightpanda.io>
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
const std = @import("std");
const Http = @import("../http.zig");
const FsCache = @import("FsCache.zig");
/// A browser-wide cache for resources across the network.
/// This mostly conforms to RFC9111 with regards to caching behavior.
pub const Cache = @This();
kind: union(enum) {
fs: FsCache,
},
pub fn deinit(self: *Cache) void {
return switch (self.kind) {
inline else => |*c| c.deinit(),
};
}
pub fn get(self: *Cache, arena: std.mem.Allocator, req: CacheRequest) ?CachedResponse {
return switch (self.kind) {
inline else => |*c| c.get(arena, req),
};
}
pub fn put(self: *Cache, metadata: CachedMetadata, body: []const u8) !void {
return switch (self.kind) {
inline else => |*c| c.put(metadata, body),
};
}
pub const CacheControl = struct {
max_age: u64,
pub fn parse(value: []const u8) ?CacheControl {
var cc: CacheControl = .{ .max_age = undefined };
var max_age_set = false;
var max_s_age_set = false;
var is_public = false;
var iter = std.mem.splitScalar(u8, value, ',');
while (iter.next()) |part| {
const directive = std.mem.trim(u8, part, &std.ascii.whitespace);
if (std.ascii.eqlIgnoreCase(directive, "no-store")) {
return null;
} else if (std.ascii.eqlIgnoreCase(directive, "no-cache")) {
return null;
} else if (std.ascii.eqlIgnoreCase(directive, "public")) {
is_public = true;
} else if (std.ascii.startsWithIgnoreCase(directive, "max-age=")) {
if (!max_s_age_set) {
if (std.fmt.parseInt(u64, directive[8..], 10) catch null) |max_age| {
cc.max_age = max_age;
max_age_set = true;
}
}
} else if (std.ascii.startsWithIgnoreCase(directive, "s-maxage=")) {
if (std.fmt.parseInt(u64, directive[9..], 10) catch null) |max_age| {
cc.max_age = max_age;
max_age_set = true;
max_s_age_set = true;
}
}
}
if (!max_age_set) return null;
if (!is_public) return null;
if (cc.max_age == 0) return null;
return cc;
}
};
pub const CachedMetadata = struct {
url: [:0]const u8,
content_type: []const u8,
status: u16,
stored_at: i64,
age_at_store: u64,
cache_control: CacheControl,
/// Response Headers
headers: []const Http.Header,
/// These are Request Headers used by Vary.
vary_headers: []const Http.Header,
};
pub const CacheRequest = struct {
url: []const u8,
timestamp: i64,
request_headers: []const Http.Header,
};
pub const CachedData = union(enum) {
buffer: []const u8,
file: struct {
file: std.fs.File,
offset: usize,
len: usize,
},
};
pub const CachedResponse = struct {
metadata: CachedMetadata,
data: CachedData,
};
pub fn tryCache(
arena: std.mem.Allocator,
timestamp: i64,
url: [:0]const u8,
status: u16,
content_type: ?[]const u8,
cache_control: ?[]const u8,
vary: ?[]const u8,
age: ?[]const u8,
has_set_cookie: bool,
has_authorization: bool,
) !?CachedMetadata {
if (status != 200) return null;
if (has_set_cookie) return null;
if (has_authorization) return null;
if (vary) |v| if (std.mem.eql(u8, v, "*")) return null;
const cc = CacheControl.parse(cache_control orelse return null) orelse return null;
return .{
.url = url,
.content_type = if (content_type) |ct| try arena.dupe(u8, ct) else "application/octet-stream",
.status = status,
.stored_at = timestamp,
.age_at_store = if (age) |a| std.fmt.parseInt(u64, a, 10) catch 0 else 0,
.cache_control = cc,
.headers = &.{},
.vary_headers = &.{},
};
}

View File

@@ -1,580 +0,0 @@
// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
//
// Francis Bouvier <francis@lightpanda.io>
// Pierre Tachoire <pierre@lightpanda.io>
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
const std = @import("std");
const log = @import("../../log.zig");
const Cache = @import("Cache.zig");
const Http = @import("../http.zig");
const CacheRequest = Cache.CacheRequest;
const CachedMetadata = Cache.CachedMetadata;
const CachedResponse = Cache.CachedResponse;
const CACHE_VERSION: usize = 1;
const LOCK_STRIPES = 16;
comptime {
std.debug.assert(std.math.isPowerOfTwo(LOCK_STRIPES));
}
pub const FsCache = @This();
dir: std.fs.Dir,
locks: [LOCK_STRIPES]std.Thread.Mutex = .{std.Thread.Mutex{}} ** LOCK_STRIPES,
const CacheMetadataJson = struct {
version: usize,
metadata: CachedMetadata,
};
fn getLockPtr(self: *FsCache, key: *const [HASHED_KEY_LEN]u8) *std.Thread.Mutex {
const lock_idx = std.hash.Wyhash.hash(0, key[0..]) & (LOCK_STRIPES - 1);
return &self.locks[lock_idx];
}
const BODY_LEN_HEADER_LEN = 8;
const HASHED_KEY_LEN = 64;
const HASHED_PATH_LEN = HASHED_KEY_LEN + 6;
const HASHED_TMP_PATH_LEN = HASHED_PATH_LEN + 4;
fn hashKey(key: []const u8) [HASHED_KEY_LEN]u8 {
var digest: [std.crypto.hash.sha2.Sha256.digest_length]u8 = undefined;
std.crypto.hash.sha2.Sha256.hash(key, &digest, .{});
var hex: [HASHED_KEY_LEN]u8 = undefined;
_ = std.fmt.bufPrint(&hex, "{s}", .{std.fmt.bytesToHex(&digest, .lower)}) catch unreachable;
return hex;
}
fn cachePath(hashed_key: *const [HASHED_KEY_LEN]u8) [HASHED_PATH_LEN]u8 {
var path: [HASHED_PATH_LEN]u8 = undefined;
_ = std.fmt.bufPrint(&path, "{s}.cache", .{hashed_key}) catch unreachable;
return path;
}
fn cacheTmpPath(hashed_key: *const [HASHED_KEY_LEN]u8) [HASHED_TMP_PATH_LEN]u8 {
var path: [HASHED_TMP_PATH_LEN]u8 = undefined;
_ = std.fmt.bufPrint(&path, "{s}.cache.tmp", .{hashed_key}) catch unreachable;
return path;
}
pub fn init(path: []const u8) !FsCache {
const cwd = std.fs.cwd();
cwd.makeDir(path) catch |err| switch (err) {
error.PathAlreadyExists => {},
else => return err,
};
const dir = try cwd.openDir(path, .{ .iterate = true });
return .{ .dir = dir };
}
pub fn deinit(self: *FsCache) void {
self.dir.close();
}
pub fn get(self: *FsCache, arena: std.mem.Allocator, req: CacheRequest) ?Cache.CachedResponse {
const hashed_key = hashKey(req.url);
const cache_p = cachePath(&hashed_key);
const lock = self.getLockPtr(&hashed_key);
lock.lock();
defer lock.unlock();
const file = self.dir.openFile(&cache_p, .{ .mode = .read_only }) catch |e| {
switch (e) {
std.fs.File.OpenError.FileNotFound => {
log.debug(.cache, "miss", .{ .url = req.url, .hash = &hashed_key });
},
else => |err| {
log.warn(.cache, "open file err", .{ .url = req.url, .err = err });
},
}
return null;
};
var cleanup = false;
defer if (cleanup) {
file.close();
self.dir.deleteFile(&cache_p) catch |e| {
log.err(.cache, "clean fail", .{ .url = req.url, .file = &cache_p, .err = e });
};
};
var file_buf: [1024]u8 = undefined;
var len_buf: [BODY_LEN_HEADER_LEN]u8 = undefined;
var file_reader = file.reader(&file_buf);
const file_reader_iface = &file_reader.interface;
file_reader_iface.readSliceAll(&len_buf) catch |e| {
log.warn(.cache, "read header", .{ .url = req.url, .err = e });
cleanup = true;
return null;
};
const body_len = std.mem.readInt(u64, &len_buf, .little);
// Now we read metadata.
file_reader.seekTo(body_len + BODY_LEN_HEADER_LEN) catch |e| {
log.warn(.cache, "seek metadata", .{ .url = req.url, .err = e });
cleanup = true;
return null;
};
var json_reader = std.json.Reader.init(arena, file_reader_iface);
const cache_file: CacheMetadataJson = std.json.parseFromTokenSourceLeaky(
CacheMetadataJson,
arena,
&json_reader,
.{ .allocate = .alloc_always },
) catch |e| {
log.warn(.cache, "metadata parse", .{ .url = req.url, .err = e });
cleanup = true;
return null;
};
if (cache_file.version != CACHE_VERSION) {
log.warn(.cache, "version", .{ .url = req.url, .expected = CACHE_VERSION, .got = cache_file.version });
cleanup = true;
return null;
}
const metadata = cache_file.metadata;
// Check entry expiration.
const now = req.timestamp;
const age = (now - metadata.stored_at) + @as(i64, @intCast(metadata.age_at_store));
if (age < 0 or @as(u64, @intCast(age)) >= metadata.cache_control.max_age) {
log.debug(.cache, "expired", .{ .url = req.url });
cleanup = true;
return null;
}
// If we have Vary headers, ensure they are present & matching.
for (metadata.vary_headers) |vary_hdr| {
const name = vary_hdr.name;
const value = vary_hdr.value;
const incoming = for (req.request_headers) |h| {
if (std.ascii.eqlIgnoreCase(h.name, name)) break h.value;
} else "";
if (!std.ascii.eqlIgnoreCase(value, incoming)) {
log.debug(.cache, "vary mismatch", .{ .url = req.url, .header = name });
return null;
}
}
// On the case of a hash collision.
if (!std.ascii.eqlIgnoreCase(metadata.url, req.url)) {
log.warn(.cache, "collision", .{ .url = req.url, .expected = metadata.url, .got = req.url });
cleanup = true;
return null;
}
return .{
.metadata = metadata,
.data = .{
.file = .{
.file = file,
.offset = BODY_LEN_HEADER_LEN,
.len = body_len,
},
},
};
}
pub fn put(self: *FsCache, meta: CachedMetadata, body: []const u8) !void {
const hashed_key = hashKey(meta.url);
const cache_p = cachePath(&hashed_key);
const cache_tmp_p = cacheTmpPath(&hashed_key);
const lock = self.getLockPtr(&hashed_key);
lock.lock();
defer lock.unlock();
const file = try self.dir.createFile(&cache_tmp_p, .{ .truncate = true });
defer file.close();
var writer_buf: [1024]u8 = undefined;
var file_writer = file.writer(&writer_buf);
var file_writer_iface = &file_writer.interface;
var len_buf: [8]u8 = undefined;
std.mem.writeInt(u64, &len_buf, body.len, .little);
try file_writer_iface.writeAll(&len_buf);
try file_writer_iface.writeAll(body);
try std.json.Stringify.value(
CacheMetadataJson{ .version = CACHE_VERSION, .metadata = meta },
.{ .whitespace = .minified },
file_writer_iface,
);
try file_writer_iface.flush();
try self.dir.rename(&cache_tmp_p, &cache_p);
}
const testing = std.testing;
fn setupCache() !struct { tmp: testing.TmpDir, cache: Cache } {
var tmp = testing.tmpDir(.{});
errdefer tmp.cleanup();
const path = try tmp.dir.realpathAlloc(testing.allocator, ".");
defer testing.allocator.free(path);
return .{
.tmp = tmp,
.cache = Cache{ .kind = .{ .fs = try FsCache.init(path) } },
};
}
test "FsCache: basic put and get" {
var setup = try setupCache();
defer {
setup.cache.deinit();
setup.tmp.cleanup();
}
const cache = &setup.cache;
var arena = std.heap.ArenaAllocator.init(testing.allocator);
defer arena.deinit();
const now = std.time.timestamp();
const meta = CachedMetadata{
.url = "https://example.com",
.content_type = "text/html",
.status = 200,
.stored_at = now,
.age_at_store = 0,
.cache_control = .{ .max_age = 600 },
.headers = &.{},
.vary_headers = &.{},
};
const body = "hello world";
try cache.put(meta, body);
const result = cache.get(
arena.allocator(),
.{
.url = "https://example.com",
.timestamp = now,
.request_headers = &.{},
},
) orelse return error.CacheMiss;
const f = result.data.file;
const file = f.file;
defer file.close();
var buf: [64]u8 = undefined;
var file_reader = file.reader(&buf);
try file_reader.seekTo(f.offset);
const read_buf = try file_reader.interface.readAlloc(testing.allocator, f.len);
defer testing.allocator.free(read_buf);
try testing.expectEqualStrings(body, read_buf);
}
test "FsCache: get expiration" {
var setup = try setupCache();
defer {
setup.cache.deinit();
setup.tmp.cleanup();
}
const cache = &setup.cache;
var arena = std.heap.ArenaAllocator.init(testing.allocator);
defer arena.deinit();
const now = 5000;
const max_age = 1000;
const meta = CachedMetadata{
.url = "https://example.com",
.content_type = "text/html",
.status = 200,
.stored_at = now,
.age_at_store = 900,
.cache_control = .{ .max_age = max_age },
.headers = &.{},
.vary_headers = &.{},
};
const body = "hello world";
try cache.put(meta, body);
const result = cache.get(
arena.allocator(),
.{
.url = "https://example.com",
.timestamp = now + 50,
.request_headers = &.{},
},
) orelse return error.CacheMiss;
result.data.file.file.close();
try testing.expectEqual(null, cache.get(
arena.allocator(),
.{
.url = "https://example.com",
.timestamp = now + 200,
.request_headers = &.{},
},
));
try testing.expectEqual(null, cache.get(
arena.allocator(),
.{
.url = "https://example.com",
.timestamp = now,
.request_headers = &.{},
},
));
}
test "FsCache: put override" {
var setup = try setupCache();
defer {
setup.cache.deinit();
setup.tmp.cleanup();
}
const cache = &setup.cache;
var arena = std.heap.ArenaAllocator.init(testing.allocator);
defer arena.deinit();
{
const now = 5000;
const max_age = 1000;
const meta = CachedMetadata{
.url = "https://example.com",
.content_type = "text/html",
.status = 200,
.stored_at = now,
.age_at_store = 900,
.cache_control = .{ .max_age = max_age },
.headers = &.{},
.vary_headers = &.{},
};
const body = "hello world";
try cache.put(meta, body);
const result = cache.get(
arena.allocator(),
.{
.url = "https://example.com",
.timestamp = now,
.request_headers = &.{},
},
) orelse return error.CacheMiss;
const f = result.data.file;
const file = f.file;
defer file.close();
var buf: [64]u8 = undefined;
var file_reader = file.reader(&buf);
try file_reader.seekTo(f.offset);
const read_buf = try file_reader.interface.readAlloc(testing.allocator, f.len);
defer testing.allocator.free(read_buf);
try testing.expectEqualStrings(body, read_buf);
}
{
const now = 10000;
const max_age = 2000;
const meta = CachedMetadata{
.url = "https://example.com",
.content_type = "text/html",
.status = 200,
.stored_at = now,
.age_at_store = 0,
.cache_control = .{ .max_age = max_age },
.headers = &.{},
.vary_headers = &.{},
};
const body = "goodbye world";
try cache.put(meta, body);
const result = cache.get(
arena.allocator(),
.{
.url = "https://example.com",
.timestamp = now,
.request_headers = &.{},
},
) orelse return error.CacheMiss;
const f = result.data.file;
const file = f.file;
defer file.close();
var buf: [64]u8 = undefined;
var file_reader = file.reader(&buf);
try file_reader.seekTo(f.offset);
const read_buf = try file_reader.interface.readAlloc(testing.allocator, f.len);
defer testing.allocator.free(read_buf);
try testing.expectEqualStrings(body, read_buf);
}
}
test "FsCache: garbage file" {
var setup = try setupCache();
defer {
setup.cache.deinit();
setup.tmp.cleanup();
}
const hashed_key = hashKey("https://example.com");
const cache_p = cachePath(&hashed_key);
const file = try setup.cache.kind.fs.dir.createFile(&cache_p, .{});
try file.writeAll("this is not a valid cache file !@#$%");
file.close();
var arena = std.heap.ArenaAllocator.init(testing.allocator);
defer arena.deinit();
try testing.expectEqual(
null,
setup.cache.get(arena.allocator(), .{
.url = "https://example.com",
.timestamp = 5000,
.request_headers = &.{},
}),
);
}
test "FsCache: vary hit and miss" {
var setup = try setupCache();
defer {
setup.cache.deinit();
setup.tmp.cleanup();
}
const cache = &setup.cache;
var arena = std.heap.ArenaAllocator.init(testing.allocator);
defer arena.deinit();
const now = std.time.timestamp();
const meta = CachedMetadata{
.url = "https://example.com",
.content_type = "text/html",
.status = 200,
.stored_at = now,
.age_at_store = 0,
.cache_control = .{ .max_age = 600 },
.headers = &.{},
.vary_headers = &.{
.{ .name = "Accept-Encoding", .value = "gzip" },
},
};
try cache.put(meta, "hello world");
const result = cache.get(arena.allocator(), .{
.url = "https://example.com",
.timestamp = now,
.request_headers = &.{
.{ .name = "Accept-Encoding", .value = "gzip" },
},
}) orelse return error.CacheMiss;
result.data.file.file.close();
try testing.expectEqual(null, cache.get(arena.allocator(), .{
.url = "https://example.com",
.timestamp = now,
.request_headers = &.{
.{ .name = "Accept-Encoding", .value = "br" },
},
}));
try testing.expectEqual(null, cache.get(arena.allocator(), .{
.url = "https://example.com",
.timestamp = now,
.request_headers = &.{},
}));
const result2 = cache.get(arena.allocator(), .{
.url = "https://example.com",
.timestamp = now,
.request_headers = &.{
.{ .name = "Accept-Encoding", .value = "gzip" },
},
}) orelse return error.CacheMiss;
result2.data.file.file.close();
}
test "FsCache: vary multiple headers" {
var setup = try setupCache();
defer {
setup.cache.deinit();
setup.tmp.cleanup();
}
const cache = &setup.cache;
var arena = std.heap.ArenaAllocator.init(testing.allocator);
defer arena.deinit();
const now = std.time.timestamp();
const meta = CachedMetadata{
.url = "https://example.com",
.content_type = "text/html",
.status = 200,
.stored_at = now,
.age_at_store = 0,
.cache_control = .{ .max_age = 600 },
.headers = &.{},
.vary_headers = &.{
.{ .name = "Accept-Encoding", .value = "gzip" },
.{ .name = "Accept-Language", .value = "en" },
},
};
try cache.put(meta, "hello world");
const result = cache.get(arena.allocator(), .{
.url = "https://example.com",
.timestamp = now,
.request_headers = &.{
.{ .name = "Accept-Encoding", .value = "gzip" },
.{ .name = "Accept-Language", .value = "en" },
},
}) orelse return error.CacheMiss;
result.data.file.file.close();
try testing.expectEqual(null, cache.get(arena.allocator(), .{
.url = "https://example.com",
.timestamp = now,
.request_headers = &.{
.{ .name = "Accept-Encoding", .value = "gzip" },
.{ .name = "Accept-Language", .value = "fr" },
},
}));
}

View File

@@ -79,7 +79,7 @@ pub const Headers = struct {
self.headers = updated_headers;
}
pub fn parseHeader(header_str: []const u8) ?Header {
fn parseHeader(header_str: []const u8) ?Header {
const colon_pos = std.mem.indexOfScalar(u8, header_str, ':') orelse return null;
const name = std.mem.trim(u8, header_str[0..colon_pos], " \t");
@@ -88,9 +88,22 @@ pub const Headers = struct {
return .{ .name = name, .value = value };
}
pub fn iterator(self: Headers) HeaderIterator {
return .{ .curl_slist = .{ .header = self.headers } };
pub fn iterator(self: *Headers) Iterator {
return .{
.header = self.headers,
};
}
const Iterator = struct {
header: [*c]libcurl.CurlSList,
pub fn next(self: *Iterator) ?Header {
const h = self.header orelse return null;
self.header = h.*.next;
return parseHeader(std.mem.span(@as([*:0]const u8, @ptrCast(h.*.data))));
}
};
};
// In normal cases, the header iterator comes from the curl linked list.
@@ -99,7 +112,6 @@ pub const Headers = struct {
// This union, is an iterator that exposes the same API for either case.
pub const HeaderIterator = union(enum) {
curl: CurlHeaderIterator,
curl_slist: CurlSListIterator,
list: ListHeaderIterator,
pub fn next(self: *HeaderIterator) ?Header {
@@ -108,19 +120,6 @@ pub const HeaderIterator = union(enum) {
}
}
pub fn collect(self: *HeaderIterator, allocator: std.mem.Allocator) !std.ArrayList(Header) {
var list: std.ArrayList(Header) = .empty;
while (self.next()) |hdr| {
try list.append(allocator, .{
.name = try allocator.dupe(u8, hdr.name),
.value = try allocator.dupe(u8, hdr.value),
});
}
return list;
}
const CurlHeaderIterator = struct {
conn: *const Connection,
prev: ?*libcurl.CurlHeader = null,
@@ -137,16 +136,6 @@ pub const HeaderIterator = union(enum) {
}
};
const CurlSListIterator = struct {
header: [*c]libcurl.CurlSList,
pub fn next(self: *CurlSListIterator) ?Header {
const h = self.header orelse return null;
self.header = h.*.next;
return Headers.parseHeader(std.mem.span(@as([*:0]const u8, @ptrCast(h.*.data))));
}
};
const ListHeaderIterator = struct {
index: usize = 0,
list: []const Header,