Compare commits

...

37 Commits

Author SHA1 Message Date
Muki Kiboigo
5bb86b77fb add layers to HttpClient 2026-04-01 08:11:52 -07:00
Muki Kiboigo
c02a966776 fix self.req.ctx in HttpClient 2026-04-01 06:04:01 -07:00
Muki Kiboigo
99112d75ff remove cache revalidation stubs 2026-04-01 06:04:01 -07:00
Muki Kiboigo
49b3a039cd add Vary support 2026-04-01 06:04:01 -07:00
Muki Kiboigo
7750125472 add more FsCache tests 2026-04-01 06:04:01 -07:00
Muki Kiboigo
e986878554 always close file on serveFromCache 2026-04-01 06:04:00 -07:00
Muki Kiboigo
f5d36c87cb better logging for cache 2026-04-01 06:04:00 -07:00
Muki Kiboigo
d288a63ebb switch to single file cache 2026-04-01 06:04:00 -07:00
Muki Kiboigo
501c523534 properly deinit cache 2026-04-01 06:04:00 -07:00
Muki Kiboigo
41f9e6521b use wyhash for power of two lock stripes 2026-04-01 06:04:00 -07:00
Muki Kiboigo
4632f241d2 require timestamp passed in with cache request 2026-04-01 06:04:00 -07:00
Muki Kiboigo
7ef024ba05 remove unused cache method on fs cache 2026-04-01 06:04:00 -07:00
Muki Kiboigo
917da08d5d add basic fs cache get/put test 2026-04-01 06:03:59 -07:00
Muki Kiboigo
6a090af250 check age on fs cache get 2026-04-01 06:03:59 -07:00
Muki Kiboigo
92996fe2d5 only store stuff when we know we will cache 2026-04-01 06:03:59 -07:00
Muki Kiboigo
2c7eba9d83 shortcircuit a lot of caching checks 2026-04-01 06:03:59 -07:00
Muki Kiboigo
cfb6eaa412 ensure fs cache file is closed after use 2026-04-01 06:03:59 -07:00
Muki Kiboigo
b26c35aa7e use CacheRequest instead of key 2026-04-01 06:03:59 -07:00
Muki Kiboigo
0a5a9f8012 add striped lock to FsCache 2026-04-01 06:03:59 -07:00
Muki Kiboigo
41cf6efe32 use arena_pool for cache get 2026-04-01 06:03:56 -07:00
Muki Kiboigo
81e23fd020 use writer for fs cache body file 2026-04-01 06:02:34 -07:00
Muki Kiboigo
983b2ab79d use json for fs cache metadata file 2026-04-01 06:02:33 -07:00
Muki Kiboigo
fe7583f931 use sha256 instead of wyhash 2026-04-01 06:02:33 -07:00
Muki Kiboigo
06b3351d97 store type_buf and sub_type_buf in Mime 2026-04-01 06:02:33 -07:00
Muki Kiboigo
43aa267324 add more http caching rules 2026-04-01 06:02:33 -07:00
Muki Kiboigo
69df3c69ae use CacheControl and Vary 2026-04-01 06:02:33 -07:00
Muki Kiboigo
6c81c670d8 cache headers along with response 2026-04-01 06:02:33 -07:00
Muki Kiboigo
b2972a7508 add basic caching support 2026-04-01 06:02:32 -07:00
Muki Kiboigo
a7fa575446 create cache owned by the network struct 2026-04-01 06:02:32 -07:00
Muki Kiboigo
1b6e9df68b use enum approach instead of vtable 2026-04-01 06:02:32 -07:00
Muki Kiboigo
5c545d94d7 add basic FsCache impl 2026-04-01 06:02:32 -07:00
Muki Kiboigo
65307c7a5d add CachedResponse variant to Response 2026-04-01 06:02:32 -07:00
Muki Kiboigo
f12d9b64a6 add headerIterator to Http Response 2026-04-01 06:02:32 -07:00
Muki Kiboigo
f6a27ecbc4 use Response instead of Transfer in callbacks 2026-04-01 06:02:30 -07:00
Muki Kiboigo
489c6e299f allow Mime parse to use []const u8 2026-04-01 06:00:41 -07:00
Muki Kiboigo
fc87bb849c add cache dir to configuration opts 2026-04-01 06:00:40 -07:00
Muki Kiboigo
98532cb5f3 create Cache interface file 2026-04-01 06:00:38 -07:00
17 changed files with 2118 additions and 1149 deletions

View File

@@ -55,7 +55,7 @@ pub fn init(allocator: Allocator, config: *const Config) !*App {
.arena_pool = undefined, .arena_pool = undefined,
}; };
app.network = try Network.init(allocator, config); app.network = try Network.init(allocator, app, config);
errdefer app.network.deinit(); errdefer app.network.deinit();
app.platform = try Platform.init(); app.platform = try Platform.init();

View File

@@ -156,6 +156,13 @@ pub fn userAgentSuffix(self: *const Config) ?[]const u8 {
}; };
} }
pub fn cacheDir(self: *const Config) ?[]const u8 {
return switch (self.mode) {
inline .serve, .fetch, .mcp => |opts| opts.common.cache_dir,
else => null,
};
}
pub fn cdpTimeout(self: *const Config) usize { pub fn cdpTimeout(self: *const Config) usize {
return switch (self.mode) { return switch (self.mode) {
.serve => |opts| if (opts.timeout > 604_800) 604_800_000 else @as(usize, opts.timeout) * 1000, .serve => |opts| if (opts.timeout > 604_800) 604_800_000 else @as(usize, opts.timeout) * 1000,
@@ -267,6 +274,7 @@ pub const Common = struct {
log_format: ?log.Format = null, log_format: ?log.Format = null,
log_filter_scopes: ?[]log.Scope = null, log_filter_scopes: ?[]log.Scope = null,
user_agent_suffix: ?[]const u8 = null, user_agent_suffix: ?[]const u8 = null,
cache_dir: ?[]const u8 = null,
web_bot_auth_key_file: ?[]const u8 = null, web_bot_auth_key_file: ?[]const u8 = null,
web_bot_auth_keyid: ?[]const u8 = null, web_bot_auth_keyid: ?[]const u8 = null,
@@ -1047,5 +1055,14 @@ fn parseCommonArg(
return true; return true;
} }
if (std.mem.eql(u8, "--cache_dir", opt)) {
const str = args.next() orelse {
log.fatal(.app, "missing argument value", .{ .arg = "--cache_dir" });
return error.InvalidArgument;
};
common.cache_dir = try allocator.dupe(u8, str);
return true;
}
return false; return false;
} }

View File

@@ -260,14 +260,14 @@ pub const Client = struct {
fn start(self: *Client) void { fn start(self: *Client) void {
const http = self.http; const http = self.http;
http.cdp_client = .{ http.setCdpClient(.{
.socket = self.ws.socket, .socket = self.ws.socket,
.ctx = self, .ctx = self,
.blocking_read_start = Client.blockingReadStart, .blocking_read_start = Client.blockingReadStart,
.blocking_read = Client.blockingRead, .blocking_read = Client.blockingRead,
.blocking_read_end = Client.blockingReadStop, .blocking_read_end = Client.blockingReadStop,
}; });
defer http.cdp_client = null; defer http.setCdpClient(null);
self.httpLoop(http) catch |err| { self.httpLoop(http) catch |err| {
log.err(.app, "CDP client loop", .{ .err = err }); log.err(.app, "CDP client loop", .{ .err = err });

File diff suppressed because it is too large Load Diff

View File

@@ -27,6 +27,9 @@ charset: [41]u8 = default_charset,
charset_len: usize = default_charset_len, charset_len: usize = default_charset_len,
is_default_charset: bool = true, is_default_charset: bool = true,
type_buf: [127]u8 = @splat(0),
sub_type_buf: [127]u8 = @splat(0),
/// String "UTF-8" continued by null characters. /// String "UTF-8" continued by null characters.
const default_charset = .{ 'U', 'T', 'F', '-', '8' } ++ .{0} ** 36; const default_charset = .{ 'U', 'T', 'F', '-', '8' } ++ .{0} ** 36;
const default_charset_len = 5; const default_charset_len = 5;
@@ -61,7 +64,10 @@ pub const ContentType = union(ContentTypeEnum) {
image_webp: void, image_webp: void,
application_json: void, application_json: void,
unknown: void, unknown: void,
other: struct { type: []const u8, sub_type: []const u8 }, other: struct {
type: []const u8,
sub_type: []const u8,
},
}; };
pub fn contentTypeString(mime: *const Mime) []const u8 { pub fn contentTypeString(mime: *const Mime) []const u8 {
@@ -112,17 +118,18 @@ fn parseCharset(value: []const u8) error{ CharsetTooBig, Invalid }![]const u8 {
return value; return value;
} }
pub fn parse(input: []u8) !Mime { pub fn parse(input: []const u8) !Mime {
if (input.len > 255) { if (input.len > 255) {
return error.TooBig; return error.TooBig;
} }
// Zig's trim API is broken. The return type is always `[]const u8`, var buf: [255]u8 = undefined;
// even if the input type is `[]u8`. @constCast is safe here. const normalized = std.ascii.lowerString(&buf, std.mem.trim(u8, input, &std.ascii.whitespace));
var normalized = @constCast(std.mem.trim(u8, input, &std.ascii.whitespace));
_ = std.ascii.lowerString(normalized, normalized); _ = std.ascii.lowerString(normalized, normalized);
const content_type, const type_len = try parseContentType(normalized); var mime = Mime{ .content_type = undefined };
const content_type, const type_len = try parseContentType(normalized, &mime.type_buf, &mime.sub_type_buf);
if (type_len >= normalized.len) { if (type_len >= normalized.len) {
return .{ .content_type = content_type }; return .{ .content_type = content_type };
} }
@@ -163,13 +170,12 @@ pub fn parse(input: []u8) !Mime {
} }
} }
return .{ mime.params = params;
.params = params, mime.charset = charset;
.charset = charset, mime.charset_len = charset_len;
.charset_len = charset_len, mime.content_type = content_type;
.content_type = content_type, mime.is_default_charset = !has_explicit_charset;
.is_default_charset = !has_explicit_charset, return mime;
};
} }
/// Prescan the first 1024 bytes of an HTML document for a charset declaration. /// Prescan the first 1024 bytes of an HTML document for a charset declaration.
@@ -395,7 +401,7 @@ pub fn isText(mime: *const Mime) bool {
} }
// we expect value to be lowercase // we expect value to be lowercase
fn parseContentType(value: []const u8) !struct { ContentType, usize } { fn parseContentType(value: []const u8, type_buf: []u8, sub_type_buf: []u8) !struct { ContentType, usize } {
const end = std.mem.indexOfScalarPos(u8, value, 0, ';') orelse value.len; const end = std.mem.indexOfScalarPos(u8, value, 0, ';') orelse value.len;
const type_name = trimRight(value[0..end]); const type_name = trimRight(value[0..end]);
const attribute_start = end + 1; const attribute_start = end + 1;
@@ -444,10 +450,18 @@ fn parseContentType(value: []const u8) !struct { ContentType, usize } {
return error.Invalid; return error.Invalid;
} }
return .{ .{ .other = .{ @memcpy(type_buf[0..main_type.len], main_type);
.type = main_type, @memcpy(sub_type_buf[0..sub_type.len], sub_type);
.sub_type = sub_type,
} }, attribute_start }; return .{
.{
.other = .{
.type = type_buf[0..main_type.len],
.sub_type = sub_type_buf[0..sub_type.len],
},
},
attribute_start,
};
} }
const VALID_CODEPOINTS = blk: { const VALID_CODEPOINTS = blk: {
@@ -461,6 +475,13 @@ const VALID_CODEPOINTS = blk: {
break :blk v; break :blk v;
}; };
pub fn typeString(self: *const Mime) []const u8 {
return switch (self.content_type) {
.other => |o| o.type[0..o.type_len],
else => "",
};
}
fn validType(value: []const u8) bool { fn validType(value: []const u8) bool {
for (value) |b| { for (value) |b| {
if (VALID_CODEPOINTS[b] == false) { if (VALID_CODEPOINTS[b] == false) {

View File

@@ -854,12 +854,10 @@ fn notifyParentLoadComplete(self: *Page) void {
parent.iframeCompletedLoading(self.iframe.?); parent.iframeCompletedLoading(self.iframe.?);
} }
fn pageHeaderDoneCallback(transfer: *HttpClient.Transfer) !bool { fn pageHeaderDoneCallback(response: HttpClient.Response) !bool {
var self: *Page = @ptrCast(@alignCast(transfer.ctx)); var self: *Page = @ptrCast(@alignCast(response.ctx));
const header = &transfer.response_header.?; const response_url = response.url();
const response_url = std.mem.span(header.url);
if (std.mem.eql(u8, response_url, self.url) == false) { if (std.mem.eql(u8, response_url, self.url) == false) {
// would be different than self.url in the case of a redirect // would be different than self.url in the case of a redirect
self.url = try self.arena.dupeZ(u8, response_url); self.url = try self.arena.dupeZ(u8, response_url);
@@ -873,8 +871,8 @@ fn pageHeaderDoneCallback(transfer: *HttpClient.Transfer) !bool {
if (comptime IS_DEBUG) { if (comptime IS_DEBUG) {
log.debug(.page, "navigate header", .{ log.debug(.page, "navigate header", .{
.url = self.url, .url = self.url,
.status = header.status, .status = response.status(),
.content_type = header.contentType(), .content_type = response.contentType(),
.type = self._type, .type = self._type,
}); });
} }
@@ -895,14 +893,14 @@ fn pageHeaderDoneCallback(transfer: *HttpClient.Transfer) !bool {
return true; return true;
} }
fn pageDataCallback(transfer: *HttpClient.Transfer, data: []const u8) !void { fn pageDataCallback(response: HttpClient.Response, data: []const u8) !void {
var self: *Page = @ptrCast(@alignCast(transfer.ctx)); var self: *Page = @ptrCast(@alignCast(response.ctx));
if (self._parse_state == .pre) { if (self._parse_state == .pre) {
// we lazily do this, because we might need the first chunk of data // we lazily do this, because we might need the first chunk of data
// to sniff the content type // to sniff the content type
var mime: Mime = blk: { var mime: Mime = blk: {
if (transfer.response_header.?.contentType()) |ct| { if (response.contentType()) |ct| {
break :blk try Mime.parse(ct); break :blk try Mime.parse(ct);
} }
break :blk Mime.sniff(data); break :blk Mime.sniff(data);

View File

@@ -137,7 +137,7 @@ fn _tick(self: *Runner, comptime is_cdp: bool, opts: TickOpts) !CDPTickResult {
.pre, .raw, .text, .image => { .pre, .raw, .text, .image => {
// The main page hasn't started/finished navigating. // The main page hasn't started/finished navigating.
// There's no JS to run, and no reason to run the scheduler. // There's no JS to run, and no reason to run the scheduler.
if (http_client.active == 0 and (comptime is_cdp) == false) { if (http_client.active() == 0 and (comptime is_cdp) == false) {
// haven't started navigating, I guess. // haven't started navigating, I guess.
return .done; return .done;
} }
@@ -171,8 +171,8 @@ fn _tick(self: *Runner, comptime is_cdp: bool, opts: TickOpts) !CDPTickResult {
// Each call to this runs scheduled load events. // Each call to this runs scheduled load events.
try page.dispatchLoad(); try page.dispatchLoad();
const http_active = http_client.active; const http_active = http_client.active();
const total_network_activity = http_active + http_client.intercepted; const total_network_activity = http_active + http_client.intercepted();
if (page._notified_network_almost_idle.check(total_network_activity <= 2)) { if (page._notified_network_almost_idle.check(total_network_activity <= 2)) {
page.notifyNetworkAlmostIdle(); page.notifyNetworkAlmostIdle();
} }
@@ -185,7 +185,7 @@ fn _tick(self: *Runner, comptime is_cdp: bool, opts: TickOpts) !CDPTickResult {
// because is_cdp is true, and that can only be // because is_cdp is true, and that can only be
// the case when interception isn't possible. // the case when interception isn't possible.
if (comptime IS_DEBUG) { if (comptime IS_DEBUG) {
std.debug.assert(http_client.intercepted == 0); std.debug.assert(http_client.intercepted() == 0);
} }
if (browser.hasBackgroundTasks()) { if (browser.hasBackgroundTasks()) {

View File

@@ -694,82 +694,86 @@ pub const Script = struct {
self.manager.page.releaseArena(self.arena); self.manager.page.releaseArena(self.arena);
} }
fn startCallback(transfer: *HttpClient.Transfer) !void { fn startCallback(response: HttpClient.Response) !void {
log.debug(.http, "script fetch start", .{ .req = transfer }); log.debug(.http, "script fetch start", .{ .req = response });
} }
fn headerCallback(transfer: *HttpClient.Transfer) !bool { fn headerCallback(response: HttpClient.Response) !bool {
const self: *Script = @ptrCast(@alignCast(transfer.ctx)); const self: *Script = @ptrCast(@alignCast(response.ctx));
const header = &transfer.response_header.?;
self.status = header.status; self.status = response.status().?;
if (header.status != 200) { if (response.status() != 200) {
log.info(.http, "script header", .{ log.info(.http, "script header", .{
.req = transfer, .req = response,
.status = header.status, .status = response.status(),
.content_type = header.contentType(), .content_type = response.contentType(),
}); });
return false; return false;
} }
if (comptime IS_DEBUG) { if (comptime IS_DEBUG) {
log.debug(.http, "script header", .{ log.debug(.http, "script header", .{
.req = transfer, .req = response,
.status = header.status, .status = response.status(),
.content_type = header.contentType(), .content_type = response.contentType(),
}); });
} }
{ switch (response.inner) {
// temp debug, trying to figure out why the next assert sometimes .transfer => |transfer| {
// fails. Is the buffer just corrupt or is headerCallback really // temp debug, trying to figure out why the next assert sometimes
// being called twice? // fails. Is the buffer just corrupt or is headerCallback really
lp.assert(self.header_callback_called == false, "ScriptManager.Header recall", .{ // being called twice?
.m = @tagName(std.meta.activeTag(self.mode)), lp.assert(self.header_callback_called == false, "ScriptManager.Header recall", .{
.a1 = self.debug_transfer_id, .m = @tagName(std.meta.activeTag(self.mode)),
.a2 = self.debug_transfer_tries, .a1 = self.debug_transfer_id,
.a3 = self.debug_transfer_aborted, .a2 = self.debug_transfer_tries,
.a4 = self.debug_transfer_bytes_received, .a3 = self.debug_transfer_aborted,
.a5 = self.debug_transfer_notified_fail, .a4 = self.debug_transfer_bytes_received,
.a7 = self.debug_transfer_intercept_state, .a5 = self.debug_transfer_notified_fail,
.a8 = self.debug_transfer_auth_challenge, .a7 = self.debug_transfer_intercept_state,
.a9 = self.debug_transfer_easy_id, .a8 = self.debug_transfer_auth_challenge,
.b1 = transfer.id, .a9 = self.debug_transfer_easy_id,
.b2 = transfer._tries, .b1 = transfer.id,
.b3 = transfer.aborted, .b2 = transfer._tries,
.b4 = transfer.bytes_received, .b3 = transfer.aborted,
.b5 = transfer._notified_fail, .b4 = transfer.bytes_received,
.b7 = @intFromEnum(transfer._intercept_state), .b5 = transfer._notified_fail,
.b8 = transfer._auth_challenge != null, .b7 = @intFromEnum(transfer._intercept_state),
.b9 = if (transfer._conn) |c| @intFromPtr(c._easy) else 0, .b8 = transfer._auth_challenge != null,
}); .b9 = if (transfer._conn) |c| @intFromPtr(c._easy) else 0,
self.header_callback_called = true; });
self.debug_transfer_id = transfer.id; self.header_callback_called = true;
self.debug_transfer_tries = transfer._tries; self.debug_transfer_id = transfer.id;
self.debug_transfer_aborted = transfer.aborted; self.debug_transfer_tries = transfer._tries;
self.debug_transfer_bytes_received = transfer.bytes_received; self.debug_transfer_aborted = transfer.aborted;
self.debug_transfer_notified_fail = transfer._notified_fail; self.debug_transfer_bytes_received = transfer.bytes_received;
self.debug_transfer_intercept_state = @intFromEnum(transfer._intercept_state); self.debug_transfer_notified_fail = transfer._notified_fail;
self.debug_transfer_auth_challenge = transfer._auth_challenge != null; self.debug_transfer_intercept_state = @intFromEnum(transfer._intercept_state);
self.debug_transfer_easy_id = if (transfer._conn) |c| @intFromPtr(c._easy) else 0; self.debug_transfer_auth_challenge = transfer._auth_challenge != null;
self.debug_transfer_easy_id = if (transfer._conn) |c| @intFromPtr(c._easy) else 0;
},
else => {},
} }
lp.assert(self.source.remote.capacity == 0, "ScriptManager.Header buffer", .{ .capacity = self.source.remote.capacity }); lp.assert(self.source.remote.capacity == 0, "ScriptManager.Header buffer", .{ .capacity = self.source.remote.capacity });
var buffer: std.ArrayList(u8) = .empty; var buffer: std.ArrayList(u8) = .empty;
if (transfer.getContentLength()) |cl| { if (response.contentLength()) |cl| {
try buffer.ensureTotalCapacity(self.arena, cl); try buffer.ensureTotalCapacity(self.arena, cl);
} }
self.source = .{ .remote = buffer }; self.source = .{ .remote = buffer };
return true; return true;
} }
fn dataCallback(transfer: *HttpClient.Transfer, data: []const u8) !void { fn dataCallback(response: HttpClient.Response, data: []const u8) !void {
const self: *Script = @ptrCast(@alignCast(transfer.ctx)); const self: *Script = @ptrCast(@alignCast(response.ctx));
self._dataCallback(transfer, data) catch |err| { self._dataCallback(response, data) catch |err| {
log.err(.http, "SM.dataCallback", .{ .err = err, .transfer = transfer, .len = data.len }); log.err(.http, "SM.dataCallback", .{ .err = err, .transfer = response, .len = data.len });
return err; return err;
}; };
} }
fn _dataCallback(self: *Script, _: *HttpClient.Transfer, data: []const u8) !void {
fn _dataCallback(self: *Script, _: HttpClient.Response, data: []const u8) !void {
try self.source.remote.appendSlice(self.arena, data); try self.source.remote.appendSlice(self.arena, data);
} }

View File

@@ -127,16 +127,16 @@ fn handleBlobUrl(url: []const u8, resolver: js.PromiseResolver, page: *Page) !js
return resolver.promise(); return resolver.promise();
} }
fn httpStartCallback(transfer: *HttpClient.Transfer) !void { fn httpStartCallback(response: HttpClient.Response) !void {
const self: *Fetch = @ptrCast(@alignCast(transfer.ctx)); const self: *Fetch = @ptrCast(@alignCast(response.ctx));
if (comptime IS_DEBUG) { if (comptime IS_DEBUG) {
log.debug(.http, "request start", .{ .url = self._url, .source = "fetch" }); log.debug(.http, "request start", .{ .url = self._url, .source = "fetch" });
} }
self._response._transfer = transfer; self._response._http_response = response;
} }
fn httpHeaderDoneCallback(transfer: *HttpClient.Transfer) !bool { fn httpHeaderDoneCallback(response: HttpClient.Response) !bool {
const self: *Fetch = @ptrCast(@alignCast(transfer.ctx)); const self: *Fetch = @ptrCast(@alignCast(response.ctx));
if (self._signal) |signal| { if (self._signal) |signal| {
if (signal._aborted) { if (signal._aborted) {
@@ -145,25 +145,24 @@ fn httpHeaderDoneCallback(transfer: *HttpClient.Transfer) !bool {
} }
const arena = self._response._arena; const arena = self._response._arena;
if (transfer.getContentLength()) |cl| { if (response.contentLength()) |cl| {
try self._buf.ensureTotalCapacity(arena, cl); try self._buf.ensureTotalCapacity(arena, cl);
} }
const res = self._response; const res = self._response;
const header = transfer.response_header.?;
if (comptime IS_DEBUG) { if (comptime IS_DEBUG) {
log.debug(.http, "request header", .{ log.debug(.http, "request header", .{
.source = "fetch", .source = "fetch",
.url = self._url, .url = self._url,
.status = header.status, .status = response.status(),
}); });
} }
res._status = header.status; res._status = response.status().?;
res._status_text = std.http.Status.phrase(@enumFromInt(header.status)) orelse ""; res._status_text = std.http.Status.phrase(@enumFromInt(response.status().?)) orelse "";
res._url = try arena.dupeZ(u8, std.mem.span(header.url)); res._url = try arena.dupeZ(u8, response.url());
res._is_redirected = header.redirect_count > 0; res._is_redirected = response.redirectCount().? > 0;
// Determine response type based on origin comparison // Determine response type based on origin comparison
const page_origin = URL.getOrigin(arena, self._page.url) catch null; const page_origin = URL.getOrigin(arena, self._page.url) catch null;
@@ -183,7 +182,7 @@ fn httpHeaderDoneCallback(transfer: *HttpClient.Transfer) !bool {
res._type = .basic; res._type = .basic;
} }
var it = transfer.responseHeaderIterator(); var it = response.headerIterator();
while (it.next()) |hdr| { while (it.next()) |hdr| {
try res._headers.append(hdr.name, hdr.value, self._page); try res._headers.append(hdr.name, hdr.value, self._page);
} }
@@ -191,8 +190,8 @@ fn httpHeaderDoneCallback(transfer: *HttpClient.Transfer) !bool {
return true; return true;
} }
fn httpDataCallback(transfer: *HttpClient.Transfer, data: []const u8) !void { fn httpDataCallback(response: HttpClient.Response, data: []const u8) !void {
const self: *Fetch = @ptrCast(@alignCast(transfer.ctx)); const self: *Fetch = @ptrCast(@alignCast(response.ctx));
// Check if aborted // Check if aborted
if (self._signal) |signal| { if (self._signal) |signal| {
@@ -207,7 +206,7 @@ fn httpDataCallback(transfer: *HttpClient.Transfer, data: []const u8) !void {
fn httpDoneCallback(ctx: *anyopaque) !void { fn httpDoneCallback(ctx: *anyopaque) !void {
const self: *Fetch = @ptrCast(@alignCast(ctx)); const self: *Fetch = @ptrCast(@alignCast(ctx));
var response = self._response; var response = self._response;
response._transfer = null; response._http_response = null;
response._body = self._buf.items; response._body = self._buf.items;
log.info(.http, "request complete", .{ log.info(.http, "request complete", .{
@@ -230,7 +229,7 @@ fn httpErrorCallback(ctx: *anyopaque, _: anyerror) void {
const self: *Fetch = @ptrCast(@alignCast(ctx)); const self: *Fetch = @ptrCast(@alignCast(ctx));
var response = self._response; var response = self._response;
response._transfer = null; response._http_response = null;
// the response is only passed on v8 on success, if we're here, it's safe to // the response is only passed on v8 on success, if we're here, it's safe to
// clear this. (defer since `self is in the response's arena). // clear this. (defer since `self is in the response's arena).
@@ -256,7 +255,7 @@ fn httpShutdownCallback(ctx: *anyopaque) void {
if (self._owns_response) { if (self._owns_response) {
var response = self._response; var response = self._response;
response._transfer = null; response._http_response = null;
response.deinit(self._page._session); response.deinit(self._page._session);
// Do not access `self` after this point: the Fetch struct was // Do not access `self` after this point: the Fetch struct was
// allocated from response._arena which has been released. // allocated from response._arena which has been released.

View File

@@ -48,7 +48,7 @@ _type: Type,
_status_text: []const u8, _status_text: []const u8,
_url: [:0]const u8, _url: [:0]const u8,
_is_redirected: bool, _is_redirected: bool,
_transfer: ?*HttpClient.Transfer = null, _http_response: ?HttpClient.Response = null,
const InitOpts = struct { const InitOpts = struct {
status: u16 = 200, status: u16 = 200,
@@ -81,9 +81,9 @@ pub fn init(body_: ?[]const u8, opts_: ?InitOpts, page: *Page) !*Response {
} }
pub fn deinit(self: *Response, session: *Session) void { pub fn deinit(self: *Response, session: *Session) void {
if (self._transfer) |transfer| { if (self._http_response) |resp| {
transfer.abort(error.Abort); resp.abort(error.Abort);
self._transfer = null; self._http_response = null;
} }
session.releaseArena(self._arena); session.releaseArena(self._arena);
} }
@@ -191,7 +191,7 @@ pub fn clone(self: *const Response, page: *Page) !*Response {
._type = self._type, ._type = self._type,
._is_redirected = self._is_redirected, ._is_redirected = self._is_redirected,
._headers = try Headers.init(.{ .obj = self._headers }, page), ._headers = try Headers.init(.{ .obj = self._headers }, page),
._transfer = null, ._http_response = null,
}; };
return cloned; return cloned;
} }

View File

@@ -43,7 +43,7 @@ _rc: lp.RC(u8) = .{},
_page: *Page, _page: *Page,
_proto: *XMLHttpRequestEventTarget, _proto: *XMLHttpRequestEventTarget,
_arena: Allocator, _arena: Allocator,
_transfer: ?*HttpClient.Transfer = null, _http_response: ?HttpClient.Response = null,
_active_request: bool = false, _active_request: bool = false,
_url: [:0]const u8 = "", _url: [:0]const u8 = "",
@@ -100,9 +100,9 @@ pub fn init(page: *Page) !*XMLHttpRequest {
} }
pub fn deinit(self: *XMLHttpRequest, session: *Session) void { pub fn deinit(self: *XMLHttpRequest, session: *Session) void {
if (self._transfer) |transfer| { if (self._http_response) |resp| {
transfer.abort(error.Abort); resp.abort(error.Abort);
self._transfer = null; self._http_response = null;
} }
if (self._on_ready_state_change) |func| { if (self._on_ready_state_change) |func| {
@@ -184,9 +184,9 @@ pub fn setWithCredentials(self: *XMLHttpRequest, value: bool) !void {
// TODO: url should be a union, as it can be multiple things // TODO: url should be a union, as it can be multiple things
pub fn open(self: *XMLHttpRequest, method_: []const u8, url: [:0]const u8) !void { pub fn open(self: *XMLHttpRequest, method_: []const u8, url: [:0]const u8) !void {
// Abort any in-progress request // Abort any in-progress request
if (self._transfer) |transfer| { if (self._http_response) |transfer| {
transfer.abort(error.Abort); transfer.abort(error.Abort);
self._transfer = null; self._http_response = null;
} }
// Reset internal state // Reset internal state
@@ -398,34 +398,32 @@ pub fn getResponseXML(self: *XMLHttpRequest, page: *Page) !?*Node.Document {
}; };
} }
fn httpStartCallback(transfer: *HttpClient.Transfer) !void { fn httpStartCallback(response: HttpClient.Response) !void {
const self: *XMLHttpRequest = @ptrCast(@alignCast(transfer.ctx)); const self: *XMLHttpRequest = @ptrCast(@alignCast(response.ctx));
if (comptime IS_DEBUG) { if (comptime IS_DEBUG) {
log.debug(.http, "request start", .{ .method = self._method, .url = self._url, .source = "xhr" }); log.debug(.http, "request start", .{ .method = self._method, .url = self._url, .source = "xhr" });
} }
self._transfer = transfer; self._http_response = response;
} }
fn httpHeaderCallback(transfer: *HttpClient.Transfer, header: http.Header) !void { fn httpHeaderCallback(response: HttpClient.Response, header: http.Header) !void {
const self: *XMLHttpRequest = @ptrCast(@alignCast(transfer.ctx)); const self: *XMLHttpRequest = @ptrCast(@alignCast(response.ctx));
const joined = try std.fmt.allocPrint(self._arena, "{s}: {s}", .{ header.name, header.value }); const joined = try std.fmt.allocPrint(self._arena, "{s}: {s}", .{ header.name, header.value });
try self._response_headers.append(self._arena, joined); try self._response_headers.append(self._arena, joined);
} }
fn httpHeaderDoneCallback(transfer: *HttpClient.Transfer) !bool { fn httpHeaderDoneCallback(response: HttpClient.Response) !bool {
const self: *XMLHttpRequest = @ptrCast(@alignCast(transfer.ctx)); const self: *XMLHttpRequest = @ptrCast(@alignCast(response.ctx));
const header = &transfer.response_header.?;
if (comptime IS_DEBUG) { if (comptime IS_DEBUG) {
log.debug(.http, "request header", .{ log.debug(.http, "request header", .{
.source = "xhr", .source = "xhr",
.url = self._url, .url = self._url,
.status = header.status, .status = response.status(),
}); });
} }
if (header.contentType()) |ct| { if (response.contentType()) |ct| {
self._response_mime = Mime.parse(ct) catch |e| { self._response_mime = Mime.parse(ct) catch |e| {
log.info(.http, "invalid content type", .{ log.info(.http, "invalid content type", .{
.content_Type = ct, .content_Type = ct,
@@ -436,18 +434,18 @@ fn httpHeaderDoneCallback(transfer: *HttpClient.Transfer) !bool {
}; };
} }
var it = transfer.responseHeaderIterator(); var it = response.headerIterator();
while (it.next()) |hdr| { while (it.next()) |hdr| {
const joined = try std.fmt.allocPrint(self._arena, "{s}: {s}", .{ hdr.name, hdr.value }); const joined = try std.fmt.allocPrint(self._arena, "{s}: {s}", .{ hdr.name, hdr.value });
try self._response_headers.append(self._arena, joined); try self._response_headers.append(self._arena, joined);
} }
self._response_status = header.status; self._response_status = response.status().?;
if (transfer.getContentLength()) |cl| { if (response.contentLength()) |cl| {
self._response_len = cl; self._response_len = cl;
try self._response_data.ensureTotalCapacity(self._arena, cl); try self._response_data.ensureTotalCapacity(self._arena, cl);
} }
self._response_url = try self._arena.dupeZ(u8, std.mem.span(header.url)); self._response_url = try self._arena.dupeZ(u8, response.url());
const page = self._page; const page = self._page;
@@ -462,8 +460,8 @@ fn httpHeaderDoneCallback(transfer: *HttpClient.Transfer) !bool {
return true; return true;
} }
fn httpDataCallback(transfer: *HttpClient.Transfer, data: []const u8) !void { fn httpDataCallback(response: HttpClient.Response, data: []const u8) !void {
const self: *XMLHttpRequest = @ptrCast(@alignCast(transfer.ctx)); const self: *XMLHttpRequest = @ptrCast(@alignCast(response.ctx));
try self._response_data.appendSlice(self._arena, data); try self._response_data.appendSlice(self._arena, data);
const page = self._page; const page = self._page;
@@ -486,7 +484,7 @@ fn httpDoneCallback(ctx: *anyopaque) !void {
// Not that the request is done, the http/client will free the transfer // Not that the request is done, the http/client will free the transfer
// object. It isn't safe to keep it around. // object. It isn't safe to keep it around.
self._transfer = null; self._http_response = null;
const page = self._page; const page = self._page;
@@ -509,22 +507,22 @@ fn httpErrorCallback(ctx: *anyopaque, err: anyerror) void {
const self: *XMLHttpRequest = @ptrCast(@alignCast(ctx)); const self: *XMLHttpRequest = @ptrCast(@alignCast(ctx));
// http client will close it after an error, it isn't safe to keep around // http client will close it after an error, it isn't safe to keep around
self.handleError(err); self.handleError(err);
if (self._transfer != null) { if (self._http_response != null) {
self._transfer = null; self._http_response = null;
} }
self.releaseSelfRef(); self.releaseSelfRef();
} }
fn httpShutdownCallback(ctx: *anyopaque) void { fn httpShutdownCallback(ctx: *anyopaque) void {
const self: *XMLHttpRequest = @ptrCast(@alignCast(ctx)); const self: *XMLHttpRequest = @ptrCast(@alignCast(ctx));
self._transfer = null; self._http_response = null;
} }
pub fn abort(self: *XMLHttpRequest) void { pub fn abort(self: *XMLHttpRequest) void {
self.handleError(error.Abort); self.handleError(error.Abort);
if (self._transfer) |transfer| { if (self._http_response) |resp| {
self._transfer = null; self._http_response = null;
transfer.abort(error.Abort); resp.abort(error.Abort);
} }
self.releaseSelfRef(); self.releaseSelfRef();
} }

View File

@@ -139,8 +139,8 @@ fn setLifecycleEventsEnabled(cmd: *CDP.Command) !void {
try sendPageLifecycle(bc, "load", now, frame_id, loader_id); try sendPageLifecycle(bc, "load", now, frame_id, loader_id);
const http_client = page._session.browser.http_client; const http_client = page._session.browser.http_client;
const http_active = http_client.active; const http_active = http_client.active();
const total_network_activity = http_active + http_client.intercepted; const total_network_activity = http_active + http_client.intercepted();
if (page._notified_network_almost_idle.check(total_network_activity <= 2)) { if (page._notified_network_almost_idle.check(total_network_activity <= 2)) {
try sendPageLifecycle(bc, "networkAlmostIdle", now, frame_id, loader_id); try sendPageLifecycle(bc, "networkAlmostIdle", now, frame_id, loader_id);
} }

View File

@@ -39,6 +39,7 @@ pub const Scope = enum {
telemetry, telemetry,
unknown_prop, unknown_prop,
mcp, mcp,
cache,
}; };
const Opts = struct { const Opts = struct {

View File

@@ -29,7 +29,9 @@ const libcurl = @import("../sys/libcurl.zig");
const http = @import("http.zig"); const http = @import("http.zig");
const RobotStore = @import("Robots.zig").RobotStore; const RobotStore = @import("Robots.zig").RobotStore;
const WebBotAuth = @import("WebBotAuth.zig"); const WebBotAuth = @import("WebBotAuth.zig");
const Cache = @import("cache/Cache.zig");
const App = @import("../App.zig");
const Network = @This(); const Network = @This();
const Listener = struct { const Listener = struct {
@@ -45,10 +47,12 @@ const MAX_TICK_CALLBACKS = 16;
allocator: Allocator, allocator: Allocator,
app: *App,
config: *const Config, config: *const Config,
ca_blob: ?http.Blob, ca_blob: ?http.Blob,
robot_store: RobotStore, robot_store: RobotStore,
web_bot_auth: ?WebBotAuth, web_bot_auth: ?WebBotAuth,
cache: ?Cache,
connections: []http.Connection, connections: []http.Connection,
available: std.DoublyLinkedList = .{}, available: std.DoublyLinkedList = .{},
@@ -200,7 +204,7 @@ fn globalDeinit() void {
libcurl.curl_global_cleanup(); libcurl.curl_global_cleanup();
} }
pub fn init(allocator: Allocator, config: *const Config) !Network { pub fn init(allocator: Allocator, app: *App, config: *const Config) !Network {
globalInit(allocator); globalInit(allocator);
errdefer globalDeinit(); errdefer globalDeinit();
@@ -233,6 +237,11 @@ pub fn init(allocator: Allocator, config: *const Config) !Network {
else else
null; null;
const cache = if (config.cacheDir()) |cache_dir_path|
Cache{ .kind = .{ .fs = try .init(cache_dir_path) } }
else
null;
return .{ return .{
.allocator = allocator, .allocator = allocator,
.config = config, .config = config,
@@ -244,8 +253,10 @@ pub fn init(allocator: Allocator, config: *const Config) !Network {
.available = available, .available = available,
.connections = connections, .connections = connections,
.app = app,
.robot_store = RobotStore.init(allocator), .robot_store = RobotStore.init(allocator),
.web_bot_auth = web_bot_auth, .web_bot_auth = web_bot_auth,
.cache = cache,
}; };
} }
@@ -278,6 +289,8 @@ pub fn deinit(self: *Network) void {
wba.deinit(self.allocator); wba.deinit(self.allocator);
} }
if (self.cache) |*cache| cache.deinit();
globalDeinit(); globalDeinit();
} }

156
src/network/cache/Cache.zig vendored Normal file
View File

@@ -0,0 +1,156 @@
// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
//
// Francis Bouvier <francis@lightpanda.io>
// Pierre Tachoire <pierre@lightpanda.io>
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
const std = @import("std");
const Http = @import("../http.zig");
const FsCache = @import("FsCache.zig");
/// A browser-wide cache for resources across the network.
/// This mostly conforms to RFC9111 with regards to caching behavior.
pub const Cache = @This();
/// The concrete storage backend. `deinit`, `get` and `put` switch on this
/// union and forward the call to whichever backend is active.
kind: union(enum) {
/// File-system backed storage, implemented in `FsCache.zig`.
fs: FsCache,
},
/// Releases whatever resources the active backend holds.
pub fn deinit(self: *Cache) void {
    switch (self.kind) {
        inline else => |*backend| backend.deinit(),
    }
}
/// Looks `req` up in the active backend.
///
/// `arena` is handed to the backend for any allocation the lookup needs.
/// Returns null when the backend has no usable entry for the request.
pub fn get(self: *Cache, arena: std.mem.Allocator, req: CacheRequest) ?CachedResponse {
    switch (self.kind) {
        inline else => |*backend| return backend.get(arena, req),
    }
}
/// Stores `body` together with `metadata` in the active backend.
///
/// Any error reported by the backend is returned unchanged.
pub fn put(self: *Cache, metadata: CachedMetadata, body: []const u8) !void {
    switch (self.kind) {
        inline else => |*backend| return backend.put(metadata, body),
    }
}
/// The part of a `Cache-Control` response header that the cache acts upon.
pub const CacheControl = struct {
    /// Freshness lifetime: the `s-maxage` value when one was given, otherwise
    /// the `max-age` value.
    max_age: u64,

    /// Parses a `Cache-Control` header value.
    ///
    /// Returns null, meaning "do not cache", when any of these holds:
    ///   - `no-store` or `no-cache` is present,
    ///   - `public` is absent,
    ///   - neither `max-age` nor `s-maxage` carries a parsable number,
    ///   - the resulting lifetime is 0.
    ///
    /// Directive names are matched case-insensitively and surrounding
    /// whitespace is ignored. A directive whose number fails to parse is
    /// skipped as if it were not there.
    pub fn parse(value: []const u8) ?CacheControl {
        var plain_lifetime: ?u64 = null;
        var shared_lifetime: ?u64 = null;
        var saw_public = false;

        var directives = std.mem.splitScalar(u8, value, ',');
        while (directives.next()) |raw| {
            const directive = std.mem.trim(u8, raw, &std.ascii.whitespace);

            if (std.ascii.eqlIgnoreCase(directive, "no-store")) return null;
            if (std.ascii.eqlIgnoreCase(directive, "no-cache")) return null;

            if (std.ascii.eqlIgnoreCase(directive, "public")) {
                saw_public = true;
                continue;
            }

            if (std.ascii.startsWithIgnoreCase(directive, "max-age=")) {
                const parsed = std.fmt.parseInt(u64, directive["max-age=".len..], 10) catch continue;
                plain_lifetime = parsed;
                continue;
            }

            if (std.ascii.startsWithIgnoreCase(directive, "s-maxage=")) {
                const parsed = std.fmt.parseInt(u64, directive["s-maxage=".len..], 10) catch continue;
                shared_lifetime = parsed;
            }
        }

        // `s-maxage` wins over `max-age` regardless of the order they appear in.
        const lifetime = shared_lifetime orelse plain_lifetime orelse return null;
        if (!saw_public or lifetime == 0) return null;
        return .{ .max_age = lifetime };
    }
};
/// Everything the cache records about a stored response besides its body.
/// `tryCache` builds one of these for a cacheable response; `FsCache`
/// serializes it as JSON next to the body.
pub const CachedMetadata = struct {
/// URL of the cached resource. `FsCache` derives the entry's file name from
/// it and compares it with the request URL on lookup.
url: [:0]const u8,
/// Content type of the response; `tryCache` falls back to
/// "application/octet-stream" when none is supplied.
content_type: []const u8,
/// Response status. `tryCache` only produces metadata for status 200.
status: u16,
/// Timestamp given to `tryCache` when the entry was created. It is compared
/// against `CacheRequest.timestamp`, so both must use the same clock and
/// unit (presumably Unix seconds, as the FsCache tests use
/// `std.time.timestamp()` - confirm with callers).
stored_at: i64,
/// Age the response already had when it was stored (parsed by `tryCache`
/// from its `age` argument, 0 when absent or unparsable). It is added to the
/// time elapsed since `stored_at` when checking freshness.
age_at_store: u64,
/// Parsed `Cache-Control` information; `max_age` bounds the entry's lifetime.
cache_control: CacheControl,
/// Response headers. `tryCache` leaves this empty; presumably the caller
/// fills it in before storing - verify against the caller.
headers: []const Http.Header,
/// Request headers (name and value) selected by `Vary`. A later request is
/// only served from this entry when it carries matching values.
/// `tryCache` leaves this empty as well.
vary_headers: []const Http.Header,
};
/// The information a backend needs to look an entry up.
pub const CacheRequest = struct {
/// URL being requested; used as the cache key.
url: []const u8,
/// The caller's notion of "now", used to decide whether an entry has
/// expired. Must use the same clock and unit as `CachedMetadata.stored_at`.
timestamp: i64,
/// Headers of the outgoing request, checked against the entry's
/// `vary_headers`.
request_headers: []const Http.Header,
};
/// Where the body of a cached response can be read from.
pub const CachedData = union(enum) {
/// The body as a slice of bytes.
buffer: []const u8,
/// The body as a region of a file: `len` bytes starting at byte `offset`.
/// `FsCache.get` returns this variant with the file still open; the FsCache
/// tests close `file` once they are done reading, so ownership of the handle
/// appears to pass to whoever receives the response.
file: struct {
file: std.fs.File,
offset: usize,
len: usize,
},
};
/// A cache hit: the stored metadata plus a handle on the stored body.
pub const CachedResponse = struct {
/// Metadata recorded when the response was stored.
metadata: CachedMetadata,
/// Location of the response body.
data: CachedData,
};
/// Decides whether a response may be stored and, when it may, builds the
/// metadata to store it under.
///
/// Returns null (not cacheable) when the status is not 200, when the response
/// sets a cookie, when the request carried authorization, when `vary` is
/// exactly "*", when there is no `cache_control` value, or when
/// `CacheControl.parse` rejects it.
///
/// `content_type` is copied into `arena`; `url` is stored as passed in, so it
/// has to outlive the returned metadata. `headers` and `vary_headers` are
/// left empty. The only possible error is an allocation failure.
pub fn tryCache(
    arena: std.mem.Allocator,
    timestamp: i64,
    url: [:0]const u8,
    status: u16,
    content_type: ?[]const u8,
    cache_control: ?[]const u8,
    vary: ?[]const u8,
    age: ?[]const u8,
    has_set_cookie: bool,
    has_authorization: bool,
) !?CachedMetadata {
    if (status != 200 or has_set_cookie or has_authorization) return null;

    if (vary) |vary_value| {
        if (std.mem.eql(u8, vary_value, "*")) return null;
    }

    const cache_control_value = cache_control orelse return null;
    const directives = CacheControl.parse(cache_control_value) orelse return null;

    const stored_content_type: []const u8 = if (content_type) |ct|
        try arena.dupe(u8, ct)
    else
        "application/octet-stream";

    // An unparsable age is treated the same as a missing one.
    const initial_age: u64 = if (age) |age_text|
        (std.fmt.parseInt(u64, age_text, 10) catch 0)
    else
        0;

    return .{
        .url = url,
        .content_type = stored_content_type,
        .status = status,
        .stored_at = timestamp,
        .age_at_store = initial_age,
        .cache_control = directives,
        .headers = &.{},
        .vary_headers = &.{},
    };
}

580
src/network/cache/FsCache.zig vendored Normal file
View File

@@ -0,0 +1,580 @@
// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
//
// Francis Bouvier <francis@lightpanda.io>
// Pierre Tachoire <pierre@lightpanda.io>
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
const std = @import("std");
const log = @import("../../log.zig");
const Cache = @import("Cache.zig");
const Http = @import("../http.zig");
const CacheRequest = Cache.CacheRequest;
const CachedMetadata = Cache.CachedMetadata;
const CachedResponse = Cache.CachedResponse;
/// On-disk format version. `put` writes it into every entry's metadata and
/// `get` deletes any entry whose recorded version differs.
const CACHE_VERSION: usize = 1;
/// Number of mutexes the entries are spread over. `getLockPtr` selects one by
/// masking a hash with `LOCK_STRIPES - 1`, which is why the value has to be a
/// power of two.
const LOCK_STRIPES = 16;
// Fail the build if LOCK_STRIPES is ever changed to a non power of two.
comptime {
std.debug.assert(std.math.isPowerOfTwo(LOCK_STRIPES));
}
/// A cache backend that keeps every entry as a single file inside `dir`.
pub const FsCache = @This();
/// Directory holding the entry files; opened by `init`, closed by `deinit`.
dir: std.fs.Dir,
/// Striped locks. `get` and `put` hold the stripe selected from the entry's
/// hashed key for the whole call.
locks: [LOCK_STRIPES]std.Thread.Mutex = .{std.Thread.Mutex{}} ** LOCK_STRIPES,
/// Shape of the JSON document `put` appends after the body of every cache
/// file and `get` parses back.
const CacheMetadataJson = struct {
/// Format version the entry was written with; compared to `CACHE_VERSION`.
version: usize,
/// The metadata of the cached response.
metadata: CachedMetadata,
};
/// Returns the mutex stripe that guards the entry identified by `key`.
/// The same key always maps to the same stripe.
fn getLockPtr(self: *FsCache, key: *const [HASHED_KEY_LEN]u8) *std.Thread.Mutex {
    const key_hash = std.hash.Wyhash.hash(0, key);
    // Masking keeps the low bits of the hash as the stripe index.
    const stripe = key_hash & (LOCK_STRIPES - 1);
    return &self.locks[stripe];
}
/// Size in bytes of the body-length prefix at the start of every cache file.
const BODY_LEN_HEADER_LEN = 8;
/// Length of a hex-encoded SHA-256 digest (two characters per digest byte).
const HASHED_KEY_LEN = 2 * std.crypto.hash.sha2.Sha256.digest_length;
/// Length of "<hashed key>.cache".
const HASHED_PATH_LEN = HASHED_KEY_LEN + ".cache".len;
/// Length of "<hashed key>.cache.tmp".
const HASHED_TMP_PATH_LEN = HASHED_PATH_LEN + ".tmp".len;

/// Returns the SHA-256 digest of `key` as lowercase hexadecimal text.
fn hashKey(key: []const u8) [HASHED_KEY_LEN]u8 {
    var digest: [std.crypto.hash.sha2.Sha256.digest_length]u8 = undefined;
    std.crypto.hash.sha2.Sha256.hash(key, &digest, .{});
    return std.fmt.bytesToHex(digest, .lower);
}
/// Builds the file name of a cache entry: the hashed key followed by ".cache".
fn cachePath(hashed_key: *const [HASHED_KEY_LEN]u8) [HASHED_PATH_LEN]u8 {
    const suffix = ".cache";
    var path: [HASHED_PATH_LEN]u8 = undefined;
    @memcpy(path[0..HASHED_KEY_LEN], hashed_key);
    @memcpy(path[HASHED_KEY_LEN..], suffix);
    return path;
}
/// Builds the name of the temporary file an entry is written to before being
/// renamed into place: the hashed key followed by ".cache.tmp".
fn cacheTmpPath(hashed_key: *const [HASHED_KEY_LEN]u8) [HASHED_TMP_PATH_LEN]u8 {
    const suffix = ".cache.tmp";
    var path: [HASHED_TMP_PATH_LEN]u8 = undefined;
    @memcpy(path[0..HASHED_KEY_LEN], hashed_key);
    @memcpy(path[HASHED_KEY_LEN..], suffix);
    return path;
}
/// Opens the cache directory at `path`, creating it first when necessary.
///
/// `path` may be absolute or relative to the current working directory.
/// Missing parent directories are created too, so a configured location such
/// as "cache/http" works on first use. An already existing directory is
/// reused as is.
///
/// Returns any error raised while creating or opening the directory. Call
/// `deinit` to close the directory handle.
pub fn init(path: []const u8) !FsCache {
    const dir = try std.fs.cwd().makeOpenPath(path, .{ .iterate = true });
    return .{ .dir = dir };
}
/// Closes the directory handle opened by `init`.
pub fn deinit(self: *FsCache) void {
self.dir.close();
}
/// Looks up the entry stored for `req.url`.
///
/// On a hit the returned response refers to the still-open cache file; the
/// body occupies `len` bytes starting at `offset`. The caller owns that file
/// handle and must close it. The metadata is allocated from `arena`.
///
/// Returns null when there is no entry, the file cannot be opened or read,
/// or the entry is unusable. Entries that are corrupt, written with another
/// `CACHE_VERSION`, expired, or stored for a different URL are deleted from
/// disk. An entry whose `vary_headers` do not match the request is kept.
/// The file handle is closed on every path that returns null.
///
/// The lock stripe for the entry is held for the duration of the call.
pub fn get(self: *FsCache, arena: std.mem.Allocator, req: CacheRequest) ?Cache.CachedResponse {
    const hashed_key = hashKey(req.url);
    const cache_p = cachePath(&hashed_key);

    const lock = self.getLockPtr(&hashed_key);
    lock.lock();
    defer lock.unlock();

    const file = self.dir.openFile(&cache_p, .{ .mode = .read_only }) catch |e| {
        switch (e) {
            std.fs.File.OpenError.FileNotFound => {
                log.debug(.cache, "miss", .{ .url = req.url, .hash = &hashed_key });
            },
            else => |err| {
                log.warn(.cache, "open file err", .{ .url = req.url, .err = err });
            },
        }
        return null;
    };

    // Defers run in reverse order of registration: the file is closed first
    // (unless it is handed to the caller), then the entry is deleted if it
    // was found to be unusable, and finally the lock is released.
    var cleanup = false;
    defer if (cleanup) {
        self.dir.deleteFile(&cache_p) catch |e| {
            log.err(.cache, "clean fail", .{ .url = req.url, .file = &cache_p, .err = e });
        };
    };
    var handed_to_caller = false;
    defer if (!handed_to_caller) file.close();

    var file_buf: [1024]u8 = undefined;
    var len_buf: [BODY_LEN_HEADER_LEN]u8 = undefined;

    var file_reader = file.reader(&file_buf);
    const file_reader_iface = &file_reader.interface;

    file_reader_iface.readSliceAll(&len_buf) catch |e| {
        log.warn(.cache, "read header", .{ .url = req.url, .err = e });
        cleanup = true;
        return null;
    };
    const body_len = std.mem.readInt(u64, &len_buf, .little);

    // Now we read metadata. The length comes from the file, so a corrupt
    // entry could make this addition overflow; treat that as a bad entry.
    const metadata_offset = std.math.add(u64, body_len, BODY_LEN_HEADER_LEN) catch |e| {
        log.warn(.cache, "seek metadata", .{ .url = req.url, .err = e });
        cleanup = true;
        return null;
    };
    file_reader.seekTo(metadata_offset) catch |e| {
        log.warn(.cache, "seek metadata", .{ .url = req.url, .err = e });
        cleanup = true;
        return null;
    };

    var json_reader = std.json.Reader.init(arena, file_reader_iface);
    const cache_file: CacheMetadataJson = std.json.parseFromTokenSourceLeaky(
        CacheMetadataJson,
        arena,
        &json_reader,
        .{ .allocate = .alloc_always },
    ) catch |e| {
        log.warn(.cache, "metadata parse", .{ .url = req.url, .err = e });
        cleanup = true;
        return null;
    };

    if (cache_file.version != CACHE_VERSION) {
        log.warn(.cache, "version", .{ .url = req.url, .expected = CACHE_VERSION, .got = cache_file.version });
        cleanup = true;
        return null;
    }

    const metadata = cache_file.metadata;

    // Check entry expiration. The stored numbers are read back from disk, so
    // any value that does not fit the arithmetic marks the entry as expired
    // instead of tripping a safety check.
    const is_fresh = fresh: {
        const age_at_store = std.math.cast(i64, metadata.age_at_store) orelse break :fresh false;
        const elapsed = std.math.sub(i64, req.timestamp, metadata.stored_at) catch break :fresh false;
        const age = std.math.add(i64, elapsed, age_at_store) catch break :fresh false;
        if (age < 0) break :fresh false;
        break :fresh @as(u64, @intCast(age)) < metadata.cache_control.max_age;
    };
    if (!is_fresh) {
        log.debug(.cache, "expired", .{ .url = req.url });
        cleanup = true;
        return null;
    }

    // If we have Vary headers, ensure they are present & matching.
    for (metadata.vary_headers) |vary_hdr| {
        const name = vary_hdr.name;
        const value = vary_hdr.value;

        const incoming = for (req.request_headers) |h| {
            if (std.ascii.eqlIgnoreCase(h.name, name)) break h.value;
        } else "";

        if (!std.ascii.eqlIgnoreCase(value, incoming)) {
            log.debug(.cache, "vary mismatch", .{ .url = req.url, .header = name });
            // The entry stays on disk; only the handle is released.
            return null;
        }
    }

    // On the case of a hash collision.
    if (!std.ascii.eqlIgnoreCase(metadata.url, req.url)) {
        log.warn(.cache, "collision", .{ .url = req.url, .expected = metadata.url, .got = req.url });
        cleanup = true;
        return null;
    }

    handed_to_caller = true;
    return .{
        .metadata = metadata,
        .data = .{
            .file = .{
                .file = file,
                .offset = BODY_LEN_HEADER_LEN,
                .len = body_len,
            },
        },
    };
}
/// Stores `body` and `meta` as the cache entry for `meta.url`, replacing any
/// previous entry for that URL.
///
/// File layout: an 8-byte little-endian body length, the body bytes, then the
/// metadata as minified JSON. The entry is written to a temporary file and
/// renamed into place once complete. If any step fails the temporary file is
/// removed and the error is returned; an existing entry is left untouched.
///
/// The lock stripe for the entry is held for the duration of the call.
pub fn put(self: *FsCache, meta: CachedMetadata, body: []const u8) !void {
    const hashed_key = hashKey(meta.url);
    const cache_p = cachePath(&hashed_key);
    const cache_tmp_p = cacheTmpPath(&hashed_key);

    const lock = self.getLockPtr(&hashed_key);
    lock.lock();
    defer lock.unlock();

    const file = try self.dir.createFile(&cache_tmp_p, .{ .truncate = true });
    // Registered before the close so that, on failure, the file is closed
    // first and the partially written temporary file is removed afterwards.
    errdefer self.dir.deleteFile(&cache_tmp_p) catch |e| {
        log.warn(.cache, "tmp clean fail", .{ .url = meta.url, .file = &cache_tmp_p, .err = e });
    };
    defer file.close();

    var writer_buf: [1024]u8 = undefined;
    var file_writer = file.writer(&writer_buf);
    const writer = &file_writer.interface;

    var len_buf: [BODY_LEN_HEADER_LEN]u8 = undefined;
    std.mem.writeInt(u64, &len_buf, body.len, .little);

    try writer.writeAll(&len_buf);
    try writer.writeAll(body);
    try std.json.Stringify.value(
        CacheMetadataJson{ .version = CACHE_VERSION, .metadata = meta },
        .{ .whitespace = .minified },
        writer,
    );
    try writer.flush();

    try self.dir.rename(&cache_tmp_p, &cache_p);
}
const testing = std.testing;
/// Test helper: creates a temporary directory and a cache rooted in it.
/// The caller must call `cache.deinit()` and `tmp.cleanup()` when done.
fn setupCache() !struct { tmp: testing.TmpDir, cache: Cache } {
    var tmp_dir = testing.tmpDir(.{});
    errdefer tmp_dir.cleanup();

    const abs_path = try tmp_dir.dir.realpathAlloc(testing.allocator, ".");
    defer testing.allocator.free(abs_path);

    const fs_cache = try FsCache.init(abs_path);
    return .{
        .tmp = tmp_dir,
        .cache = .{ .kind = .{ .fs = fs_cache } },
    };
}
// Stores one entry and checks that the body read back through the returned
// file region is the one that was stored.
test "FsCache: basic put and get" {
    var fixture = try setupCache();
    defer {
        fixture.cache.deinit();
        fixture.tmp.cleanup();
    }

    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();

    const stored_at = std.time.timestamp();
    const expected_body = "hello world";

    try fixture.cache.put(.{
        .url = "https://example.com",
        .content_type = "text/html",
        .status = 200,
        .stored_at = stored_at,
        .age_at_store = 0,
        .cache_control = .{ .max_age = 600 },
        .headers = &.{},
        .vary_headers = &.{},
    }, expected_body);

    const hit = fixture.cache.get(arena_state.allocator(), .{
        .url = "https://example.com",
        .timestamp = stored_at,
        .request_headers = &.{},
    }) orelse return error.CacheMiss;

    const region = hit.data.file;
    defer region.file.close();

    var read_buf: [64]u8 = undefined;
    var reader = region.file.reader(&read_buf);
    try reader.seekTo(region.offset);

    const actual_body = try reader.interface.readAlloc(testing.allocator, region.len);
    defer testing.allocator.free(actual_body);

    try testing.expectEqualStrings(expected_body, actual_body);
}
// An entry stored with an initial age of 900 and a max-age of 1000 is served
// while its total age stays below 1000 and is dropped once it does not.
test "FsCache: get expiration" {
    var fixture = try setupCache();
    defer {
        fixture.cache.deinit();
        fixture.tmp.cleanup();
    }

    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();
    const arena = arena_state.allocator();

    const stored_at = 5000;
    const lifetime = 1000;

    try fixture.cache.put(.{
        .url = "https://example.com",
        .content_type = "text/html",
        .status = 200,
        .stored_at = stored_at,
        .age_at_store = 900,
        .cache_control = .{ .max_age = lifetime },
        .headers = &.{},
        .vary_headers = &.{},
    }, "hello world");

    // Total age 950: still fresh.
    const hit = fixture.cache.get(arena, .{
        .url = "https://example.com",
        .timestamp = stored_at + 50,
        .request_headers = &.{},
    }) orelse return error.CacheMiss;
    hit.data.file.file.close();

    // Total age 1100: expired, which also removes the entry.
    const after_expiry = fixture.cache.get(arena, .{
        .url = "https://example.com",
        .timestamp = stored_at + 200,
        .request_headers = &.{},
    });
    try testing.expectEqual(null, after_expiry);

    // The entry is gone, so even a timestamp inside the lifetime misses.
    const after_removal = fixture.cache.get(arena, .{
        .url = "https://example.com",
        .timestamp = stored_at,
        .request_headers = &.{},
    });
    try testing.expectEqual(null, after_removal);
}
// Storing a second response under the same URL replaces the first one.
test "FsCache: put override" {
    var fixture = try setupCache();
    defer {
        fixture.cache.deinit();
        fixture.tmp.cleanup();
    }

    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();

    const Round = struct {
        stored_at: i64,
        age_at_store: u64,
        max_age: u64,
        body: []const u8,
    };
    const rounds = [_]Round{
        .{ .stored_at = 5000, .age_at_store = 900, .max_age = 1000, .body = "hello world" },
        .{ .stored_at = 10000, .age_at_store = 0, .max_age = 2000, .body = "goodbye world" },
    };

    // Each round stores a body and immediately reads it back.
    for (rounds) |round| {
        try fixture.cache.put(.{
            .url = "https://example.com",
            .content_type = "text/html",
            .status = 200,
            .stored_at = round.stored_at,
            .age_at_store = round.age_at_store,
            .cache_control = .{ .max_age = round.max_age },
            .headers = &.{},
            .vary_headers = &.{},
        }, round.body);

        const hit = fixture.cache.get(arena_state.allocator(), .{
            .url = "https://example.com",
            .timestamp = round.stored_at,
            .request_headers = &.{},
        }) orelse return error.CacheMiss;

        const region = hit.data.file;
        defer region.file.close();

        var read_buf: [64]u8 = undefined;
        var reader = region.file.reader(&read_buf);
        try reader.seekTo(region.offset);

        const actual_body = try reader.interface.readAlloc(testing.allocator, region.len);
        defer testing.allocator.free(actual_body);

        try testing.expectEqualStrings(round.body, actual_body);
    }
}
// A file that sits where an entry is expected but does not follow the cache
// file layout must be reported as a miss.
test "FsCache: garbage file" {
    var fixture = try setupCache();
    defer {
        fixture.cache.deinit();
        fixture.tmp.cleanup();
    }

    // Plant junk under the exact file name the cache uses for this URL.
    const key = hashKey("https://example.com");
    const entry_name = cachePath(&key);
    {
        const junk = try fixture.cache.kind.fs.dir.createFile(&entry_name, .{});
        defer junk.close();
        try junk.writeAll("this is not a valid cache file !@#$%");
    }

    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();

    const lookup = fixture.cache.get(arena_state.allocator(), .{
        .url = "https://example.com",
        .timestamp = 5000,
        .request_headers = &.{},
    });
    try testing.expectEqual(null, lookup);
}
// An entry stored with a Vary header is only served to requests that carry
// the same value for that header, and a mismatch leaves the entry in place.
test "FsCache: vary hit and miss" {
    var fixture = try setupCache();
    defer {
        fixture.cache.deinit();
        fixture.tmp.cleanup();
    }

    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();
    const arena = arena_state.allocator();

    const stored_at = std.time.timestamp();

    try fixture.cache.put(.{
        .url = "https://example.com",
        .content_type = "text/html",
        .status = 200,
        .stored_at = stored_at,
        .age_at_store = 0,
        .cache_control = .{ .max_age = 600 },
        .headers = &.{},
        .vary_headers = &.{
            .{ .name = "Accept-Encoding", .value = "gzip" },
        },
    }, "hello world");

    // Same header value: hit.
    const first_hit = fixture.cache.get(arena, .{
        .url = "https://example.com",
        .timestamp = stored_at,
        .request_headers = &.{
            .{ .name = "Accept-Encoding", .value = "gzip" },
        },
    }) orelse return error.CacheMiss;
    first_hit.data.file.file.close();

    // Different header value: miss.
    const other_encoding = fixture.cache.get(arena, .{
        .url = "https://example.com",
        .timestamp = stored_at,
        .request_headers = &.{
            .{ .name = "Accept-Encoding", .value = "br" },
        },
    });
    try testing.expectEqual(null, other_encoding);

    // Header missing altogether: miss.
    const no_headers = fixture.cache.get(arena, .{
        .url = "https://example.com",
        .timestamp = stored_at,
        .request_headers = &.{},
    });
    try testing.expectEqual(null, no_headers);

    // The misses above must not have evicted the entry.
    const second_hit = fixture.cache.get(arena, .{
        .url = "https://example.com",
        .timestamp = stored_at,
        .request_headers = &.{
            .{ .name = "Accept-Encoding", .value = "gzip" },
        },
    }) orelse return error.CacheMiss;
    second_hit.data.file.file.close();
}
// With several Vary headers recorded, every one of them has to match.
test "FsCache: vary multiple headers" {
    var fixture = try setupCache();
    defer {
        fixture.cache.deinit();
        fixture.tmp.cleanup();
    }

    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();
    const arena = arena_state.allocator();

    const stored_at = std.time.timestamp();

    try fixture.cache.put(.{
        .url = "https://example.com",
        .content_type = "text/html",
        .status = 200,
        .stored_at = stored_at,
        .age_at_store = 0,
        .cache_control = .{ .max_age = 600 },
        .headers = &.{},
        .vary_headers = &.{
            .{ .name = "Accept-Encoding", .value = "gzip" },
            .{ .name = "Accept-Language", .value = "en" },
        },
    }, "hello world");

    // Both headers match: hit.
    const hit = fixture.cache.get(arena, .{
        .url = "https://example.com",
        .timestamp = stored_at,
        .request_headers = &.{
            .{ .name = "Accept-Encoding", .value = "gzip" },
            .{ .name = "Accept-Language", .value = "en" },
        },
    }) orelse return error.CacheMiss;
    hit.data.file.file.close();

    // One header differs: miss.
    const language_differs = fixture.cache.get(arena, .{
        .url = "https://example.com",
        .timestamp = stored_at,
        .request_headers = &.{
            .{ .name = "Accept-Encoding", .value = "gzip" },
            .{ .name = "Accept-Language", .value = "fr" },
        },
    });
    try testing.expectEqual(null, language_differs);
}

View File

@@ -79,7 +79,7 @@ pub const Headers = struct {
self.headers = updated_headers; self.headers = updated_headers;
} }
fn parseHeader(header_str: []const u8) ?Header { pub fn parseHeader(header_str: []const u8) ?Header {
const colon_pos = std.mem.indexOfScalar(u8, header_str, ':') orelse return null; const colon_pos = std.mem.indexOfScalar(u8, header_str, ':') orelse return null;
const name = std.mem.trim(u8, header_str[0..colon_pos], " \t"); const name = std.mem.trim(u8, header_str[0..colon_pos], " \t");
@@ -88,22 +88,9 @@ pub const Headers = struct {
return .{ .name = name, .value = value }; return .{ .name = name, .value = value };
} }
pub fn iterator(self: *Headers) Iterator { pub fn iterator(self: Headers) HeaderIterator {
return .{ return .{ .curl_slist = .{ .header = self.headers } };
.header = self.headers,
};
} }
const Iterator = struct {
header: [*c]libcurl.CurlSList,
pub fn next(self: *Iterator) ?Header {
const h = self.header orelse return null;
self.header = h.*.next;
return parseHeader(std.mem.span(@as([*:0]const u8, @ptrCast(h.*.data))));
}
};
}; };
// In normal cases, the header iterator comes from the curl linked list. // In normal cases, the header iterator comes from the curl linked list.
@@ -112,6 +99,7 @@ pub const Headers = struct {
// This union, is an iterator that exposes the same API for either case. // This union, is an iterator that exposes the same API for either case.
pub const HeaderIterator = union(enum) { pub const HeaderIterator = union(enum) {
curl: CurlHeaderIterator, curl: CurlHeaderIterator,
curl_slist: CurlSListIterator,
list: ListHeaderIterator, list: ListHeaderIterator,
pub fn next(self: *HeaderIterator) ?Header { pub fn next(self: *HeaderIterator) ?Header {
@@ -120,6 +108,19 @@ pub const HeaderIterator = union(enum) {
} }
} }
pub fn collect(self: *HeaderIterator, allocator: std.mem.Allocator) !std.ArrayList(Header) {
var list: std.ArrayList(Header) = .empty;
while (self.next()) |hdr| {
try list.append(allocator, .{
.name = try allocator.dupe(u8, hdr.name),
.value = try allocator.dupe(u8, hdr.value),
});
}
return list;
}
const CurlHeaderIterator = struct { const CurlHeaderIterator = struct {
conn: *const Connection, conn: *const Connection,
prev: ?*libcurl.CurlHeader = null, prev: ?*libcurl.CurlHeader = null,
@@ -136,6 +137,16 @@ pub const HeaderIterator = union(enum) {
} }
}; };
const CurlSListIterator = struct {
header: [*c]libcurl.CurlSList,
pub fn next(self: *CurlSListIterator) ?Header {
const h = self.header orelse return null;
self.header = h.*.next;
return Headers.parseHeader(std.mem.span(@as([*:0]const u8, @ptrCast(h.*.data))));
}
};
const ListHeaderIterator = struct { const ListHeaderIterator = struct {
index: usize = 0, index: usize = 0,
list: []const Header, list: []const Header,