diff --git a/src/App.zig b/src/App.zig index 8c4471ad..41ca9822 100644 --- a/src/App.zig +++ b/src/App.zig @@ -55,7 +55,7 @@ pub fn init(allocator: Allocator, config: *const Config) !*App { .arena_pool = undefined, }; - app.network = try Network.init(allocator, config); + app.network = try Network.init(allocator, app, config); errdefer app.network.deinit(); app.platform = try Platform.init(); diff --git a/src/Config.zig b/src/Config.zig index 1e0e6f69..6788db1d 100644 --- a/src/Config.zig +++ b/src/Config.zig @@ -156,6 +156,13 @@ pub fn userAgentSuffix(self: *const Config) ?[]const u8 { }; } +pub fn httpCacheDir(self: *const Config) ?[]const u8 { + return switch (self.mode) { + inline .serve, .fetch, .mcp => |opts| opts.common.http_cache_dir, + else => null, + }; +} + pub fn cdpTimeout(self: *const Config) usize { return switch (self.mode) { .serve => |opts| if (opts.timeout > 604_800) 604_800_000 else @as(usize, opts.timeout) * 1000, @@ -273,6 +280,7 @@ pub const Common = struct { log_format: ?log.Format = null, log_filter_scopes: ?[]log.Scope = null, user_agent_suffix: ?[]const u8 = null, + http_cache_dir: ?[]const u8 = null, web_bot_auth_key_file: ?[]const u8 = null, web_bot_auth_keyid: ?[]const u8 = null, @@ -392,6 +400,11 @@ pub fn printUsageAndExit(self: *const Config, success: bool) void { \\ \\--web-bot-auth-domain \\ Your domain e.g. yourdomain.com + \\ + \\--http-cache-dir + \\ Path to a directory to use as a Filesystem Cache for network resources. + \\ Omitting this will result in no caching. + \\ Defaults to no caching. 
; // MAX_HELP_LEN| @@ -1066,5 +1079,14 @@ fn parseCommonArg( return true; } + if (std.mem.eql(u8, "--http-cache-dir", opt)) { + const str = args.next() orelse { + log.fatal(.app, "missing argument value", .{ .arg = "--http-cache-dir" }); + return error.InvalidArgument; + }; + common.http_cache_dir = try allocator.dupe(u8, str); + return true; + } + return false; } diff --git a/src/browser/HttpClient.zig b/src/browser/HttpClient.zig index e183bc6a..6247e323 100644 --- a/src/browser/HttpClient.zig +++ b/src/browser/HttpClient.zig @@ -32,6 +32,9 @@ const CookieJar = @import("webapi/storage/Cookie.zig").Jar; const http = @import("../network/http.zig"); const Network = @import("../network/Network.zig"); const Robots = @import("../network/Robots.zig"); +const Cache = @import("../network/cache/Cache.zig"); +const CacheMetadata = Cache.CachedMetadata; +const CachedResponse = Cache.CachedResponse; const IS_DEBUG = builtin.mode == .Debug; @@ -311,7 +314,73 @@ pub fn request(self: *Client, req: Request) !void { return self.fetchRobotsThenProcessRequest(robots_url, req); } +fn serveFromCache(req: Request, cached: *const CachedResponse) !void { + const response = Response.fromCached(req.ctx, cached); + defer switch (cached.data) { + .buffer => |_| {}, + .file => |f| f.file.close(), + }; + + if (req.start_callback) |cb| { + try cb(response); + } + + const proceed = try req.header_callback(response); + if (!proceed) { + req.error_callback(req.ctx, error.Abort); + return; + } + + switch (cached.data) { + .buffer => |data| { + if (data.len > 0) { + try req.data_callback(response, data); + } + }, + .file => |f| { + const file = f.file; + + var buf: [1024]u8 = undefined; + var file_reader = file.reader(&buf); + try file_reader.seekTo(f.offset); + const reader = &file_reader.interface; + + var read_buf: [1024]u8 = undefined; + var remaining = f.len; + + while (remaining > 0) { + const read_len = @min(read_buf.len, remaining); + const n = try 
reader.readSliceShort(read_buf[0..read_len]); + if (n == 0) break; + remaining -= n; + try req.data_callback(response, read_buf[0..n]); + } + }, + } + + try req.done_callback(req.ctx); +} + fn processRequest(self: *Client, req: Request) !void { + if (self.network.cache) |*cache| { + if (req.method == .GET) { + const arena = try self.network.app.arena_pool.acquire(.{ .debug = "HttpClient.processRequest.cache" }); + defer self.network.app.arena_pool.release(arena); + + var iter = req.headers.iterator(); + const req_header_list = try iter.collect(arena); + + if (cache.get(arena, .{ + .url = req.url, + .timestamp = std.time.timestamp(), + .request_headers = req_header_list.items, + })) |cached| { + defer req.headers.deinit(); + return serveFromCache(req, &cached); + } + } + } + const transfer = try self.makeTransfer(req); transfer.req.notification.dispatch(.http_request_start, &.{ .transfer = transfer }); @@ -399,8 +468,10 @@ fn fetchRobotsThenProcessRequest(self: *Client, robots_url: [:0]const u8, req: R try entry.value_ptr.append(self.allocator, req); } -fn robotsHeaderCallback(transfer: *Transfer) !bool { - const ctx: *RobotsRequestContext = @ptrCast(@alignCast(transfer.ctx)); +fn robotsHeaderCallback(response: Response) !bool { + const ctx: *RobotsRequestContext = @ptrCast(@alignCast(response.ctx)); + // Robots callbacks only happen on real live requests. 
+ const transfer = response.inner.transfer; if (transfer.response_header) |hdr| { log.debug(.browser, "robots status", .{ .status = hdr.status, .robots_url = ctx.robots_url }); @@ -414,8 +485,8 @@ fn robotsHeaderCallback(transfer: *Transfer) !bool { return true; } -fn robotsDataCallback(transfer: *Transfer, data: []const u8) !void { - const ctx: *RobotsRequestContext = @ptrCast(@alignCast(transfer.ctx)); +fn robotsDataCallback(response: Response, data: []const u8) !void { + const ctx: *RobotsRequestContext = @ptrCast(@alignCast(response.ctx)); try ctx.buffer.appendSlice(ctx.client.allocator, data); } @@ -634,13 +705,43 @@ fn makeTransfer(self: *Client, req: Request) !*Transfer { .id = id, .url = req.url, .req = req, - .ctx = req.ctx, .client = self, .max_response_size = self.network.config.httpMaxResponseSize(), }; return transfer; } +fn requestFailed(transfer: *Transfer, err: anyerror, comptime execute_callback: bool) void { + if (transfer._notified_fail) { + // we can force a failed request within a callback, which will eventually + // result in this being called again in the more general loop. We do this + // because we can raise a more specific error inside a callback in some cases + return; + } + + transfer._notified_fail = true; + + transfer.req.notification.dispatch(.http_request_fail, &.{ + .transfer = transfer, + .err = err, + }); + + if (execute_callback) { + transfer.req.error_callback(transfer.req.ctx, err); + } else if (transfer.req.shutdown_callback) |cb| { + cb(transfer.req.ctx); + } +} + +// Same restriction as changeProxy. Should be ok since this is only called on +// BrowserContext deinit. +pub fn restoreOriginalProxy(self: *Client) !void { + try self.ensureNoActiveConnection(); + + self.http_proxy = self.network.config.httpProxy(); + self.use_proxy = self.http_proxy != null; +} + fn makeRequest(self: *Client, conn: *http.Connection, transfer: *Transfer) anyerror!void { { // Reset per-response state for retries (auth challenge, queue). 
@@ -674,7 +775,7 @@ fn makeRequest(self: *Client, conn: *http.Connection, transfer: *Transfer) anyer self.active += 1; if (transfer.req.start_callback) |cb| { - cb(transfer) catch |err| { + cb(Response.fromTransfer(transfer)) catch |err| { transfer.deinit(); return err; }; @@ -742,7 +843,10 @@ fn processOneMessage(self: *Client, msg: http.Handles.MultiMessage, transfer: *T // TODO give a way to configure the number of auth retries. if (transfer._auth_challenge != null and transfer._tries < 10) { var wait_for_interception = false; - transfer.req.notification.dispatch(.http_request_auth_required, &.{ .transfer = transfer, .wait_for_interception = &wait_for_interception }); + transfer.req.notification.dispatch( + .http_request_auth_required, + &.{ .transfer = transfer, .wait_for_interception = &wait_for_interception }, + ); if (wait_for_interception) { self.intercepted += 1; if (comptime IS_DEBUG) { @@ -841,10 +945,11 @@ fn processOneMessage(self: *Client, msg: http.Handles.MultiMessage, transfer: *T } } + const body = transfer._stream_buffer.items; + // Replay buffered body through user's data_callback. if (transfer._stream_buffer.items.len > 0) { - const body = transfer._stream_buffer.items; - try transfer.req.data_callback(transfer, body); + try transfer.req.data_callback(Response.fromTransfer(transfer), body); transfer.req.notification.dispatch(.http_response_data, &.{ .data = body, @@ -857,11 +962,19 @@ fn processOneMessage(self: *Client, msg: http.Handles.MultiMessage, transfer: *T } } + if (transfer._pending_cache_metadata) |metadata| { + const cache = &self.network.cache.?; + cache.put(metadata.*, body) catch |err| { + log.warn(.cache, "cache put failed", .{ .err = err }); + }; + } + // release conn ASAP so that it's available; some done_callbacks // will load more resources. 
transfer.releaseConn(); - try transfer.req.done_callback(transfer.ctx); + try transfer.req.done_callback(transfer.req.ctx); + transfer.req.notification.dispatch(.http_request_done, &.{ .transfer = transfer, }); @@ -939,9 +1052,9 @@ pub const Request = struct { // arbitrary data that can be associated with this request ctx: *anyopaque = undefined, - start_callback: ?*const fn (transfer: *Transfer) anyerror!void = null, - header_callback: *const fn (transfer: *Transfer) anyerror!bool, - data_callback: *const fn (transfer: *Transfer, data: []const u8) anyerror!void, + start_callback: ?*const fn (response: Response) anyerror!void = null, + header_callback: *const fn (response: Response) anyerror!bool, + data_callback: *const fn (response: Response, data: []const u8) anyerror!void, done_callback: *const fn (ctx: *anyopaque) anyerror!void, error_callback: *const fn (ctx: *anyopaque, err: anyerror) void, shutdown_callback: ?*const fn (ctx: *anyopaque) void = null, @@ -967,16 +1080,91 @@ pub const Request = struct { }; }; +pub const Response = struct { + ctx: *anyopaque, + inner: union(enum) { + transfer: *Transfer, + cached: *const CachedResponse, + }, + + pub fn fromTransfer(transfer: *Transfer) Response { + return .{ .ctx = transfer.req.ctx, .inner = .{ .transfer = transfer } }; + } + + pub fn fromCached(ctx: *anyopaque, resp: *const CachedResponse) Response { + return .{ .ctx = ctx, .inner = .{ .cached = resp } }; + } + + pub fn status(self: Response) ?u16 { + return switch (self.inner) { + .transfer => |t| if (t.response_header) |rh| rh.status else null, + .cached => |c| c.metadata.status, + }; + } + + pub fn contentType(self: Response) ?[]const u8 { + return switch (self.inner) { + .transfer => |t| if (t.response_header) |*rh| rh.contentType() else null, + .cached => |c| c.metadata.content_type, + }; + } + + pub fn contentLength(self: Response) ?u32 { + return switch (self.inner) { + .transfer => |t| t.getContentLength(), + .cached => |c| switch (c.data) { + .buffer 
=> |buf| @intCast(buf.len), + .file => |f| @intCast(f.len), + }, + }; + } + + pub fn redirectCount(self: Response) ?u32 { + return switch (self.inner) { + .transfer => |t| if (t.response_header) |rh| rh.redirect_count else null, + .cached => 0, + }; + } + + pub fn url(self: Response) [:0]const u8 { + return switch (self.inner) { + .transfer => |t| t.url, + .cached => |c| c.metadata.url, + }; + } + + pub fn headerIterator(self: Response) HeaderIterator { + return switch (self.inner) { + .transfer => |t| t.responseHeaderIterator(), + .cached => |c| HeaderIterator{ .list = .{ .list = c.metadata.headers } }, + }; + } + + pub fn abort(self: Response, err: anyerror) void { + switch (self.inner) { + .transfer => |t| t.abort(err), + .cached => {}, + } + } + + pub fn format(self: Response, writer: *std.Io.Writer) !void { + return switch (self.inner) { + .transfer => |t| try t.format(writer), + .cached => |c| try c.format(writer), + }; + } +}; + pub const Transfer = struct { arena: ArenaAllocator, id: u32 = 0, req: Request, url: [:0]const u8, - ctx: *anyopaque, // copied from req.ctx to make it easier for callback handlers client: *Client, // total bytes received in the response, including the response status line, // the headers, and the [encoded] body. 
bytes_received: usize = 0, + _pending_cache_metadata: ?*CacheMetadata = null, aborted: bool = false, @@ -1065,7 +1253,7 @@ pub const Transfer = struct { // as abort (doesn't send a notification, doesn't invoke an error callback) fn kill(self: *Transfer) void { if (self.req.shutdown_callback) |cb| { - cb(self.ctx); + cb(self.req.ctx); } if (self._performing or self.client.performing) { @@ -1101,9 +1289,9 @@ pub const Transfer = struct { }); if (execute_callback) { - self.req.error_callback(self.ctx, err); + self.req.error_callback(self.req.ctx, err); } else if (self.req.shutdown_callback) |cb| { - cb(self.ctx); + cb(self.req.ctx); } } @@ -1352,11 +1540,61 @@ pub const Transfer = struct { .transfer = transfer, }); - const proceed = transfer.req.header_callback(transfer) catch |err| { + const proceed = transfer.req.header_callback(Response.fromTransfer(transfer)) catch |err| { log.err(.http, "header_callback", .{ .err = err, .req = transfer }); return err; }; + if (transfer.client.network.cache != null and transfer.req.method == .GET) { + const rh = &transfer.response_header.?; + const allocator = transfer.arena.allocator(); + + const vary = if (conn.getResponseHeader("vary", 0)) |h| h.value else null; + + const maybe_cm = try Cache.tryCache( + allocator, + std.time.timestamp(), + transfer.url, + rh.status, + rh.contentType(), + if (conn.getResponseHeader("cache-control", 0)) |h| h.value else null, + vary, + if (conn.getResponseHeader("age", 0)) |h| h.value else null, + conn.getResponseHeader("set-cookie", 0) != null, + conn.getResponseHeader("authorization", 0) != null, + ); + + if (maybe_cm) |cm| { + var iter = transfer.responseHeaderIterator(); + var header_list = try iter.collect(allocator); + const end_of_response = header_list.items.len; + + if (vary) |vary_str| { + var req_it = transfer.req.headers.iterator(); + + while (req_it.next()) |hdr| { + var vary_iter = std.mem.splitScalar(u8, vary_str, ','); + + while (vary_iter.next()) |part| { + const name = 
std.mem.trim(u8, part, &std.ascii.whitespace); + if (std.ascii.eqlIgnoreCase(hdr.name, name)) { + try header_list.append(allocator, .{ + .name = try allocator.dupe(u8, hdr.name), + .value = try allocator.dupe(u8, hdr.value), + }); + } + } + } + } + + const metadata = try transfer.arena.allocator().create(CacheMetadata); + metadata.* = cm; + metadata.headers = header_list.items[0..end_of_response]; + metadata.vary_headers = header_list.items[end_of_response..]; + transfer._pending_cache_metadata = metadata; + } + } + return proceed and transfer.aborted == false; } @@ -1455,7 +1693,7 @@ pub const Transfer = struct { fn _fulfill(transfer: *Transfer, status: u16, headers: []const http.Header, body: ?[]const u8) !void { const req = &transfer.req; if (req.start_callback) |cb| { - try cb(transfer); + try cb(Response.fromTransfer(transfer)); } transfer.response_header = .{ @@ -1474,13 +1712,13 @@ pub const Transfer = struct { } lp.assert(transfer._header_done_called == false, "Transfer.fulfill header_done_called", .{}); - if (try req.header_callback(transfer) == false) { + if (try req.header_callback(Response.fromTransfer(transfer)) == false) { transfer.abort(error.Abort); return; } if (body) |b| { - try req.data_callback(transfer, b); + try req.data_callback(Response.fromTransfer(transfer), b); } try req.done_callback(req.ctx); @@ -1517,10 +1755,10 @@ pub const Transfer = struct { }; const Noop = struct { - fn headerCallback(_: *Transfer) !bool { + fn headerCallback(_: Response) !bool { return true; } - fn dataCallback(_: *Transfer, _: []const u8) !void {} + fn dataCallback(_: Response, _: []const u8) !void {} fn doneCallback(_: *anyopaque) !void {} fn errorCallback(_: *anyopaque, _: anyerror) void {} }; diff --git a/src/browser/Mime.zig b/src/browser/Mime.zig index eca97a2d..5ebe2622 100644 --- a/src/browser/Mime.zig +++ b/src/browser/Mime.zig @@ -27,6 +27,9 @@ charset: [41]u8 = default_charset, charset_len: usize = default_charset_len, is_default_charset: bool = true, 
+type_buf: [127]u8 = @splat(0), +sub_type_buf: [127]u8 = @splat(0), + /// String "UTF-8" continued by null characters. const default_charset = .{ 'U', 'T', 'F', '-', '8' } ++ .{0} ** 36; const default_charset_len = 5; @@ -61,7 +64,10 @@ pub const ContentType = union(ContentTypeEnum) { image_webp: void, application_json: void, unknown: void, - other: struct { type: []const u8, sub_type: []const u8 }, + other: struct { + type: []const u8, + sub_type: []const u8, + }, }; pub fn contentTypeString(mime: *const Mime) []const u8 { @@ -112,17 +118,18 @@ fn parseCharset(value: []const u8) error{ CharsetTooBig, Invalid }![]const u8 { return value; } -pub fn parse(input: []u8) !Mime { +pub fn parse(input: []const u8) !Mime { if (input.len > 255) { return error.TooBig; } - // Zig's trim API is broken. The return type is always `[]const u8`, - // even if the input type is `[]u8`. @constCast is safe here. - var normalized = @constCast(std.mem.trim(u8, input, &std.ascii.whitespace)); + var buf: [255]u8 = undefined; + const normalized = std.ascii.lowerString(&buf, std.mem.trim(u8, input, &std.ascii.whitespace)); _ = std.ascii.lowerString(normalized, normalized); - const content_type, const type_len = try parseContentType(normalized); + var mime = Mime{ .content_type = undefined }; + + const content_type, const type_len = try parseContentType(normalized, &mime.type_buf, &mime.sub_type_buf); if (type_len >= normalized.len) { return .{ .content_type = content_type }; } @@ -163,13 +170,12 @@ pub fn parse(input: []u8) !Mime { } } - return .{ - .params = params, - .charset = charset, - .charset_len = charset_len, - .content_type = content_type, - .is_default_charset = !has_explicit_charset, - }; + mime.params = params; + mime.charset = charset; + mime.charset_len = charset_len; + mime.content_type = content_type; + mime.is_default_charset = !has_explicit_charset; + return mime; } /// Prescan the first 1024 bytes of an HTML document for a charset declaration. 
@@ -395,7 +401,7 @@ pub fn isText(mime: *const Mime) bool { } // we expect value to be lowercase -fn parseContentType(value: []const u8) !struct { ContentType, usize } { +fn parseContentType(value: []const u8, type_buf: []u8, sub_type_buf: []u8) !struct { ContentType, usize } { const end = std.mem.indexOfScalarPos(u8, value, 0, ';') orelse value.len; const type_name = trimRight(value[0..end]); const attribute_start = end + 1; @@ -444,10 +450,18 @@ fn parseContentType(value: []const u8) !struct { ContentType, usize } { return error.Invalid; } - return .{ .{ .other = .{ - .type = main_type, - .sub_type = sub_type, - } }, attribute_start }; + @memcpy(type_buf[0..main_type.len], main_type); + @memcpy(sub_type_buf[0..sub_type.len], sub_type); + + return .{ + .{ + .other = .{ + .type = type_buf[0..main_type.len], + .sub_type = sub_type_buf[0..sub_type.len], + }, + }, + attribute_start, + }; } const VALID_CODEPOINTS = blk: { @@ -461,6 +475,13 @@ const VALID_CODEPOINTS = blk: { break :blk v; }; +pub fn typeString(self: *const Mime) []const u8 { + return switch (self.content_type) { + .other => |o| o.type[0..o.type_len], + else => "", + }; +} + fn validType(value: []const u8) bool { for (value) |b| { if (VALID_CODEPOINTS[b] == false) { diff --git a/src/browser/Page.zig b/src/browser/Page.zig index df47ab60..b265a45d 100644 --- a/src/browser/Page.zig +++ b/src/browser/Page.zig @@ -886,12 +886,10 @@ fn notifyParentLoadComplete(self: *Page) void { parent.iframeCompletedLoading(self.iframe.?); } -fn pageHeaderDoneCallback(transfer: *HttpClient.Transfer) !bool { - var self: *Page = @ptrCast(@alignCast(transfer.ctx)); +fn pageHeaderDoneCallback(response: HttpClient.Response) !bool { + var self: *Page = @ptrCast(@alignCast(response.ctx)); - const header = &transfer.response_header.?; - - const response_url = std.mem.span(header.url); + const response_url = response.url(); if (std.mem.eql(u8, response_url, self.url) == false) { // would be different than self.url in the case of a 
redirect self.url = try self.arena.dupeZ(u8, response_url); @@ -905,8 +903,8 @@ fn pageHeaderDoneCallback(transfer: *HttpClient.Transfer) !bool { if (comptime IS_DEBUG) { log.debug(.page, "navigate header", .{ .url = self.url, - .status = header.status, - .content_type = header.contentType(), + .status = response.status(), + .content_type = response.contentType(), .type = self._type, }); } @@ -927,14 +925,14 @@ fn pageHeaderDoneCallback(transfer: *HttpClient.Transfer) !bool { return true; } -fn pageDataCallback(transfer: *HttpClient.Transfer, data: []const u8) !void { - var self: *Page = @ptrCast(@alignCast(transfer.ctx)); +fn pageDataCallback(response: HttpClient.Response, data: []const u8) !void { + var self: *Page = @ptrCast(@alignCast(response.ctx)); if (self._parse_state == .pre) { // we lazily do this, because we might need the first chunk of data // to sniff the content type var mime: Mime = blk: { - if (transfer.response_header.?.contentType()) |ct| { + if (response.contentType()) |ct| { break :blk try Mime.parse(ct); } break :blk Mime.sniff(data); diff --git a/src/browser/ScriptManager.zig b/src/browser/ScriptManager.zig index 39dd71bb..546a05c5 100644 --- a/src/browser/ScriptManager.zig +++ b/src/browser/ScriptManager.zig @@ -273,25 +273,6 @@ pub fn addFromElement(self: *ScriptManager, comptime from_parser: bool, script_e // Let the outer errdefer handle releasing the arena if client.request fails } - try self.client.request(.{ - .url = url, - .ctx = script, - .method = .GET, - .frame_id = page._frame_id, - .headers = try self.getHeaders(), - .blocking = is_blocking, - .cookie_jar = &page._session.cookie_jar, - .cookie_origin = page.url, - .resource_type = .script, - .notification = page._session.notification, - .start_callback = if (log.enabled(.http, .debug)) Script.startCallback else null, - .header_callback = Script.headerCallback, - .data_callback = Script.dataCallback, - .done_callback = Script.doneCallback, - .error_callback = Script.errorCallback, 
- }); - handover = true; - if (comptime IS_DEBUG) { var ls: js.Local.Scope = undefined; page.js.localScope(&ls); @@ -304,6 +285,32 @@ pub fn addFromElement(self: *ScriptManager, comptime from_parser: bool, script_e .stack = ls.local.stackTrace() catch "???", }); } + + { + const was_evaluating = self.is_evaluating; + self.is_evaluating = true; + defer self.is_evaluating = was_evaluating; + + try self.client.request(.{ + .url = url, + .ctx = script, + .method = .GET, + .frame_id = page._frame_id, + .headers = try self.getHeaders(), + .blocking = is_blocking, + .cookie_jar = &page._session.cookie_jar, + .cookie_origin = page.url, + .resource_type = .script, + .notification = page._session.notification, + .start_callback = if (log.enabled(.http, .debug)) Script.startCallback else null, + .header_callback = Script.headerCallback, + .data_callback = Script.dataCallback, + .done_callback = Script.doneCallback, + .error_callback = Script.errorCallback, + }); + } + + handover = true; } if (is_blocking == false) { @@ -694,82 +701,86 @@ pub const Script = struct { self.manager.page.releaseArena(self.arena); } - fn startCallback(transfer: *HttpClient.Transfer) !void { - log.debug(.http, "script fetch start", .{ .req = transfer }); + fn startCallback(response: HttpClient.Response) !void { + log.debug(.http, "script fetch start", .{ .req = response }); } - fn headerCallback(transfer: *HttpClient.Transfer) !bool { - const self: *Script = @ptrCast(@alignCast(transfer.ctx)); - const header = &transfer.response_header.?; - self.status = header.status; - if (header.status != 200) { + fn headerCallback(response: HttpClient.Response) !bool { + const self: *Script = @ptrCast(@alignCast(response.ctx)); + + self.status = response.status().?; + if (response.status() != 200) { log.info(.http, "script header", .{ - .req = transfer, - .status = header.status, - .content_type = header.contentType(), + .req = response, + .status = response.status(), + .content_type = response.contentType(), }); 
return false; } if (comptime IS_DEBUG) { log.debug(.http, "script header", .{ - .req = transfer, - .status = header.status, - .content_type = header.contentType(), + .req = response, + .status = response.status(), + .content_type = response.contentType(), }); } - { - // temp debug, trying to figure out why the next assert sometimes - // fails. Is the buffer just corrupt or is headerCallback really - // being called twice? - lp.assert(self.header_callback_called == false, "ScriptManager.Header recall", .{ - .m = @tagName(std.meta.activeTag(self.mode)), - .a1 = self.debug_transfer_id, - .a2 = self.debug_transfer_tries, - .a3 = self.debug_transfer_aborted, - .a4 = self.debug_transfer_bytes_received, - .a5 = self.debug_transfer_notified_fail, - .a7 = self.debug_transfer_intercept_state, - .a8 = self.debug_transfer_auth_challenge, - .a9 = self.debug_transfer_easy_id, - .b1 = transfer.id, - .b2 = transfer._tries, - .b3 = transfer.aborted, - .b4 = transfer.bytes_received, - .b5 = transfer._notified_fail, - .b7 = @intFromEnum(transfer._intercept_state), - .b8 = transfer._auth_challenge != null, - .b9 = if (transfer._conn) |c| @intFromPtr(c._easy) else 0, - }); - self.header_callback_called = true; - self.debug_transfer_id = transfer.id; - self.debug_transfer_tries = transfer._tries; - self.debug_transfer_aborted = transfer.aborted; - self.debug_transfer_bytes_received = transfer.bytes_received; - self.debug_transfer_notified_fail = transfer._notified_fail; - self.debug_transfer_intercept_state = @intFromEnum(transfer._intercept_state); - self.debug_transfer_auth_challenge = transfer._auth_challenge != null; - self.debug_transfer_easy_id = if (transfer._conn) |c| @intFromPtr(c._easy) else 0; + switch (response.inner) { + .transfer => |transfer| { + // temp debug, trying to figure out why the next assert sometimes + // fails. Is the buffer just corrupt or is headerCallback really + // being called twice? 
+ lp.assert(self.header_callback_called == false, "ScriptManager.Header recall", .{ + .m = @tagName(std.meta.activeTag(self.mode)), + .a1 = self.debug_transfer_id, + .a2 = self.debug_transfer_tries, + .a3 = self.debug_transfer_aborted, + .a4 = self.debug_transfer_bytes_received, + .a5 = self.debug_transfer_notified_fail, + .a7 = self.debug_transfer_intercept_state, + .a8 = self.debug_transfer_auth_challenge, + .a9 = self.debug_transfer_easy_id, + .b1 = transfer.id, + .b2 = transfer._tries, + .b3 = transfer.aborted, + .b4 = transfer.bytes_received, + .b5 = transfer._notified_fail, + .b7 = @intFromEnum(transfer._intercept_state), + .b8 = transfer._auth_challenge != null, + .b9 = if (transfer._conn) |c| @intFromPtr(c._easy) else 0, + }); + self.header_callback_called = true; + self.debug_transfer_id = transfer.id; + self.debug_transfer_tries = transfer._tries; + self.debug_transfer_aborted = transfer.aborted; + self.debug_transfer_bytes_received = transfer.bytes_received; + self.debug_transfer_notified_fail = transfer._notified_fail; + self.debug_transfer_intercept_state = @intFromEnum(transfer._intercept_state); + self.debug_transfer_auth_challenge = transfer._auth_challenge != null; + self.debug_transfer_easy_id = if (transfer._conn) |c| @intFromPtr(c._easy) else 0; + }, + else => {}, } lp.assert(self.source.remote.capacity == 0, "ScriptManager.Header buffer", .{ .capacity = self.source.remote.capacity }); var buffer: std.ArrayList(u8) = .empty; - if (transfer.getContentLength()) |cl| { + if (response.contentLength()) |cl| { try buffer.ensureTotalCapacity(self.arena, cl); } self.source = .{ .remote = buffer }; return true; } - fn dataCallback(transfer: *HttpClient.Transfer, data: []const u8) !void { - const self: *Script = @ptrCast(@alignCast(transfer.ctx)); - self._dataCallback(transfer, data) catch |err| { - log.err(.http, "SM.dataCallback", .{ .err = err, .transfer = transfer, .len = data.len }); + fn dataCallback(response: HttpClient.Response, data: []const u8) 
!void { + const self: *Script = @ptrCast(@alignCast(response.ctx)); + self._dataCallback(response, data) catch |err| { + log.err(.http, "SM.dataCallback", .{ .err = err, .transfer = response, .len = data.len }); return err; }; } - fn _dataCallback(self: *Script, _: *HttpClient.Transfer, data: []const u8) !void { + + fn _dataCallback(self: *Script, _: HttpClient.Response, data: []const u8) !void { try self.source.remote.appendSlice(self.arena, data); } diff --git a/src/browser/webapi/net/Fetch.zig b/src/browser/webapi/net/Fetch.zig index acda1f86..d26771e2 100644 --- a/src/browser/webapi/net/Fetch.zig +++ b/src/browser/webapi/net/Fetch.zig @@ -127,16 +127,16 @@ fn handleBlobUrl(url: []const u8, resolver: js.PromiseResolver, page: *Page) !js return resolver.promise(); } -fn httpStartCallback(transfer: *HttpClient.Transfer) !void { - const self: *Fetch = @ptrCast(@alignCast(transfer.ctx)); +fn httpStartCallback(response: HttpClient.Response) !void { + const self: *Fetch = @ptrCast(@alignCast(response.ctx)); if (comptime IS_DEBUG) { log.debug(.http, "request start", .{ .url = self._url, .source = "fetch" }); } - self._response._transfer = transfer; + self._response._http_response = response; } -fn httpHeaderDoneCallback(transfer: *HttpClient.Transfer) !bool { - const self: *Fetch = @ptrCast(@alignCast(transfer.ctx)); +fn httpHeaderDoneCallback(response: HttpClient.Response) !bool { + const self: *Fetch = @ptrCast(@alignCast(response.ctx)); if (self._signal) |signal| { if (signal._aborted) { @@ -145,25 +145,24 @@ fn httpHeaderDoneCallback(transfer: *HttpClient.Transfer) !bool { } const arena = self._response._arena; - if (transfer.getContentLength()) |cl| { + if (response.contentLength()) |cl| { try self._buf.ensureTotalCapacity(arena, cl); } const res = self._response; - const header = transfer.response_header.?; if (comptime IS_DEBUG) { log.debug(.http, "request header", .{ .source = "fetch", .url = self._url, - .status = header.status, + .status = response.status(), 
}); } - res._status = header.status; - res._status_text = std.http.Status.phrase(@enumFromInt(header.status)) orelse ""; - res._url = try arena.dupeZ(u8, std.mem.span(header.url)); - res._is_redirected = header.redirect_count > 0; + res._status = response.status().?; + res._status_text = std.http.Status.phrase(@enumFromInt(response.status().?)) orelse ""; + res._url = try arena.dupeZ(u8, response.url()); + res._is_redirected = response.redirectCount().? > 0; // Determine response type based on origin comparison const page_origin = URL.getOrigin(arena, self._page.url) catch null; @@ -183,7 +182,7 @@ fn httpHeaderDoneCallback(transfer: *HttpClient.Transfer) !bool { res._type = .basic; } - var it = transfer.responseHeaderIterator(); + var it = response.headerIterator(); while (it.next()) |hdr| { try res._headers.append(hdr.name, hdr.value, self._page); } @@ -191,8 +190,8 @@ fn httpHeaderDoneCallback(transfer: *HttpClient.Transfer) !bool { return true; } -fn httpDataCallback(transfer: *HttpClient.Transfer, data: []const u8) !void { - const self: *Fetch = @ptrCast(@alignCast(transfer.ctx)); +fn httpDataCallback(response: HttpClient.Response, data: []const u8) !void { + const self: *Fetch = @ptrCast(@alignCast(response.ctx)); // Check if aborted if (self._signal) |signal| { @@ -207,7 +206,7 @@ fn httpDataCallback(transfer: *HttpClient.Transfer, data: []const u8) !void { fn httpDoneCallback(ctx: *anyopaque) !void { const self: *Fetch = @ptrCast(@alignCast(ctx)); var response = self._response; - response._transfer = null; + response._http_response = null; response._body = self._buf.items; log.info(.http, "request complete", .{ @@ -230,7 +229,7 @@ fn httpErrorCallback(ctx: *anyopaque, _: anyerror) void { const self: *Fetch = @ptrCast(@alignCast(ctx)); var response = self._response; - response._transfer = null; + response._http_response = null; // the response is only passed on v8 on success, if we're here, it's safe to // clear this. 
(defer since `self is in the response's arena). @@ -256,7 +255,7 @@ fn httpShutdownCallback(ctx: *anyopaque) void { if (self._owns_response) { var response = self._response; - response._transfer = null; + response._http_response = null; response.deinit(self._page._session); // Do not access `self` after this point: the Fetch struct was // allocated from response._arena which has been released. diff --git a/src/browser/webapi/net/Response.zig b/src/browser/webapi/net/Response.zig index b9df006e..e4fbd46d 100644 --- a/src/browser/webapi/net/Response.zig +++ b/src/browser/webapi/net/Response.zig @@ -48,7 +48,7 @@ _type: Type, _status_text: []const u8, _url: [:0]const u8, _is_redirected: bool, -_transfer: ?*HttpClient.Transfer = null, +_http_response: ?HttpClient.Response = null, const InitOpts = struct { status: u16 = 200, @@ -81,9 +81,9 @@ pub fn init(body_: ?[]const u8, opts_: ?InitOpts, page: *Page) !*Response { } pub fn deinit(self: *Response, session: *Session) void { - if (self._transfer) |transfer| { - transfer.abort(error.Abort); - self._transfer = null; + if (self._http_response) |resp| { + resp.abort(error.Abort); + self._http_response = null; } session.releaseArena(self._arena); } @@ -191,7 +191,7 @@ pub fn clone(self: *const Response, page: *Page) !*Response { ._type = self._type, ._is_redirected = self._is_redirected, ._headers = try Headers.init(.{ .obj = self._headers }, page), - ._transfer = null, + ._http_response = null, }; return cloned; } diff --git a/src/browser/webapi/net/XMLHttpRequest.zig b/src/browser/webapi/net/XMLHttpRequest.zig index d34e2f2b..9024a1f7 100644 --- a/src/browser/webapi/net/XMLHttpRequest.zig +++ b/src/browser/webapi/net/XMLHttpRequest.zig @@ -43,7 +43,7 @@ _rc: lp.RC(u8) = .{}, _page: *Page, _proto: *XMLHttpRequestEventTarget, _arena: Allocator, -_transfer: ?*HttpClient.Transfer = null, +_http_response: ?HttpClient.Response = null, _active_request: bool = false, _url: [:0]const u8 = "", @@ -100,9 +100,9 @@ pub fn init(page: 
*Page) !*XMLHttpRequest { } pub fn deinit(self: *XMLHttpRequest, session: *Session) void { - if (self._transfer) |transfer| { - transfer.abort(error.Abort); - self._transfer = null; + if (self._http_response) |resp| { + resp.abort(error.Abort); + self._http_response = null; } if (self._on_ready_state_change) |func| { @@ -184,9 +184,9 @@ pub fn setWithCredentials(self: *XMLHttpRequest, value: bool) !void { // TODO: url should be a union, as it can be multiple things pub fn open(self: *XMLHttpRequest, method_: []const u8, url: [:0]const u8) !void { // Abort any in-progress request - if (self._transfer) |transfer| { + if (self._http_response) |transfer| { transfer.abort(error.Abort); - self._transfer = null; + self._http_response = null; } // Reset internal state @@ -402,34 +402,32 @@ pub fn getResponseXML(self: *XMLHttpRequest, page: *Page) !?*Node.Document { }; } -fn httpStartCallback(transfer: *HttpClient.Transfer) !void { - const self: *XMLHttpRequest = @ptrCast(@alignCast(transfer.ctx)); +fn httpStartCallback(response: HttpClient.Response) !void { + const self: *XMLHttpRequest = @ptrCast(@alignCast(response.ctx)); if (comptime IS_DEBUG) { log.debug(.http, "request start", .{ .method = self._method, .url = self._url, .source = "xhr" }); } - self._transfer = transfer; + self._http_response = response; } -fn httpHeaderCallback(transfer: *HttpClient.Transfer, header: http.Header) !void { - const self: *XMLHttpRequest = @ptrCast(@alignCast(transfer.ctx)); +fn httpHeaderCallback(response: HttpClient.Response, header: http.Header) !void { + const self: *XMLHttpRequest = @ptrCast(@alignCast(response.ctx)); const joined = try std.fmt.allocPrint(self._arena, "{s}: {s}", .{ header.name, header.value }); try self._response_headers.append(self._arena, joined); } -fn httpHeaderDoneCallback(transfer: *HttpClient.Transfer) !bool { - const self: *XMLHttpRequest = @ptrCast(@alignCast(transfer.ctx)); - - const header = &transfer.response_header.?; +fn 
httpHeaderDoneCallback(response: HttpClient.Response) !bool { + const self: *XMLHttpRequest = @ptrCast(@alignCast(response.ctx)); if (comptime IS_DEBUG) { log.debug(.http, "request header", .{ .source = "xhr", .url = self._url, - .status = header.status, + .status = response.status(), }); } - if (header.contentType()) |ct| { + if (response.contentType()) |ct| { self._response_mime = Mime.parse(ct) catch |e| { log.info(.http, "invalid content type", .{ .content_Type = ct, @@ -440,18 +438,18 @@ fn httpHeaderDoneCallback(transfer: *HttpClient.Transfer) !bool { }; } - var it = transfer.responseHeaderIterator(); + var it = response.headerIterator(); while (it.next()) |hdr| { const joined = try std.fmt.allocPrint(self._arena, "{s}: {s}", .{ hdr.name, hdr.value }); try self._response_headers.append(self._arena, joined); } - self._response_status = header.status; - if (transfer.getContentLength()) |cl| { + self._response_status = response.status().?; + if (response.contentLength()) |cl| { self._response_len = cl; try self._response_data.ensureTotalCapacity(self._arena, cl); } - self._response_url = try self._arena.dupeZ(u8, std.mem.span(header.url)); + self._response_url = try self._arena.dupeZ(u8, response.url()); const page = self._page; @@ -466,8 +464,8 @@ fn httpHeaderDoneCallback(transfer: *HttpClient.Transfer) !bool { return true; } -fn httpDataCallback(transfer: *HttpClient.Transfer, data: []const u8) !void { - const self: *XMLHttpRequest = @ptrCast(@alignCast(transfer.ctx)); +fn httpDataCallback(response: HttpClient.Response, data: []const u8) !void { + const self: *XMLHttpRequest = @ptrCast(@alignCast(response.ctx)); try self._response_data.appendSlice(self._arena, data); const page = self._page; @@ -490,7 +488,7 @@ fn httpDoneCallback(ctx: *anyopaque) !void { // Not that the request is done, the http/client will free the transfer // object. It isn't safe to keep it around. 
- self._transfer = null; + self._http_response = null; const page = self._page; @@ -513,23 +511,23 @@ fn httpErrorCallback(ctx: *anyopaque, err: anyerror) void { const self: *XMLHttpRequest = @ptrCast(@alignCast(ctx)); // http client will close it after an error, it isn't safe to keep around self.handleError(err); - if (self._transfer != null) { - self._transfer = null; + if (self._http_response != null) { + self._http_response = null; } self.releaseSelfRef(); } fn httpShutdownCallback(ctx: *anyopaque) void { const self: *XMLHttpRequest = @ptrCast(@alignCast(ctx)); - self._transfer = null; + self._http_response = null; self.releaseSelfRef(); } pub fn abort(self: *XMLHttpRequest) void { self.handleError(error.Abort); - if (self._transfer) |transfer| { - self._transfer = null; - transfer.abort(error.Abort); + if (self._http_response) |resp| { + self._http_response = null; + resp.abort(error.Abort); } self.releaseSelfRef(); } diff --git a/src/log.zig b/src/log.zig index 201e6c9f..3e1016c5 100644 --- a/src/log.zig +++ b/src/log.zig @@ -39,6 +39,7 @@ pub const Scope = enum { telemetry, unknown_prop, mcp, + cache, }; const Opts = struct { diff --git a/src/network/Network.zig b/src/network/Network.zig index fad62652..ab11e5ce 100644 --- a/src/network/Network.zig +++ b/src/network/Network.zig @@ -17,6 +17,7 @@ // along with this program. If not, see . 
const std = @import("std"); +const log = @import("../log.zig"); const builtin = @import("builtin"); const net = std.net; const posix = std.posix; @@ -30,6 +31,10 @@ const http = @import("http.zig"); const RobotStore = @import("Robots.zig").RobotStore; const WebBotAuth = @import("WebBotAuth.zig"); +const Cache = @import("cache/Cache.zig"); +const FsCache = @import("cache/FsCache.zig"); + +const App = @import("../App.zig"); const Network = @This(); const Listener = struct { @@ -45,10 +50,12 @@ const MAX_TICK_CALLBACKS = 16; allocator: Allocator, +app: *App, config: *const Config, ca_blob: ?http.Blob, robot_store: RobotStore, web_bot_auth: ?WebBotAuth, +cache: ?Cache, connections: []http.Connection, available: std.DoublyLinkedList = .{}, @@ -200,7 +207,7 @@ fn globalDeinit() void { libcurl.curl_global_cleanup(); } -pub fn init(allocator: Allocator, config: *const Config) !Network { +pub fn init(allocator: Allocator, app: *App, config: *const Config) !Network { globalInit(allocator); errdefer globalDeinit(); @@ -233,6 +240,22 @@ pub fn init(allocator: Allocator, config: *const Config) !Network { else null; + const cache = if (config.httpCacheDir()) |cache_dir_path| + Cache{ + .kind = .{ + .fs = FsCache.init(cache_dir_path) catch |e| { + log.err(.cache, "failed to init", .{ + .kind = "FsCache", + .path = cache_dir_path, + .err = e, + }); + return e; + }, + }, + } + else + null; + return .{ .allocator = allocator, .config = config, @@ -244,8 +267,10 @@ pub fn init(allocator: Allocator, config: *const Config) !Network { .available = available, .connections = connections, + .app = app, .robot_store = RobotStore.init(allocator), .web_bot_auth = web_bot_auth, + .cache = cache, }; } @@ -278,6 +303,8 @@ pub fn deinit(self: *Network) void { wba.deinit(self.allocator); } + if (self.cache) |*cache| cache.deinit(); + globalDeinit(); } diff --git a/src/network/cache/Cache.zig b/src/network/cache/Cache.zig new file mode 100644 index 00000000..d270310e --- /dev/null +++ 
b/src/network/cache/Cache.zig @@ -0,0 +1,213 @@ +// Copyright (C) 2023-2026 Lightpanda (Selecy SAS) +// +// Francis Bouvier +// Pierre Tachoire +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +const std = @import("std"); +const log = @import("../../log.zig"); +const Http = @import("../http.zig"); +const FsCache = @import("FsCache.zig"); + +/// A browser-wide cache for resources across the network. +/// This mostly conforms to RFC9111 with regards to caching behavior. 
+pub const Cache = @This(); + +kind: union(enum) { + fs: FsCache, +}, + +pub fn deinit(self: *Cache) void { + return switch (self.kind) { + inline else => |*c| c.deinit(), + }; +} + +pub fn get(self: *Cache, arena: std.mem.Allocator, req: CacheRequest) ?CachedResponse { + return switch (self.kind) { + inline else => |*c| c.get(arena, req), + }; +} + +pub fn put(self: *Cache, metadata: CachedMetadata, body: []const u8) !void { + return switch (self.kind) { + inline else => |*c| c.put(metadata, body), + }; +} + +pub const CacheControl = struct { + max_age: u64, + + pub fn parse(value: []const u8) ?CacheControl { + var cc: CacheControl = .{ .max_age = undefined }; + + var max_age_set = false; + var max_s_age_set = false; + var is_public = false; + + var iter = std.mem.splitScalar(u8, value, ','); + while (iter.next()) |part| { + const directive = std.mem.trim(u8, part, &std.ascii.whitespace); + if (std.ascii.eqlIgnoreCase(directive, "no-store")) { + return null; + } else if (std.ascii.eqlIgnoreCase(directive, "no-cache")) { + return null; + } else if (std.ascii.eqlIgnoreCase(directive, "public")) { + is_public = true; + } else if (std.ascii.startsWithIgnoreCase(directive, "max-age=")) { + if (!max_s_age_set) { + if (std.fmt.parseInt(u64, directive[8..], 10) catch null) |max_age| { + cc.max_age = max_age; + max_age_set = true; + } + } + } else if (std.ascii.startsWithIgnoreCase(directive, "s-maxage=")) { + if (std.fmt.parseInt(u64, directive[9..], 10) catch null) |max_age| { + cc.max_age = max_age; + max_age_set = true; + max_s_age_set = true; + } + } + } + + if (!max_age_set) return null; + if (!is_public) return null; + if (cc.max_age == 0) return null; + + return cc; + } +}; + +pub const CachedMetadata = struct { + url: [:0]const u8, + content_type: []const u8, + + status: u16, + stored_at: i64, + age_at_store: u64, + + cache_control: CacheControl, + /// Response Headers + headers: []const Http.Header, + + /// These are Request Headers used by Vary. 
+ vary_headers: []const Http.Header, + + pub fn format(self: CachedMetadata, writer: *std.Io.Writer) !void { + try writer.print("url={s} | status={d} | content_type={s} | max_age={d} | vary=[", .{ + self.url, + self.status, + self.content_type, + self.cache_control.max_age, + }); + + // Logging all headers gets pretty verbose... + // so we just log the Vary ones that matter for caching. + + if (self.vary_headers.len > 0) { + for (self.vary_headers, 0..) |hdr, i| { + if (i > 0) try writer.print(", ", .{}); + try writer.print("{s}: {s}", .{ hdr.name, hdr.value }); + } + } + try writer.print("]", .{}); + } +}; + +pub const CacheRequest = struct { + url: []const u8, + timestamp: i64, + request_headers: []const Http.Header, +}; + +pub const CachedData = union(enum) { + buffer: []const u8, + file: struct { + file: std.fs.File, + offset: usize, + len: usize, + }, + + pub fn format(self: CachedData, writer: *std.Io.Writer) !void { + switch (self) { + .buffer => |buf| try writer.print("buffer({d} bytes)", .{buf.len}), + .file => |f| try writer.print("file(offset={d}, len={d} bytes)", .{ f.offset, f.len }), + } + } +}; + +pub const CachedResponse = struct { + metadata: CachedMetadata, + data: CachedData, + + pub fn format(self: *const CachedResponse, writer: *std.Io.Writer) !void { + try writer.print("metadata=(", .{}); + try self.metadata.format(writer); + try writer.print("), data=", .{}); + try self.data.format(writer); + } +}; + +pub fn tryCache( + arena: std.mem.Allocator, + timestamp: i64, + url: [:0]const u8, + status: u16, + content_type: ?[]const u8, + cache_control: ?[]const u8, + vary: ?[]const u8, + age: ?[]const u8, + has_set_cookie: bool, + has_authorization: bool, +) !?CachedMetadata { + if (status != 200) { + log.debug(.cache, "no store", .{ .url = url, .code = status, .reason = "status" }); + return null; + } + if (has_set_cookie) { + log.debug(.cache, "no store", .{ .url = url, .reason = "has_cookies" }); + return null; + } + if (has_authorization) { + 
log.debug(.cache, "no store", .{ .url = url, .reason = "has_authorization" }); + return null; + } + if (vary) |v| if (std.mem.eql(u8, v, "*")) { + log.debug(.cache, "no store", .{ .url = url, .vary = v, .reason = "vary" }); + return null; + }; + const cc = blk: { + if (cache_control == null) { + log.debug(.cache, "no store", .{ .url = url, .reason = "no cache control" }); + return null; + } + if (CacheControl.parse(cache_control.?)) |cc| { + break :blk cc; + } + log.debug(.cache, "no store", .{ .url = url, .cache_control = cache_control.?, .reason = "cache control" }); + return null; + }; + + return .{ + .url = try arena.dupeZ(u8, url), + .content_type = if (content_type) |ct| try arena.dupe(u8, ct) else "application/octet-stream", + .status = status, + .stored_at = timestamp, + .age_at_store = if (age) |a| std.fmt.parseInt(u64, a, 10) catch 0 else 0, + .cache_control = cc, + .headers = &.{}, + .vary_headers = &.{}, + }; +} diff --git a/src/network/cache/FsCache.zig b/src/network/cache/FsCache.zig new file mode 100644 index 00000000..3d67a945 --- /dev/null +++ b/src/network/cache/FsCache.zig @@ -0,0 +1,612 @@ +// Copyright (C) 2023-2026 Lightpanda (Selecy SAS) +// +// Francis Bouvier +// Pierre Tachoire +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . 
+ +const std = @import("std"); +const log = @import("../../log.zig"); +const Cache = @import("Cache.zig"); +const Http = @import("../http.zig"); +const CacheRequest = Cache.CacheRequest; +const CachedMetadata = Cache.CachedMetadata; +const CachedResponse = Cache.CachedResponse; + +const CACHE_VERSION: usize = 1; +const LOCK_STRIPES = 16; +comptime { + std.debug.assert(std.math.isPowerOfTwo(LOCK_STRIPES)); +} + +pub const FsCache = @This(); + +dir: std.fs.Dir, +locks: [LOCK_STRIPES]std.Thread.Mutex = .{std.Thread.Mutex{}} ** LOCK_STRIPES, + +const CacheMetadataJson = struct { + version: usize, + metadata: CachedMetadata, +}; + +fn getLockPtr(self: *FsCache, key: *const [HASHED_KEY_LEN]u8) *std.Thread.Mutex { + const lock_idx = std.hash.Wyhash.hash(0, key[0..]) & (LOCK_STRIPES - 1); + return &self.locks[lock_idx]; +} + +const BODY_LEN_HEADER_LEN = 8; +const HASHED_KEY_LEN = 64; +const HASHED_PATH_LEN = HASHED_KEY_LEN + 6; +const HASHED_TMP_PATH_LEN = HASHED_PATH_LEN + 4; + +fn hashKey(key: []const u8) [HASHED_KEY_LEN]u8 { + var digest: [std.crypto.hash.sha2.Sha256.digest_length]u8 = undefined; + std.crypto.hash.sha2.Sha256.hash(key, &digest, .{}); + var hex: [HASHED_KEY_LEN]u8 = undefined; + _ = std.fmt.bufPrint(&hex, "{s}", .{std.fmt.bytesToHex(&digest, .lower)}) catch unreachable; + return hex; +} + +fn cachePath(hashed_key: *const [HASHED_KEY_LEN]u8) [HASHED_PATH_LEN]u8 { + var path: [HASHED_PATH_LEN]u8 = undefined; + _ = std.fmt.bufPrint(&path, "{s}.cache", .{hashed_key}) catch unreachable; + return path; +} + +fn cacheTmpPath(hashed_key: *const [HASHED_KEY_LEN]u8) [HASHED_TMP_PATH_LEN]u8 { + var path: [HASHED_TMP_PATH_LEN]u8 = undefined; + _ = std.fmt.bufPrint(&path, "{s}.cache.tmp", .{hashed_key}) catch unreachable; + return path; +} + +pub fn init(path: []const u8) !FsCache { + const cwd = std.fs.cwd(); + + cwd.makeDir(path) catch |err| switch (err) { + error.PathAlreadyExists => {}, + else => return err, + }; + + const dir = try cwd.openDir(path, .{ .iterate 
= true }); + return .{ .dir = dir }; +} + +pub fn deinit(self: *FsCache) void { + self.dir.close(); +} + +pub fn get(self: *FsCache, arena: std.mem.Allocator, req: CacheRequest) ?Cache.CachedResponse { + const hashed_key = hashKey(req.url); + const cache_p = cachePath(&hashed_key); + + const lock = self.getLockPtr(&hashed_key); + lock.lock(); + defer lock.unlock(); + + const file = self.dir.openFile(&cache_p, .{ .mode = .read_only }) catch |e| { + switch (e) { + std.fs.File.OpenError.FileNotFound => { + log.debug(.cache, "miss", .{ .url = req.url, .hash = &hashed_key, .reason = "missing" }); + }, + else => |err| { + log.warn(.cache, "open file err", .{ .url = req.url, .err = err }); + }, + } + return null; + }; + + var cleanup = false; + defer if (cleanup) { + file.close(); + self.dir.deleteFile(&cache_p) catch |e| { + log.err(.cache, "clean fail", .{ .url = req.url, .file = &cache_p, .err = e }); + }; + }; + + var file_buf: [1024]u8 = undefined; + var len_buf: [BODY_LEN_HEADER_LEN]u8 = undefined; + + var file_reader = file.reader(&file_buf); + const file_reader_iface = &file_reader.interface; + + file_reader_iface.readSliceAll(&len_buf) catch |e| { + log.warn(.cache, "read header", .{ .url = req.url, .err = e }); + cleanup = true; + return null; + }; + const body_len = std.mem.readInt(u64, &len_buf, .little); + + // Now we read metadata. + file_reader.seekTo(body_len + BODY_LEN_HEADER_LEN) catch |e| { + log.warn(.cache, "seek metadata", .{ .url = req.url, .err = e }); + cleanup = true; + return null; + }; + + var json_reader = std.json.Reader.init(arena, file_reader_iface); + const cache_file: CacheMetadataJson = std.json.parseFromTokenSourceLeaky( + CacheMetadataJson, + arena, + &json_reader, + .{ .allocate = .alloc_always }, + ) catch |e| { + // Warn because malformed metadata can be a deeper symptom. 
+            log.warn(.cache, "miss", .{ .url = req.url, .err = e, .reason = "malformed metadata" }); + cleanup = true; + return null; + }; + + if (cache_file.version != CACHE_VERSION) { + log.debug(.cache, "miss", .{ + .url = req.url, + .reason = "version mismatch", + .expected = CACHE_VERSION, + .got = cache_file.version, + }); + cleanup = true; + return null; + } + + const metadata = cache_file.metadata; + + // Check entry expiration. + const now = req.timestamp; + const age = (now - metadata.stored_at) + std.math.lossyCast(i64, metadata.age_at_store); + if (age < 0 or @as(u64, @intCast(age)) >= metadata.cache_control.max_age) { + log.debug(.cache, "miss", .{ .url = req.url, .reason = "expired" }); + cleanup = true; + return null; + } + + // If we have Vary headers, ensure they are present & matching. + for (metadata.vary_headers) |vary_hdr| { + const name = vary_hdr.name; + const value = vary_hdr.value; + + const incoming = for (req.request_headers) |h| { + if (std.ascii.eqlIgnoreCase(h.name, name)) break h.value; + } else ""; + + if (!std.ascii.eqlIgnoreCase(value, incoming)) { + log.debug(.cache, "miss", .{ + .url = req.url, + .reason = "vary mismatch", + .header = name, + .expected = value, + .got = incoming, + }); + file.close(); return null; // vary miss keeps the entry, but must not leak the fd + } + } + + // On the case of a hash collision.
+ if (!std.ascii.eqlIgnoreCase(metadata.url, req.url)) { + log.warn(.cache, "collision", .{ .url = req.url, .expected = metadata.url, .got = req.url }); + cleanup = true; + return null; + } + + log.debug(.cache, "hit", .{ .url = req.url, .hash = &hashed_key }); + + return .{ + .metadata = metadata, + .data = .{ + .file = .{ + .file = file, + .offset = BODY_LEN_HEADER_LEN, + .len = body_len, + }, + }, + }; +} + +pub fn put(self: *FsCache, meta: CachedMetadata, body: []const u8) !void { + const hashed_key = hashKey(meta.url); + const cache_p = cachePath(&hashed_key); + const cache_tmp_p = cacheTmpPath(&hashed_key); + + const lock = self.getLockPtr(&hashed_key); + lock.lock(); + defer lock.unlock(); + + const file = self.dir.createFile(&cache_tmp_p, .{ .truncate = true }) catch |e| { + log.err(.cache, "create file", .{ .url = meta.url, .file = &cache_tmp_p, .err = e }); + return e; + }; + defer file.close(); + + var writer_buf: [1024]u8 = undefined; + var file_writer = file.writer(&writer_buf); + var file_writer_iface = &file_writer.interface; + + var len_buf: [8]u8 = undefined; + std.mem.writeInt(u64, &len_buf, body.len, .little); + + file_writer_iface.writeAll(&len_buf) catch |e| { + log.err(.cache, "write body len", .{ .url = meta.url, .err = e }); + return e; + }; + file_writer_iface.writeAll(body) catch |e| { + log.err(.cache, "write body", .{ .url = meta.url, .err = e }); + return e; + }; + std.json.Stringify.value( + CacheMetadataJson{ .version = CACHE_VERSION, .metadata = meta }, + .{ .whitespace = .minified }, + file_writer_iface, + ) catch |e| { + log.err(.cache, "write metadata", .{ .url = meta.url, .err = e }); + return e; + }; + file_writer_iface.flush() catch |e| { + log.err(.cache, "flush", .{ .url = meta.url, .err = e }); + return e; + }; + self.dir.rename(&cache_tmp_p, &cache_p) catch |e| { + log.err(.cache, "rename", .{ .url = meta.url, .from = &cache_tmp_p, .to = &cache_p, .err = e }); + return e; + }; + + log.debug(.cache, "put", .{ .url = 
meta.url, .hash = &hashed_key, .body_len = body.len }); +} + +const testing = std.testing; + +fn setupCache() !struct { tmp: testing.TmpDir, cache: Cache } { + var tmp = testing.tmpDir(.{}); + errdefer tmp.cleanup(); + + const path = try tmp.dir.realpathAlloc(testing.allocator, "."); + defer testing.allocator.free(path); + + return .{ + .tmp = tmp, + .cache = Cache{ .kind = .{ .fs = try FsCache.init(path) } }, + }; +} + +test "FsCache: basic put and get" { + var setup = try setupCache(); + defer { + setup.cache.deinit(); + setup.tmp.cleanup(); + } + + const cache = &setup.cache; + + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + + const now = std.time.timestamp(); + const meta = CachedMetadata{ + .url = "https://example.com", + .content_type = "text/html", + .status = 200, + .stored_at = now, + .age_at_store = 0, + .cache_control = .{ .max_age = 600 }, + .headers = &.{}, + .vary_headers = &.{}, + }; + + const body = "hello world"; + try cache.put(meta, body); + + const result = cache.get( + arena.allocator(), + .{ + .url = "https://example.com", + .timestamp = now, + .request_headers = &.{}, + }, + ) orelse return error.CacheMiss; + const f = result.data.file; + const file = f.file; + defer file.close(); + + var buf: [64]u8 = undefined; + var file_reader = file.reader(&buf); + try file_reader.seekTo(f.offset); + + const read_buf = try file_reader.interface.readAlloc(testing.allocator, f.len); + defer testing.allocator.free(read_buf); + try testing.expectEqualStrings(body, read_buf); +} + +test "FsCache: get expiration" { + var setup = try setupCache(); + defer { + setup.cache.deinit(); + setup.tmp.cleanup(); + } + + const cache = &setup.cache; + + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + + const now = 5000; + const max_age = 1000; + + const meta = CachedMetadata{ + .url = "https://example.com", + .content_type = "text/html", + .status = 200, + .stored_at = now, + .age_at_store = 
900, + .cache_control = .{ .max_age = max_age }, + .headers = &.{}, + .vary_headers = &.{}, + }; + + const body = "hello world"; + try cache.put(meta, body); + + const result = cache.get( + arena.allocator(), + .{ + .url = "https://example.com", + .timestamp = now + 50, + .request_headers = &.{}, + }, + ) orelse return error.CacheMiss; + result.data.file.file.close(); + + try testing.expectEqual(null, cache.get( + arena.allocator(), + .{ + .url = "https://example.com", + .timestamp = now + 200, + .request_headers = &.{}, + }, + )); + + try testing.expectEqual(null, cache.get( + arena.allocator(), + .{ + .url = "https://example.com", + .timestamp = now, + .request_headers = &.{}, + }, + )); +} + +test "FsCache: put override" { + var setup = try setupCache(); + defer { + setup.cache.deinit(); + setup.tmp.cleanup(); + } + + const cache = &setup.cache; + + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + + { + const now = 5000; + const max_age = 1000; + + const meta = CachedMetadata{ + .url = "https://example.com", + .content_type = "text/html", + .status = 200, + .stored_at = now, + .age_at_store = 900, + .cache_control = .{ .max_age = max_age }, + .headers = &.{}, + .vary_headers = &.{}, + }; + + const body = "hello world"; + try cache.put(meta, body); + + const result = cache.get( + arena.allocator(), + .{ + .url = "https://example.com", + .timestamp = now, + .request_headers = &.{}, + }, + ) orelse return error.CacheMiss; + const f = result.data.file; + const file = f.file; + defer file.close(); + + var buf: [64]u8 = undefined; + var file_reader = file.reader(&buf); + try file_reader.seekTo(f.offset); + + const read_buf = try file_reader.interface.readAlloc(testing.allocator, f.len); + defer testing.allocator.free(read_buf); + + try testing.expectEqualStrings(body, read_buf); + } + + { + const now = 10000; + const max_age = 2000; + + const meta = CachedMetadata{ + .url = "https://example.com", + .content_type = "text/html", + 
.status = 200, + .stored_at = now, + .age_at_store = 0, + .cache_control = .{ .max_age = max_age }, + .headers = &.{}, + .vary_headers = &.{}, + }; + + const body = "goodbye world"; + try cache.put(meta, body); + + const result = cache.get( + arena.allocator(), + .{ + .url = "https://example.com", + .timestamp = now, + .request_headers = &.{}, + }, + ) orelse return error.CacheMiss; + const f = result.data.file; + const file = f.file; + defer file.close(); + + var buf: [64]u8 = undefined; + var file_reader = file.reader(&buf); + try file_reader.seekTo(f.offset); + + const read_buf = try file_reader.interface.readAlloc(testing.allocator, f.len); + defer testing.allocator.free(read_buf); + + try testing.expectEqualStrings(body, read_buf); + } +} + +test "FsCache: garbage file" { + var setup = try setupCache(); + defer { + setup.cache.deinit(); + setup.tmp.cleanup(); + } + + const hashed_key = hashKey("https://example.com"); + const cache_p = cachePath(&hashed_key); + const file = try setup.cache.kind.fs.dir.createFile(&cache_p, .{}); + try file.writeAll("this is not a valid cache file !@#$%"); + file.close(); + + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + + try testing.expectEqual( + null, + setup.cache.get(arena.allocator(), .{ + .url = "https://example.com", + .timestamp = 5000, + .request_headers = &.{}, + }), + ); +} + +test "FsCache: vary hit and miss" { + var setup = try setupCache(); + defer { + setup.cache.deinit(); + setup.tmp.cleanup(); + } + + const cache = &setup.cache; + + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + + const now = std.time.timestamp(); + const meta = CachedMetadata{ + .url = "https://example.com", + .content_type = "text/html", + .status = 200, + .stored_at = now, + .age_at_store = 0, + .cache_control = .{ .max_age = 600 }, + .headers = &.{}, + .vary_headers = &.{ + .{ .name = "Accept-Encoding", .value = "gzip" }, + }, + }; + + try cache.put(meta, 
"hello world"); + + const result = cache.get(arena.allocator(), .{ + .url = "https://example.com", + .timestamp = now, + .request_headers = &.{ + .{ .name = "Accept-Encoding", .value = "gzip" }, + }, + }) orelse return error.CacheMiss; + result.data.file.file.close(); + + try testing.expectEqual(null, cache.get(arena.allocator(), .{ + .url = "https://example.com", + .timestamp = now, + .request_headers = &.{ + .{ .name = "Accept-Encoding", .value = "br" }, + }, + })); + + try testing.expectEqual(null, cache.get(arena.allocator(), .{ + .url = "https://example.com", + .timestamp = now, + .request_headers = &.{}, + })); + + const result2 = cache.get(arena.allocator(), .{ + .url = "https://example.com", + .timestamp = now, + .request_headers = &.{ + .{ .name = "Accept-Encoding", .value = "gzip" }, + }, + }) orelse return error.CacheMiss; + result2.data.file.file.close(); +} + +test "FsCache: vary multiple headers" { + var setup = try setupCache(); + defer { + setup.cache.deinit(); + setup.tmp.cleanup(); + } + + const cache = &setup.cache; + + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + + const now = std.time.timestamp(); + const meta = CachedMetadata{ + .url = "https://example.com", + .content_type = "text/html", + .status = 200, + .stored_at = now, + .age_at_store = 0, + .cache_control = .{ .max_age = 600 }, + .headers = &.{}, + .vary_headers = &.{ + .{ .name = "Accept-Encoding", .value = "gzip" }, + .{ .name = "Accept-Language", .value = "en" }, + }, + }; + + try cache.put(meta, "hello world"); + + const result = cache.get(arena.allocator(), .{ + .url = "https://example.com", + .timestamp = now, + .request_headers = &.{ + .{ .name = "Accept-Encoding", .value = "gzip" }, + .{ .name = "Accept-Language", .value = "en" }, + }, + }) orelse return error.CacheMiss; + result.data.file.file.close(); + + try testing.expectEqual(null, cache.get(arena.allocator(), .{ + .url = "https://example.com", + .timestamp = now, + .request_headers 
= &.{ + .{ .name = "Accept-Encoding", .value = "gzip" }, + .{ .name = "Accept-Language", .value = "fr" }, + }, + })); +} diff --git a/src/network/http.zig b/src/network/http.zig index 2bfabac0..6dc217ea 100644 --- a/src/network/http.zig +++ b/src/network/http.zig @@ -79,7 +79,7 @@ pub const Headers = struct { self.headers = updated_headers; } - fn parseHeader(header_str: []const u8) ?Header { + pub fn parseHeader(header_str: []const u8) ?Header { const colon_pos = std.mem.indexOfScalar(u8, header_str, ':') orelse return null; const name = std.mem.trim(u8, header_str[0..colon_pos], " \t"); @@ -88,22 +88,9 @@ pub const Headers = struct { return .{ .name = name, .value = value }; } - pub fn iterator(self: *Headers) Iterator { - return .{ - .header = self.headers, - }; + pub fn iterator(self: Headers) HeaderIterator { + return .{ .curl_slist = .{ .header = self.headers } }; } - - const Iterator = struct { - header: [*c]libcurl.CurlSList, - - pub fn next(self: *Iterator) ?Header { - const h = self.header orelse return null; - - self.header = h.*.next; - return parseHeader(std.mem.span(@as([*:0]const u8, @ptrCast(h.*.data)))); - } - }; }; // In normal cases, the header iterator comes from the curl linked list. @@ -112,6 +99,7 @@ pub const Headers = struct { // This union, is an iterator that exposes the same API for either case. 
pub const HeaderIterator = union(enum) { curl: CurlHeaderIterator, + curl_slist: CurlSListIterator, list: ListHeaderIterator, pub fn next(self: *HeaderIterator) ?Header { @@ -120,6 +108,19 @@ pub const HeaderIterator = union(enum) { } } + pub fn collect(self: *HeaderIterator, allocator: std.mem.Allocator) !std.ArrayList(Header) { + var list: std.ArrayList(Header) = .empty; + + while (self.next()) |hdr| { + try list.append(allocator, .{ + .name = try allocator.dupe(u8, hdr.name), + .value = try allocator.dupe(u8, hdr.value), + }); + } + + return list; + } + const CurlHeaderIterator = struct { conn: *const Connection, prev: ?*libcurl.CurlHeader = null, @@ -136,6 +137,16 @@ pub const HeaderIterator = union(enum) { } }; + const CurlSListIterator = struct { + header: [*c]libcurl.CurlSList, + + pub fn next(self: *CurlSListIterator) ?Header { + const h = self.header orelse return null; + self.header = h.*.next; + return Headers.parseHeader(std.mem.span(@as([*:0]const u8, @ptrCast(h.*.data)))); + } + }; + const ListHeaderIterator = struct { index: usize = 0, list: []const Header,