diff --git a/lp-net-cache/6ba046812fd9e6b6c7e484ec946c6f2dcc1e6022693e854809c72179a7a486b7.cache b/lp-net-cache/6ba046812fd9e6b6c7e484ec946c6f2dcc1e6022693e854809c72179a7a486b7.cache deleted file mode 100644 index a944dbb8..00000000 Binary files a/lp-net-cache/6ba046812fd9e6b6c7e484ec946c6f2dcc1e6022693e854809c72179a7a486b7.cache and /dev/null differ diff --git a/src/browser/HttpClient.zig b/src/browser/HttpClient.zig index e4c7a8f2..98a2c2eb 100644 --- a/src/browser/HttpClient.zig +++ b/src/browser/HttpClient.zig @@ -367,7 +367,14 @@ fn processRequest(self: *Client, req: Request) !void { const arena = try self.network.app.arena_pool.acquire(.{ .debug = "HttpClient.processRequest.cache" }); defer self.network.app.arena_pool.release(arena); - if (cache.get(arena, .{ .url = req.url, .timestamp = std.time.timestamp() })) |cached| { + var iter = req.headers.iterator(); + const req_header_list = try iter.collect(arena); + + if (cache.get(arena, .{ + .url = req.url, + .timestamp = std.time.timestamp(), + .request_headers = req_header_list.items, + })) |cached| { log.debug(.browser, "http.cache.get", .{ .url = req.url, .found = true, @@ -963,23 +970,6 @@ fn processOneMessage(self: *Client, msg: http.Handles.MultiMessage, transfer: *T } } - const allocator = transfer.arena.allocator(); - var header_list: std.ArrayList(http.Header) = .empty; - - var it = transfer.responseHeaderIterator(); - while (it.next()) |hdr| { - header_list.append( - allocator, - .{ - .name = try allocator.dupe(u8, hdr.name), - .value = try allocator.dupe(u8, hdr.value), - }, - ) catch |err| { - log.warn(.http, "cache header collect failed", .{ .err = err }); - break; - }; - } - // release conn ASAP so that it's available; some done_callbacks // will load more resources. transfer.releaseConn(); @@ -1562,6 +1552,8 @@ pub const Transfer = struct { const rh = &transfer.response_header.?; const allocator = transfer.arena.allocator(); + const vary = if (conn.getResponseHeader("vary", 0)) |h| h.value else null; + const maybe_cm = try Cache.tryCache( allocator, std.time.timestamp(), @@ -1569,7 +1561,7 @@ pub const Transfer = struct { rh.status, rh.contentType(), if (conn.getResponseHeader("cache-control", 0)) |h| h.value else null, - if (conn.getResponseHeader("vary", 0)) |h| h.value else null, + vary, if (conn.getResponseHeader("etag", 0)) |h| h.value else null, if (conn.getResponseHeader("last-modified", 0)) |h| h.value else null, if (conn.getResponseHeader("age", 0)) |h| h.value else null, @@ -1578,17 +1570,32 @@ pub const Transfer = struct { ); if (maybe_cm) |cm| { - var header_list: std.ArrayList(http.Header) = .empty; - var it = transfer.responseHeaderIterator(); - while (it.next()) |hdr| { - try header_list.append(allocator, .{ - .name = try allocator.dupe(u8, hdr.name), - .value = try allocator.dupe(u8, hdr.value), - }); - } - transfer._pending_cache_metadata = cm; - transfer._pending_cache_metadata.?.headers = header_list.items; + + var iter = transfer.responseHeaderIterator(); + var header_list = try iter.collect(allocator); + const end_of_response = header_list.items.len; + transfer._pending_cache_metadata.?.headers = header_list.items[0..end_of_response]; + + if (vary) |vary_str| { + var req_it = transfer.req.headers.iterator(); + + while (req_it.next()) |hdr| { + var vary_iter = std.mem.splitScalar(u8, vary_str, ','); + + while (vary_iter.next()) |part| { + const name = std.mem.trim(u8, part, &std.ascii.whitespace); + if (std.ascii.eqlIgnoreCase(hdr.name, name)) { + try header_list.append(allocator, .{ + .name = try allocator.dupe(u8, hdr.name), + .value = try allocator.dupe(u8, hdr.value), + }); + } + } + } + + transfer._pending_cache_metadata.?.vary_headers = header_list.items[end_of_response..]; + } } } diff --git a/src/network/cache/Cache.zig b/src/network/cache/Cache.zig index c5d9af56..fd20e967 100644 --- a/src/network/cache/Cache.zig +++ b/src/network/cache/Cache.zig @@ -95,23 +95,6 @@ pub const CacheControl = struct { } }; -pub const Vary = union(enum) { - wildcard: void, - value: []const u8, - - pub fn parse(value: []const u8) Vary { - if (std.mem.eql(u8, value, "*")) return .wildcard; - return .{ .value = value }; - } - - pub fn toString(self: Vary) []const u8 { - return switch (self) { - .wildcard => "*", - .value => |v| v, - }; - } -}; - pub const CachedMetadata = struct { url: [:0]const u8, content_type: []const u8, @@ -126,13 +109,17 @@ pub const CachedMetadata = struct { last_modified: ?[]const u8, cache_control: CacheControl, - vary: ?Vary, + /// Response Headers headers: []const Http.Header, + + /// These are Request Headers used by Vary. + vary_headers: []const Http.Header, }; pub const CacheRequest = struct { url: []const u8, timestamp: i64, + request_headers: []const Http.Header, }; pub const CachedData = union(enum) { @@ -166,6 +153,7 @@ pub fn tryCache( if (status != 200) return null; if (has_set_cookie) return null; if (has_authorization) return null; + if (vary) |v| if (std.mem.eql(u8, v, "*")) return null; const cc = CacheControl.parse(cache_control orelse return null) orelse return null; return .{ @@ -175,9 +163,9 @@ pub fn tryCache( .stored_at = timestamp, .age_at_store = if (age) |a| std.fmt.parseInt(u64, a, 10) catch 0 else 0, .cache_control = cc, - .vary = if (vary) |v| Vary.parse(v) else null, .etag = if (etag) |e| try arena.dupe(u8, e) else null, .last_modified = if (last_modified) |lm| try arena.dupe(u8, lm) else null, .headers = &.{}, + .vary_headers = &.{}, }; } diff --git a/src/network/cache/FsCache.zig b/src/network/cache/FsCache.zig index 4d70866d..fef6a4b4 100644 --- a/src/network/cache/FsCache.zig +++ b/src/network/cache/FsCache.zig @@ -154,6 +154,7 @@ pub fn get(self: *FsCache, arena: std.mem.Allocator, req: CacheRequest) ?Cache.C const metadata = cache_file.metadata; + // Check entry expiration. const now = req.timestamp; const age = (now - metadata.stored_at) + @as(i64, @intCast(metadata.age_at_store)); if (age < 0 or @as(u64, @intCast(age)) >= metadata.cache_control.max_age) { @@ -162,6 +163,28 @@ pub fn get(self: *FsCache, arena: std.mem.Allocator, req: CacheRequest) ?Cache.C return null; } + // If we have Vary headers, ensure they are present & matching. + for (metadata.vary_headers) |vary_hdr| { + const name = vary_hdr.name; + const value = vary_hdr.value; + + const incoming = for (req.request_headers) |h| { + if (std.ascii.eqlIgnoreCase(h.name, name)) break h.value; + } else ""; + + if (!std.ascii.eqlIgnoreCase(value, incoming)) { + log.debug(.cache, "vary mismatch", .{ .url = req.url, .header = name }); + return null; + } + } + + // On the case of a hash collision. + if (!std.ascii.eqlIgnoreCase(metadata.url, req.url)) { + log.warn(.cache, "collision", .{ .url = req.url, .expected = metadata.url, .got = req.url }); + cleanup = true; + return null; + } + return .{ .metadata = metadata, .data = .{ @@ -243,8 +266,8 @@ test "FsCache: basic put and get" { .etag = null, .last_modified = null, .cache_control = .{ .max_age = 600 }, - .vary = null, .headers = &.{}, + .vary_headers = &.{}, }; const body = "hello world"; @@ -252,7 +275,11 @@ test "FsCache: basic put and get" { const result = cache.get( arena.allocator(), - .{ .url = "https://example.com", .timestamp = now }, + .{ + .url = "https://example.com", + .timestamp = now, + .request_headers = &.{}, + }, ) orelse return error.CacheMiss; const f = result.data.file; const file = f.file; @@ -291,8 +318,8 @@ test "FsCache: get expiration" { .etag = null, .last_modified = null, .cache_control = .{ .max_age = max_age }, - .vary = null, .headers = &.{}, + .vary_headers = &.{}, }; const body = "hello world"; @@ -300,18 +327,30 @@ test "FsCache: get expiration" { const result = cache.get( arena.allocator(), - .{ .url = "https://example.com", .timestamp = now + 50 }, + .{ + .url = "https://example.com", + .timestamp = now + 50, + .request_headers = &.{}, + }, ) orelse return error.CacheMiss; result.data.file.file.close(); try testing.expectEqual(null, cache.get( arena.allocator(), - .{ .url = "https://example.com", .timestamp = now + 200 }, + .{ + .url = "https://example.com", + .timestamp = now + 200, + .request_headers = &.{}, + }, )); try testing.expectEqual(null, cache.get( arena.allocator(), - .{ .url = "https://example.com", .timestamp = now }, + .{ + .url = "https://example.com", + .timestamp = now, + .request_headers = &.{}, + }, )); } @@ -340,8 +379,8 @@ test "FsCache: put override" { .etag = null, .last_modified = null, .cache_control = .{ .max_age = max_age }, - .vary = null, .headers = &.{}, + .vary_headers = &.{}, }; const body = "hello world"; @@ -349,7 +388,11 @@ test "FsCache: put override" { const result = cache.get( arena.allocator(), - .{ .url = "https://example.com", .timestamp = now }, + .{ + .url = "https://example.com", + .timestamp = now, + .request_headers = &.{}, + }, ) orelse return error.CacheMiss; const f = result.data.file; const file = f.file; @@ -378,8 +421,8 @@ test "FsCache: put override" { .etag = null, .last_modified = null, .cache_control = .{ .max_age = max_age }, - .vary = null, .headers = &.{}, + .vary_headers = &.{}, }; const body = "goodbye world"; @@ -387,7 +430,11 @@ test "FsCache: put override" { const result = cache.get( arena.allocator(), - .{ .url = "https://example.com", .timestamp = now }, + .{ + .url = "https://example.com", + .timestamp = now, + .request_headers = &.{}, + }, ) orelse return error.CacheMiss; const f = result.data.file; const file = f.file; @@ -422,6 +469,124 @@ test "FsCache: garbage file" { try testing.expectEqual( null, - setup.cache.get(arena.allocator(), .{ .url = "https://example.com", .timestamp = 5000 }), + setup.cache.get(arena.allocator(), .{ + .url = "https://example.com", + .timestamp = 5000, + .request_headers = &.{}, + }), ); } + +test "FsCache: vary hit and miss" { + var setup = try setupCache(); + defer { + setup.cache.deinit(); + setup.tmp.cleanup(); + } + + const cache = &setup.cache; + + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + + const now = std.time.timestamp(); + const meta = CachedMetadata{ + .url = "https://example.com", + .content_type = "text/html", + .status = 200, + .stored_at = now, + .age_at_store = 0, + .etag = null, + .last_modified = null, + .cache_control = .{ .max_age = 600 }, + .headers = &.{}, + .vary_headers = &.{ + .{ .name = "Accept-Encoding", .value = "gzip" }, + }, + }; + + try cache.put(meta, "hello world"); + + const result = cache.get(arena.allocator(), .{ + .url = "https://example.com", + .timestamp = now, + .request_headers = &.{ + .{ .name = "Accept-Encoding", .value = "gzip" }, + }, + }) orelse return error.CacheMiss; + result.data.file.file.close(); + + try testing.expectEqual(null, cache.get(arena.allocator(), .{ + .url = "https://example.com", + .timestamp = now, + .request_headers = &.{ + .{ .name = "Accept-Encoding", .value = "br" }, + }, + })); + + try testing.expectEqual(null, cache.get(arena.allocator(), .{ + .url = "https://example.com", + .timestamp = now, + .request_headers = &.{}, + })); + + const result2 = cache.get(arena.allocator(), .{ + .url = "https://example.com", + .timestamp = now, + .request_headers = &.{ + .{ .name = "Accept-Encoding", .value = "gzip" }, + }, + }) orelse return error.CacheMiss; + result2.data.file.file.close(); +} + +test "FsCache: vary multiple headers" { + var setup = try setupCache(); + defer { + setup.cache.deinit(); + setup.tmp.cleanup(); + } + + const cache = &setup.cache; + + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + + const now = std.time.timestamp(); + const meta = CachedMetadata{ + .url = "https://example.com", + .content_type = "text/html", + .status = 200, + .stored_at = now, + .age_at_store = 0, + .etag = null, + .last_modified = null, + .cache_control = .{ .max_age = 600 }, + .headers = &.{}, + .vary_headers = &.{ + .{ .name = "Accept-Encoding", .value = "gzip" }, + .{ .name = "Accept-Language", .value = "en" }, + }, + }; + + try cache.put(meta, "hello world"); + + const result = cache.get(arena.allocator(), .{ + .url = "https://example.com", + .timestamp = now, + .request_headers = &.{ + .{ .name = "Accept-Encoding", .value = "gzip" }, + .{ .name = "Accept-Language", .value = "en" }, + }, + }) orelse return error.CacheMiss; + result.data.file.file.close(); + + try testing.expectEqual(null, cache.get(arena.allocator(), .{ + .url = "https://example.com", + .timestamp = now, + .request_headers = &.{ + .{ .name = "Accept-Encoding", .value = "gzip" }, + .{ .name = "Accept-Language", .value = "fr" }, + }, + })); +} diff --git a/src/network/http.zig b/src/network/http.zig index 2bfabac0..6dc217ea 100644 --- a/src/network/http.zig +++ b/src/network/http.zig @@ -79,7 +79,7 @@ pub const Headers = struct { self.headers = updated_headers; } - fn parseHeader(header_str: []const u8) ?Header { + pub fn parseHeader(header_str: []const u8) ?Header { const colon_pos = std.mem.indexOfScalar(u8, header_str, ':') orelse return null; const name = std.mem.trim(u8, header_str[0..colon_pos], " \t"); @@ -88,22 +88,9 @@ pub const Headers = struct { return .{ .name = name, .value = value }; } - pub fn iterator(self: *Headers) Iterator { - return .{ - .header = self.headers, - }; + pub fn iterator(self: Headers) HeaderIterator { + return .{ .curl_slist = .{ .header = self.headers } }; } - - const Iterator = struct { - header: [*c]libcurl.CurlSList, - - pub fn next(self: *Iterator) ?Header { - const h = self.header orelse return null; - - self.header = h.*.next; - return parseHeader(std.mem.span(@as([*:0]const u8, @ptrCast(h.*.data)))); - } - }; }; // In normal cases, the header iterator comes from the curl linked list. @@ -112,6 +99,7 @@ pub const Headers = struct { // This union, is an iterator that exposes the same API for either case. pub const HeaderIterator = union(enum) { curl: CurlHeaderIterator, + curl_slist: CurlSListIterator, list: ListHeaderIterator, pub fn next(self: *HeaderIterator) ?Header { @@ -120,6 +108,19 @@ pub const HeaderIterator = union(enum) { } } + pub fn collect(self: *HeaderIterator, allocator: std.mem.Allocator) !std.ArrayList(Header) { + var list: std.ArrayList(Header) = .empty; + + while (self.next()) |hdr| { + try list.append(allocator, .{ + .name = try allocator.dupe(u8, hdr.name), + .value = try allocator.dupe(u8, hdr.value), + }); + } + + return list; + } + const CurlHeaderIterator = struct { conn: *const Connection, prev: ?*libcurl.CurlHeader = null, @@ -136,6 +137,16 @@ pub const HeaderIterator = union(enum) { } }; + const CurlSListIterator = struct { + header: [*c]libcurl.CurlSList, + + pub fn next(self: *CurlSListIterator) ?Header { + const h = self.header orelse return null; + self.header = h.*.next; + return Headers.parseHeader(std.mem.span(@as([*:0]const u8, @ptrCast(h.*.data)))); + } + }; + const ListHeaderIterator = struct { index: usize = 0, list: []const Header,